@@ -18,109 +18,127 @@ import Foundation
/// requests to the backend model.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct GenerationConfig {
- /// A parameter controlling the degree of randomness in token selection. A
- /// temperature of zero is deterministic, always choosing the
- /// highest-probability response. Typical values are between 0 and 1
- /// inclusive. Defaults to 0 if unspecified.
- public let temperature: Float?
+ /// Controls the degree of randomness in token selection.
+ let temperature: Float?

- /// The `topP` parameter changes how the model selects tokens for output.
- /// Tokens are selected from the most to least probable until the sum of
- /// their probabilities equals the `topP` value. For example, if tokens A, B,
- /// and C have probabilities of 0.3, 0.2, and 0.1 respectively and the `topP`
- /// value is 0.5, then the model will select either A or B as the next token
- /// by using the `temperature` and exclude C as a candidate.
- /// Defaults to 0.95 if unset.
- public let topP: Float?
+ /// Controls diversity of generated text.
+ let topP: Float?

- /// The `topK` parameter changes how the model selects tokens for output. A
- /// `topK` of 1 means the selected token is the most probable among all the
- /// tokens in the model's vocabulary, while a `topK` of 3 means that the next
- /// token is selected from among the 3 most probable using the `temperature`.
- /// For each token selection step, the `topK` tokens with the highest
- /// probabilities are sampled. Tokens are then further filtered based on
- /// `topP` with the final token selected using `temperature` sampling.
- /// Defaults to 40 if unspecified.
- public let topK: Int?
+ /// Limits the number of highest probability words considered.
+ let topK: Int?

- /// The maximum number of generated response messages to return. This value
- /// must be between [1, 8], inclusive. If unset, this will default to 1.
- ///
- /// - Note: Only unique candidates are returned. Higher temperatures are more
- ///   likely to produce unique candidates. Setting `temperature` to 0 will
- ///   always produce exactly one candidate regardless of the
- ///   `candidateCount`.
- public let candidateCount: Int?
+ /// The number of response variations to return.
+ let candidateCount: Int?

- /// Specifies the maximum number of tokens that can be generated in the
- /// response. The number of tokens per word varies depending on the
- /// language outputted. The maximum value is capped at 1024. Defaults to 0
- /// (unbounded).
- public let maxOutputTokens: Int?
+ /// Maximum number of tokens that can be generated in the response.
+ let maxOutputTokens: Int?

/// Controls the likelihood of repeating the same words or phrases already generated in the text.
- ///
- /// Higher values increase the penalty of repetition, resulting in more diverse output. The
- /// maximum value for `presencePenalty` is up to, but not including, `2.0`; the minimum value is
- /// `-2.0`.
- ///
- /// > Note: While both `presencePenalty` and ``frequencyPenalty`` discourage repetition,
- /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase has
- /// > already appeared, whereas `frequencyPenalty` increases the penalty for *each* repetition of
- /// > a word/phrase.
- ///
- /// > Important: Supported by `gemini-1.5-pro-002` and `gemini-1.5-flash-002` only.
- public let presencePenalty: Float?
+ let presencePenalty: Float?

/// Controls the likelihood of repeating words, with the penalty increasing for each repetition.
- ///
- /// Higher values increase the penalty of repetition, resulting in more diverse output. The
- /// maximum value for `frequencyPenalty` is up to, but not including, `2.0`; the minimum value is
- /// `-2.0`.
- ///
- /// > Note: While both `frequencyPenalty` and ``presencePenalty`` discourage repetition,
- /// > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas
- /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase has
- /// > already appeared.
- ///
- /// > Important: Supported by `gemini-1.5-pro-002` and `gemini-1.5-flash-002` only.
- public let frequencyPenalty: Float?
+ let frequencyPenalty: Float?

- /// A set of up to 5 `String`s that will stop output generation. If
- /// specified, the API will stop at the first appearance of a stop sequence.
- /// The stop sequence will not be included as part of the response.
- public let stopSequences: [String]?
+ /// A set of up to 5 `String`s that will stop output generation.
+ let stopSequences: [String]?

/// Output response MIME type of the generated candidate text.
- ///
- /// Supported MIME types:
- /// - `text/plain`: Text output; the default behavior if unspecified.
- /// - `application/json`: JSON response in the candidates.
- public let responseMIMEType: String?
+ let responseMIMEType: String?

/// Output schema of the generated candidate text.
- /// If set, a compatible ``responseMIMEType`` must also be set.
- ///
- /// Compatible MIME types:
- /// - `application/json`: Schema for JSON response.
- ///
- /// Refer to the [Control generated
- /// output](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/control-generated-output)
- /// guide for more details.
- public let responseSchema: Schema?
+ let responseSchema: Schema?

/// Creates a new `GenerationConfig` value.
///
- /// - Parameter temperature: See ``temperature``
- /// - Parameter topP: See ``topP``
- /// - Parameter topK: See ``topK``
- /// - Parameter candidateCount: See ``candidateCount``
- /// - Parameter maxOutputTokens: See ``maxOutputTokens``
- /// - Parameter presencePenalty: See ``presencePenalty``
- /// - Parameter frequencyPenalty: See ``frequencyPenalty``
- /// - Parameter stopSequences: See ``stopSequences``
- /// - Parameter responseMIMEType: See ``responseMIMEType``
- /// - Parameter responseSchema: See ``responseSchema``
+ /// See the
+ /// [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters)
+ /// guide and the
+ /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ /// for more details.
+ ///
+ /// - Parameters:
+ ///   - temperature: Controls the randomness of the language model's output. Higher values (for
+ ///     example, 1.0) make the text more random and creative, while lower values (for example,
+ ///     0.1) make it more focused and deterministic.
+ ///
+ ///     > Note: A temperature of 0 means that the highest probability tokens are always selected.
+ ///     > In this case, responses for a given prompt are mostly deterministic, but a small amount
+ ///     > of variation is still possible.
+ ///
+ ///     > Important: The range of supported temperature values depends on the model; see the
+ ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     > for more details.
+ ///   - topP: Controls diversity of generated text. Higher values (e.g., 0.9) produce more diverse
+ ///     text, while lower values (e.g., 0.5) make the output more focused.
+ ///
+ ///     The supported range is 0.0 to 1.0.
+ ///
+ ///     > Important: The default `topP` value depends on the model; see the
+ ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     for more details.
+ ///   - topK: Limits the number of highest probability words the model considers when generating
+ ///     text. For example, a topK of 40 means only the 40 most likely words are considered for the
+ ///     next token. A higher value increases diversity, while a lower value makes the output more
+ ///     deterministic.
+ ///
+ ///     The supported range is 1 to 40.
+ ///
+ ///     > Important: Support for `topK` and the default value depends on the model; see the
+ ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     for more details.
+ ///   - candidateCount: The number of response variations to return; defaults to 1 if not set.
+ ///     Support for multiple candidates depends on the model; see the
+ ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     for more details.
+ ///   - maxOutputTokens: Maximum number of tokens that can be generated in the response.
+ ///     See the [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#max-output-tokens)
+ ///     documentation for more details.
+ ///   - presencePenalty: Controls the likelihood of repeating the same words or phrases already
+ ///     generated in the text. Higher values increase the penalty of repetition, resulting in more
+ ///     diverse output.
+ ///
+ ///     > Note: While both `presencePenalty` and `frequencyPenalty` discourage repetition,
+ ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
+ ///     > has already appeared, whereas `frequencyPenalty` increases the penalty for *each*
+ ///     > repetition of a word/phrase.
+ ///
+ ///     > Important: The range of supported `presencePenalty` values depends on the model; see the
+ ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     > for more details.
+ ///   - frequencyPenalty: Controls the likelihood of repeating words or phrases, with the penalty
+ ///     increasing for each repetition. Higher values increase the penalty of repetition,
+ ///     resulting in more diverse output.
+ ///
+ ///     > Note: While both `frequencyPenalty` and `presencePenalty` discourage repetition,
+ ///     > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas
+ ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
+ ///     > has already appeared.
+ ///
+ ///     > Important: The range of supported `frequencyPenalty` values depends on the model; see
+ ///     > the
+ ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     > for more details.
+ ///   - stopSequences: A set of up to 5 `String`s that will stop output generation. If specified,
+ ///     the API will stop at the first appearance of a stop sequence. The stop sequence will not
+ ///     be included as part of the response. See the
+ ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     for more details.
+ ///   - responseMIMEType: Output response MIME type of the generated candidate text.
+ ///
+ ///     Supported MIME types:
+ ///     - `text/plain`: Text output; the default behavior if unspecified.
+ ///     - `application/json`: JSON response in the candidates.
+ ///     - `text/x.enum`: For classification tasks, output an enum value as defined in the
+ ///       `responseSchema`.
+ ///   - responseSchema: Output schema of the generated candidate text. If set, a compatible
+ ///     `responseMIMEType` must also be set.
+ ///
+ ///     Compatible MIME types:
+ ///     - `application/json`: Schema for JSON response.
+ ///
+ ///     Refer to the
+ ///     [Control generated output](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/control-generated-output)
+ ///     guide for more details.
public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
            candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
            presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
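
For reference while reviewing, here is a minimal usage sketch of the initializer documented above. The parameter names and defaults come straight from this diff; the `FirebaseVertexAI` import, the model name, and the `generativeModel(modelName:generationConfig:)` entry point are assumptions about the surrounding SDK surface, not part of this change.

```swift
import FirebaseVertexAI

// Sketch: tune sampling behavior via GenerationConfig. The values below are
// illustrative, not recommendations; supported ranges depend on the model.
let config = GenerationConfig(
  temperature: 0.4, // lower values make output more deterministic
  topP: 0.95, // nucleus sampling threshold; supported range is 0.0 to 1.0
  topK: 40, // consider only the 40 most likely tokens at each step
  maxOutputTokens: 1024, // cap on the length of the generated response
  stopSequences: ["END"] // generation stops at the first occurrence of "END"
)

// Assumed entry point for attaching the config to a model
// (requires FirebaseApp.configure() to have been called).
let model = VertexAI.vertexAI().generativeModel(
  modelName: "gemini-1.5-flash",
  generationConfig: config
)
```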
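A second sketch for the structured-output parameters. The supported `responseMIMEType` values are taken from the doc comment above; the `Schema.object`/`Schema.string` factory calls are an assumption about the `Schema` type's API and may differ between SDK versions.

```swift
// Sketch, assuming Schema exposes object/string factories: ask the model to
// return JSON matching a fixed shape instead of free-form text.
let jsonConfig = GenerationConfig(
  responseMIMEType: "application/json", // must be compatible with the schema
  responseSchema: Schema.object(properties: [
    "title": Schema.string(), // hypothetical field names, for illustration
    "summary": Schema.string(),
  ])
)
```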