@@ -18,109 +18,127 @@ import Foundation
/// requests to the backend model.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct GenerationConfig {
- /// A parameter controlling the degree of randomness in token selection. A
- /// temperature of zero is deterministic, always choosing the
- /// highest-probability response. Typical values are between 0 and 1
- /// inclusive. Defaults to 0 if unspecified.
- public let temperature: Float?
+ /// Controls the degree of randomness in token selection.
+ let temperature: Float?

- /// The `topP` parameter changes how the model selects tokens for output.
- /// Tokens are selected from the most to least probable until the sum of
- /// their probabilities equals the `topP` value. For example, if tokens A, B,
- /// and C have probabilities of 0.3, 0.2, and 0.1 respectively and the `topP`
- /// value is 0.5, then the model will select either A or B as the next token
- /// by using the `temperature` and exclude C as a candidate.
- /// Defaults to 0.95 if unset.
- public let topP: Float?
+ /// Controls diversity of generated text.
+ let topP: Float?

- /// The `topK` parameter changes how the model selects tokens for output. A
- /// `topK` of 1 means the selected token is the most probable among all the
- /// tokens in the model's vocabulary, while a `topK` of 3 means that the next
- /// token is selected from among the 3 most probable using the `temperature`.
- /// For each token selection step, the `topK` tokens with the highest
- /// probabilities are sampled. Tokens are then further filtered based on
- /// `topP` with the final token selected using `temperature` sampling.
- /// Defaults to 40 if unspecified.
- public let topK: Int?
+ /// Limits the number of highest probability words considered.
+ let topK: Int?

- /// The maximum number of generated response messages to return. This value
- /// must be between [1, 8], inclusive. If unset, this will default to 1.
- ///
- /// - Note: Only unique candidates are returned. Higher temperatures are more
- ///   likely to produce unique candidates. Setting `temperature` to 0 will
- ///   always produce exactly one candidate regardless of the
- ///   `candidateCount`.
- public let candidateCount: Int?
+ /// The number of response variations to return.
+ let candidateCount: Int?

- /// Specifies the maximum number of tokens that can be generated in the
- /// response. The number of tokens per word varies depending on the
- /// language outputted. The maximum value is capped at 1024. Defaults to 0
- /// (unbounded).
- public let maxOutputTokens: Int?
+ /// Maximum number of tokens that can be generated in the response.
+ let maxOutputTokens: Int?

/// Controls the likelihood of repeating the same words or phrases already generated in the text.
- ///
- /// Higher values increase the penalty of repetition, resulting in more diverse output. The
- /// maximum value for `presencePenalty` is up to, but not including, `2.0`; the minimum value is
- /// `-2.0`.
- ///
- /// > Note: While both `presencePenalty` and ``frequencyPenalty`` discourage repetition,
- /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase has
- /// > already appeared, whereas `frequencyPenalty` increases the penalty for *each* repetition of
- /// > a word/phrase.
- ///
- /// > Important: Supported by `gemini-1.5-pro-002` and `gemini-1.5-flash-002` only.
- public let presencePenalty: Float?
+ let presencePenalty: Float?

/// Controls the likelihood of repeating words, with the penalty increasing for each repetition.
- ///
- /// Higher values increase the penalty of repetition, resulting in more diverse output. The
- /// maximum value for `frequencyPenalty` is up to, but not including, `2.0`; the minimum value is
- /// `-2.0`.
- ///
- /// > Note: While both `frequencyPenalty` and ``presencePenalty`` discourage repetition,
- /// > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas
- /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase has
- /// > already appeared.
- ///
- /// > Important: Supported by `gemini-1.5-pro-002` and `gemini-1.5-flash-002` only.
- public let frequencyPenalty: Float?
+ let frequencyPenalty: Float?

- /// A set of up to 5 `String`s that will stop output generation. If
- /// specified, the API will stop at the first appearance of a stop sequence.
- /// The stop sequence will not be included as part of the response.
- public let stopSequences: [String]?
+ /// A set of up to 5 `String`s that will stop output generation.
+ let stopSequences: [String]?

/// Output response MIME type of the generated candidate text.
- ///
- /// Supported MIME types:
- /// - `text/plain`: Text output; the default behavior if unspecified.
- /// - `application/json`: JSON response in the candidates.
- public let responseMIMEType: String?
+ let responseMIMEType: String?

/// Output schema of the generated candidate text.
- /// If set, a compatible ``responseMIMEType`` must also be set.
- ///
- /// Compatible MIME types:
- /// - `application/json`: Schema for JSON response.
- ///
- /// Refer to the [Control generated
- /// output](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/control-generated-output)
- /// guide for more details.
- public let responseSchema: Schema?
+ let responseSchema: Schema?

/// Creates a new `GenerationConfig` value.
///
- /// - Parameter temperature: See ``temperature``
- /// - Parameter topP: See ``topP``
- /// - Parameter topK: See ``topK``
- /// - Parameter candidateCount: See ``candidateCount``
- /// - Parameter maxOutputTokens: See ``maxOutputTokens``
- /// - Parameter presencePenalty: See ``presencePenalty``
- /// - Parameter frequencyPenalty: See ``frequencyPenalty``
- /// - Parameter stopSequences: See ``stopSequences``
- /// - Parameter responseMIMEType: See ``responseMIMEType``
- /// - Parameter responseSchema: See ``responseSchema``
+ /// See the
+ /// [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters)
+ /// guide and the
+ /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ /// for more details.
+ ///
+ /// - Parameters:
+ ///   - temperature: Controls the randomness of the language model's output. Higher values (for
+ ///     example, 1.0) make the text more random and creative, while lower values (for example,
+ ///     0.1) make it more focused and deterministic.
+ ///
+ ///     > Note: A temperature of 0 means that the highest probability tokens are always selected.
+ ///     > In this case, responses for a given prompt are mostly deterministic, but a small amount
+ ///     > of variation is still possible.
+ ///
+ ///     > Important: The range of supported temperature values depends on the model; see the
+ ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     > for more details.
+ ///   - topP: Controls diversity of generated text. Higher values (e.g., 0.9) produce more diverse
+ ///     text, while lower values (e.g., 0.5) make the output more focused.
+ ///
+ ///     The supported range is 0.0 to 1.0.
+ ///
+ ///     > Important: The default `topP` value depends on the model; see the
+ ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     for more details.
+ ///   - topK: Limits the number of highest probability words the model considers when generating
+ ///     text. For example, a topK of 40 means only the 40 most likely words are considered for the
+ ///     next token. A higher value increases diversity, while a lower value makes the output more
+ ///     deterministic.
+ ///
+ ///     The supported range is 1 to 40.
+ ///
+ ///     > Important: Support for `topK` and the default value depends on the model; see the
+ ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     for more details.
+ ///   - candidateCount: The number of response variations to return; defaults to 1 if not set.
+ ///     Support for multiple candidates depends on the model; see the
+ ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     for more details.
+ ///   - maxOutputTokens: Maximum number of tokens that can be generated in the response.
+ ///     See the [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#max-output-tokens)
+ ///     documentation for more details.
+ ///   - presencePenalty: Controls the likelihood of repeating the same words or phrases already
+ ///     generated in the text. Higher values increase the penalty of repetition, resulting in more
+ ///     diverse output.
+ ///
+ ///     > Note: While both `presencePenalty` and `frequencyPenalty` discourage repetition,
+ ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
+ ///     > has already appeared, whereas `frequencyPenalty` increases the penalty for *each*
+ ///     > repetition of a word/phrase.
+ ///
+ ///     > Important: The range of supported `presencePenalty` values depends on the model; see the
+ ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     > for more details.
+ ///   - frequencyPenalty: Controls the likelihood of repeating words or phrases, with the penalty
+ ///     increasing for each repetition. Higher values increase the penalty of repetition,
+ ///     resulting in more diverse output.
+ ///
+ ///     > Note: While both `frequencyPenalty` and `presencePenalty` discourage repetition,
+ ///     > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas
+ ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
+ ///     > has already appeared.
+ ///
+ ///     > Important: The range of supported `frequencyPenalty` values depends on the model; see
+ ///     > the
+ ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     > for more details.
+ ///   - stopSequences: A set of up to 5 `String`s that will stop output generation. If specified,
+ ///     the API will stop at the first appearance of a stop sequence. The stop sequence will not
+ ///     be included as part of the response. See the
+ ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
+ ///     for more details.
+ ///   - responseMIMEType: Output response MIME type of the generated candidate text.
+ ///
+ ///     Supported MIME types:
+ ///     - `text/plain`: Text output; the default behavior if unspecified.
+ ///     - `application/json`: JSON response in the candidates.
+ ///     - `text/x.enum`: For classification tasks, output an enum value as defined in the
+ ///       `responseSchema`.
+ ///   - responseSchema: Output schema of the generated candidate text. If set, a compatible
+ ///     `responseMIMEType` must also be set.
+ ///
+ ///     Compatible MIME types:
+ ///     - `application/json`: Schema for JSON response.
+ ///
+ ///     Refer to the
+ ///     [Control generated output](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/control-generated-output)
+ ///     guide for more details.
public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
            candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
            presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
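
For reference while reviewing, here is a minimal usage sketch of the initializer documented above. The parameter names and defaults come straight from this diff; the `FirebaseVertexAI` import, the model name, and the `generativeModel(modelName:generationConfig:)` entry point are assumptions about the surrounding SDK surface, not part of this change.

```swift
import FirebaseVertexAI

// Sketch: tune sampling behavior via GenerationConfig. The values below are
// illustrative, not recommendations; supported ranges depend on the model.
let config = GenerationConfig(
  temperature: 0.4, // lower values make output more deterministic
  topP: 0.95, // nucleus sampling threshold; supported range is 0.0 to 1.0
  topK: 40, // consider only the 40 most likely tokens at each step
  maxOutputTokens: 1024, // cap on the length of the generated response
  stopSequences: ["END"] // generation stops at the first occurrence of "END"
)

// Assumed entry point for attaching the config to a model
// (requires FirebaseApp.configure() to have been called).
let model = VertexAI.vertexAI().generativeModel(
  modelName: "gemini-1.5-flash",
  generationConfig: config
)
```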
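A second sketch for the structured-output parameters. The supported `responseMIMEType` values are taken from the doc comment above; the `Schema.object`/`Schema.string` factory calls are an assumption about the `Schema` type's API and may differ between SDK versions.

```swift
// Sketch, assuming Schema exposes object/string factories: ask the model to
// return JSON matching a fixed shape instead of free-form text.
let jsonConfig = GenerationConfig(
  responseMIMEType: "application/json", // must be compatible with the schema
  responseSchema: Schema.object(properties: [
    "title": Schema.string(), // hypothetical field names, for illustration
    "summary": Schema.string(),
  ])
)
```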