Skip to content

Commit 2e0297d

Browse files
committed
fix gold pipeline loader
1 parent ada4cdc commit 2e0297d

File tree

1 file changed

+26
-20
lines changed

1 file changed

+26
-20
lines changed

src/pipeline_defaults.jl

+26-20
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,6 @@ let MODEL_CHAT = MODEL_CHAT, MODEL_EMBEDDING = MODEL_EMBEDDING,
4040
generator_kwargs = (;
4141
answerer_kwargs = (;
4242
model = MODEL_CHAT),
43-
embedder_kwargs = (;
44-
truncate_dimension = EMBEDDING_DIMENSION,
45-
model = MODEL_EMBEDDING),
4643
refiner_kwargs = (;
4744
model = MODEL_CHAT))))
4845
## Silver - reranking added
@@ -65,9 +62,6 @@ let MODEL_CHAT = MODEL_CHAT, MODEL_EMBEDDING = MODEL_EMBEDDING,
6562
generator_kwargs = (;
6663
answerer_kwargs = (;
6764
model = MODEL_CHAT),
68-
embedder_kwargs = (;
69-
truncate_dimension = EMBEDDING_DIMENSION,
70-
model = MODEL_EMBEDDING),
7165
refiner_kwargs = (;
7266
model = MODEL_CHAT))))
7367
## Gold - reranking + web-search
@@ -84,14 +78,13 @@ let MODEL_CHAT = MODEL_CHAT, MODEL_EMBEDDING = MODEL_EMBEDDING,
8478
rephraser_kwargs = (;
8579
model = MODEL_CHAT),
8680
embedder_kwargs = (;
81+
truncate_dimension = nothing,
8782
model = MODEL_EMBEDDING),
8883
tagger_kwargs = (;
8984
model = MODEL_CHAT)),
9085
generator_kwargs = (;
9186
answerer_kwargs = (;
9287
model = MODEL_CHAT),
93-
embedder_kwargs = (;
94-
model = MODEL_EMBEDDING),
9588
refiner_kwargs = (;
9689
model = MODEL_CHAT))))
9790
end
@@ -119,7 +112,7 @@ See available pipeline options via `keys(RAG_CONFIGURATIONS)`.
119112
120113
Logic:
121114
- Updates the global `MODEL_CHAT` and `MODEL_EMBEDDING` to the requested models.
122-
- Update the global `EMBEDDING_DIMENSION` for the requested embedding dimensionality after truncation (`embedding_dimension`).
115+
- Updates the global `EMBEDDING_DIMENSION` for the requested embedding dimensionality after truncation (`embedding_dimension`).
123116
- Updates the global `RAG_CONFIG` and `RAG_KWARGS` to the requested `option`.
124117
- Updates the global `LOADED_CONFIG_KEY` to the configuration key for the given `option` and `kwargs` (used by the artifact system to download the correct knowledge packs).
125118
@@ -139,31 +132,34 @@ update_pipeline!(:bronze; model_chat = "llama3", model_embedding="nomic-embed-te
139132
load_index!()
140133
```
141134
"""
142-
function update_pipeline!(option::Symbol = :bronze; model_chat = MODEL_CHAT,
143-
model_embedding = MODEL_EMBEDDING, verbose::Bool = true,
144-
embedding_dimension::Integer = EMBEDDING_DIMENSION)
135+
function update_pipeline!(
136+
option::Symbol = :bronze; model_chat::Union{String, Nothing} = nothing,
137+
model_embedding::Union{String, Nothing} = nothing, verbose::Bool = true,
138+
embedding_dimension::Union{Integer, Nothing} = nothing)
145139
global RAG_CONFIGURATIONS, RAG_CONFIG, RAG_KWARGS, MODEL_CHAT, MODEL_EMBEDDING, EMBEDDING_DIMENSION, LOADED_CONFIG_KEY
146140

141+
## Set from globals if not provided
142+
model_chat = !isnothing(model_chat) ? model_chat : MODEL_CHAT
143+
model_embedding = !isnothing(model_embedding) ? model_embedding : MODEL_EMBEDDING
144+
## Do not set embedding dimensions, we might need to extract it from kwargs
145+
147146
## WARN about limited support for nomic-embed-text -- we need to create repeatable process
148147
if model_embedding == "nomic-embed-text"
149148
@warn "Knowledge packs for `nomic-embed-text` are currently not built automatically, so they might be missing / outdated. Please switch to OpenAI `text-embedding-3-large` for the best experience."
150149
end
151150

152151
@assert haskey(RAG_CONFIGURATIONS, option) "Invalid option: $option. Select one of: $(join(keys(RAG_CONFIGURATIONS),", "))"
153-
@assert embedding_dimension in [0, 1024, 3072] "Invalid embedding_dimension: $(embedding_dimension). Supported: 0, 1024, 3072. See the available artifacts."
152+
@assert (isnothing(embedding_dimension)||embedding_dimension in [0, 1024]) "Invalid embedding_dimension: $(embedding_dimension). Supported: 0 (no truncation) or 1024. See the available artifacts."
154153
## Model-specific checks, they do not fail but at least warn
155-
if model_embedding == "nomic-embed-text" && !iszero(embedding_dimension)
154+
if model_embedding == "nomic-embed-text" &&
155+
!(iszero(embedding_dimension) || isnothing(embedding_dimension))
156156
@warn "Invalid configuration for knowledge packs! For `nomic-embed-text`, `embedding_dimension` must be 0. See the available artifacts."
157157
end
158-
if model_embedding == "text-embedding-3-large" && embedding_dimension ∉ [1024, 0]
158+
if model_embedding == "text-embedding-3-large" &&
159+
(embedding_dimension ∉ [1024, 0] || !isnothing(embedding_dimension))
159160
@warn "Invalid configuration for knowledge packs! For `text-embedding-3-large`, `embedding_dimension` must be 0 or 1024. See the available artifacts."
160161
end
161162

162-
## Update model references
163-
MODEL_CHAT = model_chat
164-
MODEL_EMBEDDING = model_embedding
165-
EMBEDDING_DIMENSION = embedding_dimension
166-
167163
config = RAG_CONFIGURATIONS[option][:config]
168164
kwargs = RAG_CONFIGURATIONS[option][:kwargs]
169165
# update models in kwargs to the ones requested
@@ -179,8 +175,18 @@ function update_pipeline!(option::Symbol = :bronze; model_chat = MODEL_CHAT,
179175
if !isnothing(embedding_dimension)
180176
kwargs = setpropertynested(
181177
kwargs, [:embedder_kwargs], :truncate_dimension, embedding_dimension)
178+
else
179+
## load the value from defaults in config -- to match the artifacts
180+
val = getpropertynested(kwargs, [:embedder_kwargs], :truncate_dimension, nothing)
181+
embedding_dimension = isnothing(val) ? 0 : val
182182
end
183183

184+
## Update GLOBAL variables
185+
MODEL_CHAT = model_chat
186+
MODEL_EMBEDDING = model_embedding
187+
@info embedding_dimension
188+
EMBEDDING_DIMENSION = embedding_dimension
189+
184190
## Set the options
185191
config_key = get_config_key(config, kwargs)
186192
## detect significant changes

0 commit comments

Comments
 (0)