This repository has been archived by the owner on Feb 27, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add llm support for 0.2 modules along with tests
Signed-off-by: Ryan Levick <ryan.levick@fermyon.com>
- Loading branch information
Showing
13 changed files
with
348 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
use std::collections::HashMap; | ||
|
||
use anyhow::{ensure, Result}; | ||
use async_trait::async_trait; | ||
use serde::Serialize; | ||
|
||
use crate::llm; | ||
|
||
/// Report of which LLM functions a module successfully used, if any | ||
#[derive(Serialize, PartialEq, Eq, Debug)] | ||
pub struct LlmReport { | ||
/// Outcome of the `llm-infer` check: `Ok(())` on success, otherwise a string describing the failure | ||
pub infer: Result<(), String>, | ||
} | ||
|
||
/// In-memory `llm::Host` implementation that answers from pre-registered responses | ||
#[derive(Default)] | ||
pub struct Llm { | ||
/// Registered inference texts keyed by `(model, prompt)`; `infer` removes the entry it returns | ||
inferences: HashMap<(String, String), String>, | ||
/// Registered embeddings keyed by `(model, text)`; `generate_embeddings` removes the entry it returns | ||
embeddings: HashMap<(String, Vec<String>), Vec<Vec<f32>>>, | ||
} | ||
|
||
#[async_trait] | ||
impl llm::Host for Llm { | ||
/// Returns the text registered for `(model, prompt)` and removes that entry from `inferences`. | ||
/// | ||
/// `_params` is ignored and both token counts in the returned usage are always zero. | ||
/// When no entry matches, the inner result is `llm::Error::RuntimeError` with a message | ||
/// listing the keys still registered and the prompt that was requested. | ||
async fn infer( | ||
&mut self, | ||
model: llm::InferencingModel, | ||
prompt: String, | ||
_params: Option<llm::InferencingParams>, | ||
) -> wasmtime::Result<Result<llm::InferencingResult, llm::Error>> { | ||
Ok(self | ||
.inferences | ||
// `prompt` is cloned because it is used again in the error message below | ||
.remove(&(model, prompt.clone())) | ||
.map(|r| llm::InferencingResult { | ||
text: r, | ||
usage: llm::InferencingUsage { | ||
prompt_token_count: 0, | ||
generated_token_count: 0, | ||
}, | ||
}) | ||
.ok_or_else(|| { | ||
llm::Error::RuntimeError(format!( | ||
"expected {:?}, got {:?}", | ||
self.inferences.keys(), | ||
prompt | ||
)) | ||
})) | ||
} | ||
|
||
/// Returns the embeddings registered for `(model, text)` and removes that entry from `embeddings`. | ||
/// | ||
/// The prompt token count in the returned usage is always zero. | ||
/// When no entry matches, the inner result is `llm::Error::RuntimeError` with a message | ||
/// listing the keys still registered and the text that was requested. | ||
async fn generate_embeddings( | ||
&mut self, | ||
model: llm::EmbeddingModel, | ||
text: Vec<String>, | ||
) -> wasmtime::Result<Result<llm::EmbeddingsResult, llm::Error>> { | ||
Ok(self | ||
.embeddings | ||
// `text` is cloned because it is used again in the error message below | ||
.remove(&(model, text.clone())) | ||
.map(|r| llm::EmbeddingsResult { | ||
embeddings: r, | ||
usage: llm::EmbeddingsUsage { | ||
prompt_token_count: 0, | ||
}, | ||
}) | ||
.ok_or_else(|| { | ||
llm::Error::RuntimeError(format!( | ||
"expected {:?}, got {:?}", | ||
self.embeddings.keys(), | ||
text | ||
)) | ||
})) | ||
} | ||
} | ||
|
||
/// Runs the LLM checks against a pre-instantiated module and collects the results in an `LlmReport`. | ||
/// | ||
/// For `infer`: registers a single response for `("model", "Say hello")`, runs the module with | ||
/// the `llm-infer` command and those same arguments, then requires that the registered | ||
/// response has been consumed. | ||
pub(crate) async fn test( | ||
engine: &wasmtime::Engine, | ||
test_config: crate::TestConfig, | ||
pre: &wasmtime::component::InstancePre<crate::Context>, | ||
) -> Result<LlmReport> { | ||
Ok(LlmReport { | ||
infer: { | ||
// Seed the store's context with the one response the module is expected to request | ||
let mut store = | ||
crate::create_store_with_context(engine, test_config.clone(), |context| { | ||
context | ||
.llm | ||
.inferences | ||
.insert(("model".into(), "Say hello".into()), "hello".into()); | ||
}); | ||
|
||
crate::run_command( | ||
&mut store, | ||
pre, | ||
&["llm-infer", "model", "Say hello"], | ||
|store| { | ||
// A matching `infer` call removes the entry, so a non-empty map means it was never requested | ||
ensure!( | ||
store.data().llm.inferences.is_empty(), | ||
"expected module to call `llm::infer` exactly once" | ||
); | ||
|
||
Ok(()) | ||
}, | ||
) | ||
.await | ||
}, | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
// A WASI interface dedicated to performing inferencing for Large Language Models. | ||
interface llm { | ||
/// A Large Language Model. | ||
type inferencing-model = string | ||
|
||
/// Inference request parameters | ||
record inferencing-params { | ||
/// The maximum tokens that should be inferred. | ||
/// | ||
/// Note: the backing implementation may return fewer tokens. | ||
max-tokens: u32, | ||
/// The amount the model should avoid repeating tokens. | ||
repeat-penalty: float32, | ||
/// The number of tokens the model should apply the repeat penalty to. | ||
repeat-penalty-last-n-token-count: u32, | ||
/// The randomness with which the next token is selected. | ||
temperature: float32, | ||
/// The number of possible next tokens the model will choose from. | ||
top-k: u32, | ||
/// The probability total of next tokens the model will choose from. | ||
top-p: float32 | ||
} | ||
|
||
/// The set of errors which may be raised by functions in this interface | ||
variant error { | ||
/// Carries no payload | ||
model-not-supported, | ||
/// Carries a string payload | ||
runtime-error(string), | ||
/// Carries a string payload | ||
invalid-input(string) | ||
} | ||
|
||
/// An inferencing result | ||
record inferencing-result { | ||
/// The text generated by the model | ||
// TODO: this should be a stream | ||
text: string, | ||
/// Usage information about the inferencing request | ||
usage: inferencing-usage | ||
} | ||
|
||
/// Usage information related to the inferencing result | ||
record inferencing-usage { | ||
/// Number of tokens in the prompt | ||
prompt-token-count: u32, | ||
/// Number of tokens generated by the inferencing operation | ||
generated-token-count: u32 | ||
} | ||
|
||
/// Perform inferencing using the provided model and prompt with the given optional params | ||
infer: func(model: inferencing-model, prompt: string, params: option<inferencing-params>) -> result<inferencing-result, error> | ||
|
||
/// The model used for generating embeddings | ||
type embedding-model = string | ||
|
||
/// Generate embeddings for the supplied list of text | ||
generate-embeddings: func(model: embedding-model, text: list<string>) -> result<embeddings-result, error> | ||
|
||
/// Result of generating embeddings | ||
record embeddings-result { | ||
/// The embeddings generated by the request | ||
embeddings: list<list<float32>>, | ||
/// Usage related to the embeddings generation request | ||
usage: embeddings-usage | ||
} | ||
|
||
/// Usage related to an embeddings generation request | ||
record embeddings-usage { | ||
/// Number of tokens in the prompt | ||
prompt-token-count: u32, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ world reactor { | |
import redis | ||
import key-value | ||
import http | ||
import llm | ||
export inbound-http | ||
export inbound-redis | ||
} |
Oops, something went wrong.