Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor embeddings #254

Merged
merged 3 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions packages/adapter-postgres/seed.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
INSERT INTO public.accounts (id, name, email, avatarUrl, details) VALUES ('00000000-0000-0000-0000-000000000000', 'Default Agent', 'default@agent.com', '', '{}');
INSERT INTO public.rooms (id) VALUES ('00000000-0000-0000-0000-000000000000');
INSERT INTO public.participants (userId, roomId) VALUES ('00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000');

-- Seed the default agent account under the all-zero UUID.
-- "avatarUrl" is double-quoted so the mixed-case column name is matched exactly as written;
-- the details payload is an empty JSON object cast to jsonb.
INSERT INTO public.accounts (id, name, email, "avatarUrl", details)
VALUES ('00000000-0000-0000-0000-000000000000', 'Default Agent', 'default@agent.com', '', '{}'::jsonb);

-- Seed the default room, also under the all-zero UUID.
INSERT INTO public.rooms (id)
VALUES ('00000000-0000-0000-0000-000000000000');

-- Link the default account to the default room. The participant row carries its own id
-- (ending in ...0001); "userId" and "roomId" are quoted for the same case-preservation reason.
INSERT INTO public.participants (id, "userId", "roomId")
VALUES ('00000000-0000-0000-0000-000000000001', '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000');
142 changes: 72 additions & 70 deletions packages/core/src/embedding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,105 +22,106 @@ function getRootPath() {
return path.resolve(__dirname, "..");
}

/**
* Send a message to the OpenAI API for embedding.
* @param input The input to be embedded.
* @returns The embedding of the input.
*/
export async function embed(runtime: IAgentRuntime, input: string) {
// get the charcter, and handle by model type
const modelProvider = models[runtime.character.modelProvider];
const embeddingModel = modelProvider.model.embedding;

if (
runtime.character.modelProvider !== ModelProviderName.OPENAI &&
runtime.character.modelProvider !== ModelProviderName.OLLAMA &&
!settings.USE_OPENAI_EMBEDDING
) {

// make sure to trim tokens to 8192
const cacheDir = getRootPath() + "/cache/";

// if the cache directory doesn't exist, create it
if (!fs.existsSync(cacheDir)) {
fs.mkdirSync(cacheDir, { recursive: true });
}

const embeddingModel = await FlagEmbedding.init({
cacheDir: cacheDir
});

const trimmedInput = trimTokens(input, 8000, "gpt-4o-mini");

const embedding: number[] = await embeddingModel.queryEmbed(trimmedInput);
console.log("Embedding dimensions: ", embedding.length);
return embedding;

// commented out the text generation service that uses llama
// const service = runtime.getService<ITextGenerationService>(
// ServiceType.TEXT_GENERATION
// );

// const instance = service?.getInstance();

// if (instance) {
// return await instance.getEmbeddingResponse(input);
// }
}

// TODO: Fix retrieveCachedEmbedding
// Check if we already have the embedding in the lore
const cachedEmbedding = await retrieveCachedEmbedding(runtime, input);
if (cachedEmbedding) {
return cachedEmbedding;
}
/** Settings consumed by `getRemoteEmbedding` when calling a remote embeddings endpoint. */
interface EmbeddingOptions {
    /** Model identifier sent as the `model` field of the request body. */
    model: string;

    /** Base URL of the provider; `/embeddings` (optionally preceded by `/v1`) is appended to it. */
    endpoint: string;

    /** When present, sent as an `Authorization: Bearer …` header. */
    apiKey?: string;

    /** When true, `/v1` is inserted between the endpoint and `/embeddings`. */
    isOllama?: boolean;

    /** Value sent as the `length` field of the request body; 384 is used when omitted. */
    length?: number;
}

/**
 * Request an embedding for `input` from a remote `/embeddings` HTTP endpoint.
 *
 * The request is a JSON POST carrying `input`, `model` and `length`; everything
 * provider-specific comes from `options`, so this function does not depend on
 * the agent runtime.
 *
 * @param input Text to embed; sent verbatim as the `input` field of the request body.
 * @param options Model name, base endpoint, and the optional API key, length and Ollama flag.
 * @returns The `embedding` array of the first entry in the response's `data` list.
 * @throws Error when the response status is not OK, or when the response body
 *         contains no embedding. Any error is logged before being rethrown.
 */
async function getRemoteEmbedding(input: string, options: EmbeddingOptions): Promise<number[]> {
    const headers: Record<string, string> = {
        "Content-Type": "application/json",
    };
    // Only attach credentials when a key was actually supplied.
    if (options.apiKey) {
        headers.Authorization = `Bearer ${options.apiKey}`;
    }

    const requestOptions = {
        method: "POST",
        headers,
        body: JSON.stringify({
            input,
            model: options.model,
            length: options.length || 384,
        }),
    };

    try {
        const response = await fetch(
            // The Ollama route needs a "/v1" prefix in front of "/embeddings".
            `${options.endpoint}${options.isOllama ? "/v1" : ""}/embeddings`,
            requestOptions
        );

        if (!response.ok) {
            throw new Error(
                "Embedding API Error: " +
                    response.status +
                    " " +
                    response.statusText
            );
        }

        interface EmbeddingResponse {
            data: Array<{ embedding: number[] }>;
        }

        const data: EmbeddingResponse = await response.json();

        // Guard every hop: an empty or missing `data` list must surface as a clear
        // error instead of a TypeError or a silently undefined "embedding".
        const embedding = data?.data?.[0]?.embedding;
        if (!Array.isArray(embedding)) {
            throw new Error("Embedding API Error: response contained no embedding");
        }
        return embedding;
    } catch (e) {
        console.error(e);
        throw e;
    }
}

/**
 * Produce an embedding for `input` on behalf of the given agent runtime.
 *
 * Resolution order:
 *  1. If the character's provider is neither OpenAI nor Ollama and
 *     `settings.USE_OPENAI_EMBEDDING` is not set, embed with the local model.
 *  2. Otherwise return a cached embedding when one is found.
 *  3. Otherwise call the provider's remote embeddings endpoint.
 *
 * @param runtime Agent runtime supplying the character configuration, token and cache lookup.
 * @param input The input to be embedded.
 * @returns The embedding of the input.
 */
export async function embed(runtime: IAgentRuntime, input: string) {
    const providerName = runtime.character.modelProvider;
    const providerConfig = models[providerName];
    const remoteModelName = providerConfig.model.embedding;

    // Remote embedding applies to OpenAI/Ollama characters, or when explicitly forced.
    const wantsRemote =
        providerName === ModelProviderName.OPENAI ||
        providerName === ModelProviderName.OLLAMA ||
        settings.USE_OPENAI_EMBEDDING;
    if (!wantsRemote) {
        return await getLocalEmbedding(input);
    }

    // Reuse a previously stored embedding when available.
    const cached = await retrieveCachedEmbedding(runtime, input);
    if (cached) {
        return cached;
    }

    // Fall through to the remote provider.
    const endpoint =
        runtime.character.modelEndpointOverride || providerConfig.endpoint;
    const isOllama =
        runtime.character.modelProvider === ModelProviderName.OLLAMA &&
        !settings.USE_OPENAI_EMBEDDING;
    return await getRemoteEmbedding(input, {
        model: remoteModelName,
        endpoint,
        apiKey: runtime.token,
        isOllama,
    });
}

/**
 * Embed `input` with the local FlagEmbedding model.
 *
 * Ensures the `cache/` directory under the package root exists, initialises the
 * model with that directory as its cache, trims the input, and logs the
 * dimensionality of the resulting vector.
 *
 * @param input Text to embed.
 * @returns The embedding produced by the local model's `queryEmbed`.
 */
async function getLocalEmbedding(input: string): Promise<number[]> {
    const modelCachePath = `${getRootPath()}/cache/`;

    // Create the cache directory on first use.
    const cacheDirPresent = fs.existsSync(modelCachePath);
    if (!cacheDirPresent) {
        fs.mkdirSync(modelCachePath, { recursive: true });
    }

    const localModel = await FlagEmbedding.init({ cacheDir: modelCachePath });

    // The input is trimmed to 8000 tokens before it is handed to the model.
    const vector = await localModel.queryEmbed(
        trimTokens(input, 8000, "gpt-4o-mini")
    );
    console.log("Embedding dimensions: ", vector.length);
    return vector;
}

export async function retrieveCachedEmbedding(
runtime: IAgentRuntime,
input: string
Expand All @@ -129,11 +130,12 @@ export async function retrieveCachedEmbedding(
console.log("No input to retrieve cached embedding for");
return null;
}

const similaritySearchResult =
await runtime.messageManager.getCachedEmbeddings(input);
if (similaritySearchResult.length > 0) {
return similaritySearchResult[0].embedding;
}
return null;
}

Loading