Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: generic vectorize files tool utility #3

Merged
merged 2 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions codellm/src/tool/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ export const initTools = async (config: Config): Promise<Tools> => {

return tools;
};

export * from './utils/index.js';
5 changes: 0 additions & 5 deletions codellm/src/tool/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,3 @@ export type Tool = {
description: ToolDescription;
};
/**
 * Collection of Tool instances; initTools resolves to a value of this type.
 * NOTE(review): presumably keyed by tool name — confirm against initTools.
 */
export type Tools = Record<string, Tool>;

export type VectorDbToolConfig = {
vectorDbName: string;
vectorDbCollectionName: string;
};
1 change: 1 addition & 0 deletions codellm/src/tool/utils/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
export { processFiles } from './processFiles/index.js';
export * as vectorizeFiles from './vectorizeFiles/index.js';
1 change: 1 addition & 0 deletions codellm/src/tool/utils/types.ts
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
export * from './processFiles/types.js';
export * from './vectorizeFiles/types.js';
44 changes: 44 additions & 0 deletions codellm/src/tool/utils/vectorizeFiles/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import type {
Config,
VectorDbQueryOpts,
VectorizeFilesPrompts,
VectorizeFilesToolConfig,
} from '@/.';

import { log, vectorDb } from '@/index.js';
import { vectorizeFiles } from './vectorizeFiles.js';

/**
 * Create a vectorize-files client bound to one tool.
 *
 * Opens a vector database client for the database named in the tool
 * configuration, initialises the configured collection on it, and returns two
 * helpers that share that connection.
 *
 * @param toolName - Name of the calling tool; used in log messages and passed
 *   through to vectorizeFiles
 * @param config - The codellm configuration, forwarded to the vector database
 *   client factory and to vectorizeFiles
 * @param toolConfig - Tool configuration providing `vectorDbName` and
 *   `vectorDbCollectionName`
 *
 * @returns An object with `vectorizeFiles(prompts)` and `query(opts)`
 */
export const newClient = async (
  toolName: string,
  config: Config,
  toolConfig: VectorizeFilesToolConfig,
) => {
  const { vectorDbName: dbName, vectorDbCollectionName: collectionName } =
    toolConfig;

  const dbClient = await vectorDb.newClient(dbName, config);
  await dbClient.init([collectionName]);

  // Runs the vectorize pass with the connection and configuration captured above.
  const runVectorizeFiles = (prompts: VectorizeFilesPrompts) =>
    vectorizeFiles({ config, dbClient, prompts, toolConfig, toolName });

  // Queries the tool's collection; the query options are forwarded untouched.
  const query = (opts: VectorDbQueryOpts) => {
    log(`${toolName} running`, 'debug', {
      vectorDbCollectionName: collectionName,
      opts,
    });

    //TODO: Validate params

    return dbClient.query({ collectionName, opts });
  };

  return { vectorizeFiles: runVectorizeFiles, query };
};
31 changes: 31 additions & 0 deletions codellm/src/tool/utils/vectorizeFiles/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import type {
Config,
LlmClient,
ProcessFileHandleParams,
ProcessFilesToolConfig,
VectorDbClient,
} from '@/.';

/**
 * Tool configuration accepted by the vectorizeFiles utility: everything in
 * ProcessFilesToolConfig plus the vector database settings below.
 */
export type VectorizeFilesToolConfig = ProcessFilesToolConfig & {
// Name passed to vectorDb.newClient to select the vector database.
vectorDbName: string;
// Collection initialised on the database client and used for queries and added documents.
vectorDbCollectionName: string;
};

/** Prompt text a tool supplies to the vectorizeFiles utility. */
export type VectorizeFilesPrompts = {
// Prompt placed ahead of each file's content when the LLM is asked for a summary.
summarize: string;
};

/** Arguments for vectorizeFiles, which runs the per-file vectorize handler over a tool's files. */
export type VectorizeFilesParams = {
// codellm configuration; used to initialise the summarize LLM.
config: Config;
// Vector database client handed to the per-file handler.
dbClient: VectorDbClient;
// Prompts for the run; `summarize` is passed to the per-file handler.
prompts: VectorizeFilesPrompts;
// Supplies include/exclude for file processing and the target collection name.
toolConfig: VectorizeFilesToolConfig;
// Name of the calling tool; used in log messages and passed to processFiles.
toolName: string;
};

/**
 * Arguments for vectorizeFile: the per-file values provided by processFiles
 * (ProcessFileHandleParams) plus what is needed to summarize and store the file.
 */
export type VectorizeFileParams = ProcessFileHandleParams & {
// Collection the resulting document is added to.
collectionName: string;
// Vector database client used to add the document.
dbClient: VectorDbClient;
// LLM client used to produce the summary.
llm: LlmClient;
// Summarization prompt placed ahead of the file content.
prompt: string;
};
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@ import type {
LlmClient,
ProcessFileHandleParams,
VectorDbAddDocumentsParams,
} from '@interrobangc/codellm';
import type { HandleFileParams, RunImportParams } from './types.js';
VectorizeFileParams,
VectorizeFilesParams,
} from '@/.';

import { llm as codeLlmLlm, log, toolUtils } from '@interrobangc/codellm';
import {
vectorDbCollectionName as collectionName,
summarizeTaskPrompt,
} from './constants.js';
import { llm as codeLlmLlm, log, toolUtils } from '@/index.js';

/**
* Summarize the code using the summarize LLM
Expand All @@ -21,11 +18,15 @@ import {
*
* @throws - If there is an error summarizing the code
*/
export const summarize = async (llm: LlmClient, code: string) => {
export const summarize = async (
llm: LlmClient,
prompt: string,
code: string,
) => {
return llm.prompt({
system: '',
prompt: `
${summarizeTaskPrompt}
${prompt}
${code}
`,
});
Expand All @@ -38,14 +39,16 @@ export const summarize = async (llm: LlmClient, code: string) => {
* @param llm - The LLM to use for summarization
* @param path - The path to the file to handle
*/
export const handleFile = async ({
export const vectorizeFile = async ({
dbClient,
llm,
fileContent,
fileContentHash,
filePath,
filePathHash,
}: HandleFileParams) => {
collectionName,
prompt,
}: VectorizeFileParams) => {
// TODO: dynamic for different passes in a single run
const id = `codeSummary:${filePath}`;

Expand All @@ -66,7 +69,11 @@ export const handleFile = async ({
return;
}

const response = await summarize(llm, `file: ${filePath}\n\n${fileContent}`);
const response = await summarize(
llm,
prompt,
`file: ${filePath}\n\n${fileContent}`,
);

const document: VectorDbAddDocumentsParams = {
collectionName,
Expand All @@ -87,25 +94,15 @@ export const handleFile = async ({
await dbClient.addDocuments(document);
};

/**
* Run the import for the codeSummaryQuery tool
*
* @param params Object - The parameters for the import
* @param params.config - The codellm configuration
* @param params.toolConfig - The tool configuration
* @param params.vectorDb - The vector database client to use
*
* @returns - The result of the import
*
* @throws - If there is an error running the import
*/
export const runImport = async ({
export const vectorizeFiles = async ({
config,
toolConfig,
dbClient,
}: RunImportParams) => {
prompts,
toolConfig,
toolName,
}: VectorizeFilesParams): Promise<void> => {
const llms = await codeLlmLlm.initLlms(config, ['summarize']);
log('codeSummaryQuery runImport LLMs', 'silly', { llms });
log(`${toolName} runImport LLMs`, 'silly', { llms });
const llm = llms.summarize;

if (!llm) {
Expand All @@ -116,19 +113,17 @@ export const runImport = async ({
const { include, exclude } = toolConfig;

await toolUtils.processFiles({
toolName: 'codeSummaryQuery',
toolName,
path,
include,
exclude,
handle: (params: ProcessFileHandleParams) =>
handleFile({
vectorizeFile({
dbClient,
llm,
collectionName: toolConfig.vectorDbCollectionName,
prompt: prompts.summarize,
...params,
}),
});

return { success: true, content: 'success' };
};

export default runImport;
7 changes: 3 additions & 4 deletions codellm/src/vectorDb/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ export type VectorDbQueryParams = {
};

export type VectorDbQueryResultItem = EmbeddingDocument & {
distances: number[][] | null;
distance: number | null;
};

/** Result of a vector database query: a list of VectorDbQueryResultItem. */
export type VectorDbQueryResult = VectorDbQueryResultItem[];

export type VectorDbGetParams = {
Expand All @@ -53,9 +54,7 @@ export type VectorDbGetResult = VectorDbGetResultItem[];
export type VectorDbClient = {
init: (collectionNames: string[]) => Promise<void>;
addDocuments: (params: VectorDbAddDocumentsParams) => Promise<void>;
// query: (params: VectorDbQueryParams) => Promise<VectorDbQueryResult>;
// get: (params: VectorDbGetParams) => Promise<VectorDbGetResult>;
query: (params: VectorDbQueryParams) => Promise<unknown>;
query: (params: VectorDbQueryParams) => Promise<VectorDbQueryResult>;
get: (params: VectorDbGetParams) => Promise<unknown>;
};

Expand Down
54 changes: 38 additions & 16 deletions tools/codeSummaryQuery/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import type { Config, Tool, ToolRunParamsCommon } from '@interrobangc/codellm';
import type {
Config,
Tool,
ToolRunParamsCommon,
ToolRunReturn,
} from '@interrobangc/codellm';
import type { ToolConfig } from './types';

import { vectorDb } from '@interrobangc/codellm';
import { toolUtils } from '@interrobangc/codellm';
import {
DEFAULT_CONFIG,
description,
vectorDbCollectionName,
numResults,
summarizeTaskPrompt,
} from './constants.js';
import run from './run.js';
import runImport from './runImport.js';

/**
* Create a new codeSummaryQuery tool
Expand All @@ -27,19 +31,37 @@ export const newTool = async (
...(config.tools?.[toolName]?.config as Partial<ToolConfig>),
} as ToolConfig;

const { vectorDbName } = toolConfig;

const dbClient = await vectorDb.newClient(vectorDbName, config);
await dbClient.init([vectorDbCollectionName]);
const vectorizeFilesClient = await toolUtils.vectorizeFiles.newClient(
toolName,
config,
toolConfig,
);

return {
run: async (params: ToolRunParamsCommon) =>
run({
...params,
toolConfig,
dbClient,
}),
import: async () => runImport({ config, toolConfig, dbClient }),
/**
 * Query the tool's vector collection and return the matches as a JSON string.
 *
 * Each match is reduced to its path, its stored summary, optionally the file
 * content (when `params.includeCode` is truthy) and its distance from the
 * query.
 *
 * @param params - Tool run parameters; `query` is the search text and
 *   `includeCode` toggles inclusion of the stored file content
 *
 * @returns `{ success: true, content }` where `content` is the JSON-encoded
 *   list of matches
 */
run: async ({ params }: ToolRunParamsCommon): Promise<ToolRunReturn> => {
  const dbResponse = await vectorizeFilesClient.query({
    // NOTE(review): `query` is cast, not validated — confirm callers always pass a string.
    query: params['query'] as unknown as string,
    numResults,
  });

  const content = JSON.stringify(
    // @ts-expect-error - types aren't in place yet
    dbResponse.map((d) => ({
      path: d.metadata['path'],
      summary: d.document,
      code: params['includeCode'] ? d.metadata['content'] : undefined,
      // VectorDbQueryResultItem exposes a single `distance`; reading the old
      // `distances` field yields undefined, which JSON.stringify drops.
      distance: d.distance,
    })),
  );

  return { success: true, content };
},
/**
 * Vectorize the tool's files using the code-summary prompt.
 *
 * @returns `{ success: true, content: 'Import complete' }` once the
 *   vectorize pass has finished
 */
import: async () => {
  const prompts = { summarize: summarizeTaskPrompt };
  await vectorizeFilesClient.vectorizeFiles(prompts);

  return { success: true, content: 'Import complete' };
},
description,
};
};
59 changes: 0 additions & 59 deletions tools/codeSummaryQuery/src/run.ts

This file was deleted.

Loading
Loading