core: remove langchain dependency for text splitting
Deeptanshu Sankhwar committed Feb 22, 2025
1 parent e91e527 commit 9fd2a19
Showing 2 changed files with 14 additions and 7 deletions.
1 change: 0 additions & 1 deletion packages/core/package.json
@@ -82,7 +82,6 @@
 "handlebars": "^4.7.8",
 "js-sha1": "0.7.0",
 "js-tiktoken": "1.0.15",
-"langchain": "0.3.6",
 "ollama-ai-provider": "0.16.1",
 "openai": "4.82.0",
 "pino": "^9.6.0",
20 changes: 14 additions & 6 deletions packages/core/src/generation.ts
@@ -4,7 +4,6 @@ import { createMistral } from "@ai-sdk/mistral";
 import { createGroq } from "@ai-sdk/groq";
 import { createOpenAI } from "@ai-sdk/openai";
 import { bedrock } from "@ai-sdk/amazon-bedrock";
-import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
 import {
     generateObject as aiGenerateObject,
     generateText as aiGenerateText,
@@ -1380,12 +1379,8 @@ export async function splitChunks(
 ): Promise<string[]>
     elizaLogger.debug(`[splitChunks] Starting text split`);

-    const textSplitter = new RecursiveCharacterTextSplitter({
-        chunkSize: Number(chunkSize),
-        chunkOverlap: Number(bleed),
-    });
+    const chunks = splitText(content, chunkSize, bleed);

-    const chunks = await textSplitter.splitText(content);
     elizaLogger.debug(`[splitChunks] Split complete:`, {
         numberOfChunks: chunks.length,
         averageChunkSize:
@@ -1396,6 +1391,19 @@
     return chunks;
 }

+export function splitText(content: string, chunkSize: number, bleed: number): string[] {
+    const chunks: string[] = [];
+    let start = 0;
+
+    while (start < content.length) {
+        const end = Math.min(start + chunkSize, content.length);
+        chunks.push(content.substring(start, end));
+        start = end - bleed; // Apply overlap
+    }
+
+    return chunks;
+}
+
 /**
  * Sends a message to the model and parses the response as a boolean value
  * @param opts - The options for the generateText request
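A note on the new helper as shown in the diff: once the final chunk (the one ending at content.length) is pushed, start is set back to end - bleed, which stays below content.length whenever bleed is greater than zero, so the while loop keeps re-emitting the trailing slice. A minimal guarded sketch of the same chunk-size/overlap idea is below; the name splitTextGuarded and the overlap clamp are illustrative assumptions, not part of this commit.

export function splitTextGuarded(content: string, chunkSize: number, bleed: number): string[] {
    const chunks: string[] = [];
    // Clamp the overlap so the window always moves forward (assumes chunkSize > 0).
    const overlap = Math.min(Math.max(bleed, 0), Math.max(chunkSize - 1, 0));
    let start = 0;

    while (start < content.length) {
        const end = Math.min(start + chunkSize, content.length);
        chunks.push(content.substring(start, end));
        if (end === content.length) break; // final chunk emitted; stop before the overlap pulls start back
        start = end - overlap;
    }

    return chunks;
}

For example, splitTextGuarded("a".repeat(25), 10, 2) returns three chunks spanning offsets [0, 10), [8, 18), and [16, 25).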
