-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathazure.js
71 lines (56 loc) · 2.42 KB
/
azure.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import "dotenv/config";
import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
import { YoutubeLoader } from "@langchain/community/document_loaders/web/youtube";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { AzureChatOpenAI, AzureOpenAIEmbeddings } from "@langchain/openai";
import { AzureAISearchVectorStore } from "@langchain/community/vectorstores/azure_aisearch";
// RAG demo: index a YouTube video transcript in Azure AI Search, then answer
// a question about it with Azure OpenAI. Credentials and endpoints for the
// Azure clients are read from environment variables (loaded via dotenv above).
const YOUTUBE_VIDEO_URL = "https://www.youtube.com/watch?v=FZhbJZEgKQ4";
const QUESTION = "What are the news about GPT-4 models?";

// Load documents ------------------------------------------------------------
console.log("Loading documents...");

const loader = YoutubeLoader.createFromUrl(YOUTUBE_VIDEO_URL, {
  language: "en",
  addVideoInfo: true,
});
const rawDocuments = await loader.load();

// Split the transcript into overlapping chunks sized for embedding.
const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 1500,
  chunkOverlap: 200,
});
const documents = await splitter.splitDocuments(rawDocuments);

// Init models and DB --------------------------------------------------------
console.log("Initializing models and DB...");

const embeddings = new AzureOpenAIEmbeddings();
const model = new AzureChatOpenAI();
const vectorStore = new AzureAISearchVectorStore(embeddings, {});

// Search if documents already exist for the source video, so we only pay the
// embedding cost once. Parse the video ID from the URL's query string rather
// than naive string splitting, so extra parameters (e.g. "&t=42s") appended
// to the URL cannot leak into the ID and break the filter.
const videoId = new URL(YOUTUBE_VIDEO_URL).searchParams.get("v");
const indexedDocuments = await vectorStore.similaritySearch("*", 1, {
  filterExpression: `metadata/source eq '${videoId}'`,
});

if (indexedDocuments.length === 0) {
  console.log("Embedding documents...");
  await vectorStore.addDocuments(documents);
}

// Run the chain -------------------------------------------------------------
console.log("Running the chain...");

const questionAnsweringPrompt = ChatPromptTemplate.fromMessages([
  ["system", "Answer the user's question using only the sources below:\n\n{context}"],
  ["human", "{input}"],
]);
const retriever = vectorStore.asRetriever();
const ragChain = await createStuffDocumentsChain({
  prompt: questionAnsweringPrompt,
  llm: model,
});

// Retrieve the most relevant chunks for the question and stream the answer.
const stream = await ragChain.stream({
  input: QUESTION,
  context: await retriever.invoke(QUESTION),
});

// Print the result ----------------------------------------------------------
console.log(`Answer for the question "${QUESTION}":\n`);
for await (const chunk of stream) {
  process.stdout.write(chunk ?? "");
}
console.log();