evaluate.py
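
"""Evaluate a retrieval-augmented QA pipeline: chunk the documents into a FAISS
index, answer each evaluation query through RetrievalQA, and grade the model's
responses with an LLM judge."""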
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.chains import RetrievalQA, LLMChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

from config import Configuration


def evaluate(documents, eval_datasets, config: Configuration):
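    """Answer each evaluation query with a RetrievalQA chain and grade the result.

    For every example the documents are chunked, indexed in FAISS, retrieved
    with the configured top-k, and the LLM response is judged against the
    reference answer by match_response.
    """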
    results = []
    llm = load_llm(config)
    emb_model = load_emb_model(config)
    for dataset in eval_datasets:
        query = dataset.query
        answer = dataset.answer
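        # Chunking and indexing are redone for every example; acceptable for
        # small corpora, though the FAISS index could be built once up front.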
        text_splitter = CharacterTextSplitter(
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,
            separator="\n")
        texts = text_splitter.split_documents(documents)
        retriever = FAISS.from_documents(
            texts,
            emb_model,
        ).as_retriever(search_kwargs={"k": config.top_k_chunk})
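        # The "stuff" chain concatenates all retrieved chunks into one prompt,
        # so it accepts a custom template with {context} and {question} slots.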
        if config.chain_type == "stuff":
            prompt = PromptTemplate(
                template=config.retrieve_chain_template,
                input_variables=["context", "question"],
            )
            chain_type_kwargs = {"prompt": prompt}
        else:
            # TODO: implement other chain types
            chain_type_kwargs = {}
        qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type=config.chain_type,
            retriever=retriever,
            chain_type_kwargs=chain_type_kwargs,
            verbose=True)
        response = qa(query)['result']
        results.append({
            "query": query,
            "answer": answer,
            "response": response,
            "correct": match_response(response, answer, config),
        })
    return results


def match_response(response, answer, config: Configuration):
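    """LLM-as-judge check: ask GPT whether response and reference answer agree.

    Assumes config.match_template instructs the model to reply exactly "OK"
    on a match.
    """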
    llm = ChatOpenAI(temperature=0)
    match_query = config.match_template.format(response=response, answer=answer)
    prompt = ChatPromptTemplate.from_messages([
        HumanMessagePromptTemplate.from_template("{input}")
    ])
    chain = LLMChain(
        llm=llm,
        prompt=prompt,
        verbose=True)
    res = chain.run(match_query)
    return res.strip() == "OK"


def load_llm(config: Configuration):
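    """Instantiate the LLM selected in the configuration."""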
    if config.llm_model == Configuration.LlmModel.gpt_3_turbo:
        return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", verbose=config.verbose)
    elif config.llm_model == Configuration.LlmModel.gpt_4:
        return ChatOpenAI(temperature=0, model_name="gpt-4", verbose=config.verbose)
    elif config.llm_model == Configuration.LlmModel.google_flan_t5_xl:
        from langchain import HuggingFaceHub
        repo_id = "google/flan-t5-xl"
        return HuggingFaceHub(
            repo_id=repo_id,
            model_kwargs={"temperature": 0, "max_length": 2000},
        )
    elif config.llm_model == Configuration.LlmModel.google_flan_t5_large:
        from langchain import HuggingFaceHub
        repo_id = "google/flan-t5-large"
        return HuggingFaceHub(
            repo_id=repo_id,
            model_kwargs={"temperature": 0, "max_length": 2000},
            verbose=config.verbose,
        )
    elif config.llm_model == Configuration.LlmModel.dolly_v2_3b:
        import torch
        from langchain.llms import HuggingFacePipeline
        from transformers import pipeline
        model = pipeline(
            model="databricks/dolly-v2-3b",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            device_map="auto",
        )
        # A raw transformers pipeline is not a LangChain LLM; wrap it so the
        # chains above can call it.
        return HuggingFacePipeline(pipeline=model)
    else:
        raise ValueError(f"Unknown LLM model {config.llm_model}")


def load_emb_model(config: Configuration):
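    """Instantiate the embedding model used to build the FAISS index."""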
    if config.embedding_model == Configuration.EmbeddingModel.gpt_embedding:
        return OpenAIEmbeddings()
    elif config.embedding_model == Configuration.EmbeddingModel.flan_embedding:
        raise NotImplementedError()
    elif config.embedding_model == Configuration.EmbeddingModel.huggingface_embedding:
        return HuggingFaceEmbeddings()
    else:
        raise ValueError(f"Unknown embedding model {config.embedding_model}")
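

# Example usage (a minimal sketch, not part of the original pipeline): assumes
# Configuration exposes the fields referenced above and that each eval example
# has .query/.answer attributes.
#
#   from langchain.document_loaders import TextLoader
#
#   documents = TextLoader("corpus.txt").load()
#   config = Configuration(...)  # fill in chunk_size, chain_type, llm_model, ...
#   results = evaluate(documents, eval_datasets, config)
#   accuracy = sum(r["correct"] for r in results) / len(results)
#   print(f"Accuracy: {accuracy:.2%}")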