-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
144 lines (108 loc) · 5.77 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from groq import Groq
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
import os
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel
import logging
import asyncio
load_dotenv()
TOKEN_LIMIT = 4096
app = FastAPI()
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
client = Groq(
api_key=os.getenv("GROQ_API_KEY")
)
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
persist_directory = "portfolio_db"
db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
class ChatInput(BaseModel):
message: str
class ChatResponse(BaseModel):
response: str
dialog_history = []
async def call_groq_api(messages, model):
try:
chat_completion = await asyncio.to_thread(
client.chat.completions.create,
messages=messages,
model=model
)
return chat_completion.choices[0].message.content
except Exception as e:
logger.error(f"Error calling Groq API: {e}")
raise HTTPException(status_code=500, detail="Error processing request")
async def process_chat(chat_input: ChatInput):
user_input = chat_input.message
docs = db.similarity_search(user_input, k=3)
logger.info(f"Found {len(docs)} documents")
if len(docs) == 0:
return "I'm sorry, I don't have that information. But you can send a message to the email in the contact form."
template = """
You are an exceptional customer support chatbot that gently answers questions.
You know the following context information.
{chunks_formatted}
Answer the following question from a customer. Use only information from the previous context information. Do not invent stuff.
If user asks a question that is not in the context information, answer with "I'm sorry, I don't have that information. But you can send message to email in contact form.".
If user ask in Russian, you can answer in Russian. If user ask in English, you can answer in English.
Question: {query}
Answer:
"""
manager_template = """
You are an exceptional chatbot manager. Your task is to review the text provided by your subordinates. You need to check the grammar and content and correct it to a relevant version.
The happiness of our customers depends on the quality of the text, so make sure the text is clear and does not mix languages. If this happens, you should correct the text to convey the message to the customer clearly.
The text can be either in Russian or in English. If user asks in Russian, you must answer in Russian. If user asks in English, you must answer in English.
We are the technical support for the user, so it is important to communicate with them in a polite tone, with humor, and to provide comprehensive answers to their questions.
Please answer directly to the user's question. Do not invent stuff. Don't tell that you changed the text. Just provide the corrected text.
DON'T USE "Here is the corrected text:" or any other similar phrases. Use only direct text.
Let's think step by step. First, let's check the text for grammar and content. If you see any mistakes, correct them. If the text is correct, you can leave it as it is.
If the text mixes languages, correct it to a relevant version. If you see that a foreign word has been added to a message in Russian, replace it with a Russian word.
If you see that a foreign word has been added to a message in English, replace it with an English word.
You should not mention your actions or what you did, just send the corrected version without any explanations.
Subordinate's text: {text}
Corrected text:
"""
chunks_formatted = "\n".join([f"{i+1}. {doc.page_content}" for i, doc in enumerate(docs)])
message = template.format(chunks_formatted=chunks_formatted, query=user_input)
models = ["gemma-7b-it", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "llama-3.1-405b-reasoning"]
dialog_history.append({
'role': 'user',
'content': message
})
logger.info(f"Send message {message}")
response = await call_groq_api(dialog_history, models[1])
logger.info(f"Received response {response}")
logger.info(f"Send message to critic: {response}")
manager_message = manager_template.format(text=response)
dialog_history.append({
'role': 'user',
'content': manager_message
})
response = await call_groq_api(dialog_history, models[1])
logger.info(f"Received response {response}")
dialog_history.append({
'role': 'assistant',
'content': response
})
if sum(len(message['content']) for message in dialog_history) > TOKEN_LIMIT:
while sum(len(message['content']) for message in dialog_history) > TOKEN_LIMIT:
dialog_history.pop(0)
return response
@app.post("/chat", response_model=ChatResponse)
async def chat(chat_input: ChatInput, background_tasks: BackgroundTasks):
try:
task = asyncio.create_task(process_chat(chat_input))
background_tasks.add_task(task)
response = await asyncio.wait_for(task, timeout=120.0) # Увеличен таймаут до 2 минут
return ChatResponse(response=response)
except asyncio.TimeoutError:
logger.error("Request timed out")
return ChatResponse(response="I'm sorry, but the request is taking longer than expected. Please try again later.")
except Exception as e:
logger.error(f"Error processing chat request: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000, timeout_keep_alive=120)