-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript.py
210 lines (153 loc) · 6.07 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
from langchain_text_splitters import CharacterTextSplitter
from pypdf import PdfReader
import re
from langchain_community.vectorstores import FAISS
import os
from langchain.chains.question_answering import load_qa_chain
from langchain_google_genai import GoogleGenerativeAIEmbeddings , ChatGoogleGenerativeAI
import google.generativeai as genai
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
import shutil
from flask import Flask, jsonify, request
# import requests
# --- Application and model setup -------------------------------------------
app = Flask(__name__)

# Pull GOOGLE_API_KEY (and any other settings) from a local .env file.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Chat model shared by every request. The keyword is spelled `temperature`;
# a misspelled keyword never reaches the model as the sampling temperature.
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.5, max_retries=3)

# Uploaded PDFs are stored in an `upload` directory next to this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
UPLOAD_FOLDER = os.path.join(current_dir, 'upload')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# --- Process-wide state shared between the routes --------------------------
text_storage = []          # normalized text chunks of the last processed PDF
embeddings = []            # empty list until a PDF has been processed
global_filepath = ""       # path of the last uploaded PDF ("" when none)
startup_completed = False
def normalize_text(s, sep_token=" \n "):
    """Collapse whitespace in *s* and replace every period with *sep_token*.

    Runs of whitespace become a single space and the result is stripped at
    both ends before the periods are substituted. *sep_token* is handed to
    ``re.sub`` as the replacement argument.
    """
    collapsed = re.sub(r'\s+', ' ', s).strip()
    return re.sub(r'\.', sep_token, collapsed)
def allowedFile(filename):
    """Return True when *filename* has a ``.pdf`` extension.

    The comparison is case-insensitive, so ``report.PDF`` is accepted.
    An empty or missing filename is rejected instead of raising.
    """
    if not filename:
        return False
    file_extension = os.path.splitext(filename)[1]
    return file_extension.lower() == '.pdf'
@app.route('/api/upload', methods=['POST'])
def getPDF():
    """Accept a PDF upload, store it in the upload folder and index it.

    Expects a multipart form field named ``file``. Responds with a JSON
    ``error`` and status 400 when the field is missing, no file was
    selected, or the file is not a PDF; otherwise saves the file, runs
    ``ProcessPDF`` on it, remembers its path in ``global_filepath`` and
    responds with status 200.
    """
    global global_filepath

    # Check for the form field before indexing request.files, so a missing
    # field produces the JSON error below rather than failing on the lookup.
    if 'file' not in request.files:
        return jsonify({'error': "No file part"}), 400

    file = request.files['file']
    # The empty-name check must precede the extension check; otherwise an
    # empty filename is reported as "File not a pdf".
    if not file.filename:
        return jsonify({'error': "No file selected"}), 400

    # Keep only the final path component so a client-supplied name such as
    # "../../x.pdf" cannot write outside the upload folder.
    filename = os.path.basename(file.filename.replace("\\", "/"))
    if not allowedFile(filename):
        return jsonify({'error': "File not a pdf"}), 400

    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(filepath)
    ProcessPDF(filepath)
    global_filepath = filepath
    return jsonify({'response': "File Uploaded Successfully"}), 200
def ProcessPDF(filepath):
    """Extract, chunk and index the text of the PDF at *filepath*.

    The text of all pages is concatenated, split on newlines into chunks of
    up to 1000 characters with 200 characters of overlap, and each chunk is
    passed through ``normalize_text``. The chunks are stored in the
    module-level ``text_storage`` and ``embeddings`` is set to the value
    returned by ``createVectorStore``.
    """
    global text_storage, embeddings

    doc_reader = PdfReader(filepath)
    # Skip pages for which extract_text() yields nothing, and join once
    # rather than growing a string page by page.
    page_texts = (page.extract_text() for page in doc_reader.pages)
    raw_text = "".join(text for text in page_texts if text)

    text_splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    text_storage = [
        normalize_text(chunk) for chunk in text_splitter.split_text(raw_text)
    ]
    # createVectorStore reads text_storage, so it must be assigned first.
    embeddings = createVectorStore()
def createVectorStore():
    """Build a FAISS index from ``text_storage`` and save it to ``faiss_index``.

    Returns:
        The ``GoogleGenerativeAIEmbeddings`` instance used to build the
        index, so the caller can reuse it when loading the index.

    Raises:
        RuntimeError: if the ``faiss_index`` path does not exist after
            saving. This helper is not a route, so it raises rather than
            returning an HTTP response tuple that the caller would store
            as the embeddings object.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # FAISS.from_texts is given the embeddings object and the texts, so no
    # separate embed_documents() call is made here (its result was unused).
    vector_store = FAISS.from_texts(text_storage, embeddings)
    vector_store.save_local("faiss_index")
    if not os.path.exists("faiss_index"):
        raise RuntimeError("Vector store did not get created")
    return embeddings
def load_faiss_index_safely(index_path, embeddings):
    """Load the FAISS index stored at *index_path* using *embeddings*.

    Any exception raised while loading is printed and then re-raised, so
    this function either returns the loaded store or propagates the error.
    """
    try:
        # NOTE(review): the flag name signals that loading deserializes
        # data in a way the library considers unsafe for untrusted files;
        # confirm index_path only ever points at an index this app wrote.
        return FAISS.load_local(
            index_path,
            embeddings,
            allow_dangerous_deserialization=True,
        )
    except Exception as e:
        print(f"An error occurred during deserialization: {e}")
        raise
def get_conversation_chain():
    """Build the "stuff" question-answering chain around the shared model.

    The prompt instructs the model to answer only from the supplied context
    and to reply "answer is not available in the context" otherwise. It
    takes two input variables, ``context`` and ``question``.
    """
    # The instruction sentence ends with "answer" followed by a blank line
    # (previously the typo "answern" ran into a single newline).
    prompt_template = """
Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
Context:\n {context}?\n
Question: \n{question}\n
Answer:
"""
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    # `model` is only read here, so no `global` declaration is needed.
    return load_qa_chain(model, chain_type="stuff", prompt=prompt)
@app.route('/api/prompt', methods=['POST'])
def prompt():
    """Answer a question about the uploaded PDF.

    Expects a JSON object with a ``prompt`` string. Responds with a JSON
    ``error`` and status 400 when the prompt is missing, no PDF has been
    processed yet, or the chain returns an empty answer; with status 500
    when the saved index cannot be loaded; otherwise with the answer under
    ``response`` and status 200.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # request gets the JSON error below instead of failing on `.get`.
    data = request.get_json(silent=True)
    question = data.get('prompt') if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        return jsonify({'error': 'No prompt provided'}), 400

    # `embeddings` is the empty list until a PDF has been processed.
    if embeddings == []:
        return jsonify({'error': 'Upload PDF please'}), 400

    # The loader prints and re-raises on failure; convert that into the
    # JSON error the route reports for an unusable index.
    try:
        new_db = load_faiss_index_safely("faiss_index", embeddings)
    except Exception:
        new_db = None
    if new_db is None:
        return jsonify({'error': "Deserialization issue"}), 500

    # NOTE(review): distance_metric and threshold are forwarded as extra
    # keyword arguments — confirm the FAISS store actually honours them.
    docs = new_db.similarity_search(
        question,
        k=5,
        fetch_k=20,
        distance_metric="cosine",
        threshold=0.7,
    )
    chain = get_conversation_chain()
    response = chain.invoke({"input_documents": docs, "question": question})

    answer = response.get('output_text')
    if not answer:
        return jsonify({'error': "Response is empty"}), 400
    return jsonify({'response': answer}), 200
@app.route('/api/exit', methods=['GET'])
def exit_application():
    """Delete the saved index and the uploaded PDF, then reset shared state.

    Safe to call when nothing has been uploaded, or more than once: a
    missing index directory or file is ignored rather than raising.
    """
    global global_filepath, embeddings, text_storage

    shutil.rmtree("faiss_index", ignore_errors=True)
    # global_filepath is "" until an upload succeeds.
    if global_filepath and os.path.isfile(global_filepath):
        os.remove(global_filepath)

    # Reset the state so /api/prompt asks for a new upload instead of
    # trying to load the index that was just deleted.
    global_filepath = ""
    embeddings = []
    text_storage = []
    return jsonify({'response': "Exited successfully"}), 200
@app.route('/api/prompt/topic', methods=['GET'])
def TellTopic():
    """Return a fixed description of the chatbot and prompting tips."""
    payload = {
        'response': '''This is a chatbot which is used to chat with Budget of India related questions.Please provide prompt in detail for better responses.
Start Prompt with Tell me about... or What are.... or Explain... for better results.''',
    }
    return jsonify(payload), 200
# @app.before_first_request
# def startup():
# print("Enter")
# try:
# api_requests = [
# {
# "url": "http://localhost:5000/api/upload",
# "file_param": "file",
# },
# ]
# for request in api_requests:
# file = open(os.path.join(app.config['UPLOAD_FOLDER'] , "BudgetGPT_PDF.pdf") , 'rb')
# print("file opened")
# files = {request['file_param']:file}
# print("Loading....")
# response = requests.post(request['url'] , files = files)
# if response.status_code == 200:
# print("File uploaded successfully")
# else:
# print("File upload failed")
# except Exception as e:
# print('Error calling API : ' + str(e))
# startup()
if __name__ == '__main__':
    # Debug mode stays on by default, as before, but can now be switched
    # off without editing the file by setting FLASK_DEBUG to 0/false/no/off.
    # Disable it anywhere the server is reachable by untrusted clients.
    debug_enabled = os.getenv("FLASK_DEBUG", "1").strip().lower() not in (
        "", "0", "false", "no", "off",
    )
    app.run(debug=debug_enabled)