Chunk limit email check #1196

Draft: wants to merge 9 commits into base: dev
72 changes: 65 additions & 7 deletions backend/score.py
@@ -1,3 +1,4 @@
+from src.entities.user import user_info
from fastapi import FastAPI, File, UploadFile, Form, Request, HTTPException
from fastapi_health import health
from fastapi.middleware.cors import CORSMiddleware
@@ -218,7 +219,8 @@ async def extract_knowledge_graph_from_file(
access_token=Form(None),
retry_condition=Form(None),
additional_instructions=Form(None),
-email=Form(None)
+email=Form(None),
+user_obj: user_info=Form(None)
):
"""
Calls 'extract_graph_from_file' in a new thread to create Neo4jGraph from a
@@ -241,22 +243,22 @@ async def extract_knowledge_graph_from_file(
if source_type == 'local file':
file_name = sanitize_filename(file_name)
merged_file_path = validate_file_path(MERGED_DIR, file_name)
-uri_latency, result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions)
+uri_latency, result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions, user_obj)

elif source_type == 's3 bucket' and source_url:
-uri_latency, result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions)
+uri_latency, result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions, user_obj)

elif source_type == 'web-url':
-uri_latency, result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions)
+uri_latency, result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions, user_obj)

elif source_type == 'youtube' and source_url:
-uri_latency, result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions)
+uri_latency, result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions, user_obj)

elif source_type == 'Wikipedia' and wiki_query:
-uri_latency, result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions)
+uri_latency, result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions, user_obj)

elif source_type == 'gcs bucket' and gcs_bucket_name:
-uri_latency, result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions)
+uri_latency, result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions, user_obj)
else:
return create_api_response('Failed',message='source_type is other than accepted source')
extract_api_time = time.time() - start_time
@@ -1096,5 +1098,61 @@ async def get_schema_visualization(uri=Form(None), userName=Form(None), password
finally:
gc.collect()

@app.post("/set_user_info")
async def set_user_info(uri=Form(None),
userName=Form(None),
password=Form(None),
database=Form(None),
email=Form(None)):
try:
start = time.time()
graph = create_graph_database_connection(uri, userName, password, database)
graphDb_data_Access = graphDBdataAccess(graph)
result = graphDb_data_Access.save_user_info(email, database)
end = time.time()
elapsed_time = end - start
return create_api_response('Success', data=result,message=f"Total elapsed API time {elapsed_time:.2f}")
except Exception as e:
message="Unable to save the detail of user in DB"
error_message = str(e)
logging.info(message)
logging.exception(f'Exception:{error_message}')
return create_api_response("Failed", message=message, error=error_message)
finally:
gc.collect()

@app.post("/get_user_info")
async def get_user_info(uri=Form(None),
userName=Form(None),
password=Form(None),
database=Form(None),
email=Form(None),
token_chunk_size:int=Form(None)):
try:
start = time.time()
graph = create_graph_database_connection(uri, userName, password, database)
graphDb_data_Access = graphDBdataAccess(graph)
# result = graphDb_data_Access.get_user_detail(email)
MAX_TOKEN_CHUNK_SIZE = int(os.getenv('MAX_TOKEN_CHUNK_SIZE', 10000))
chunk_to_be_created = int(MAX_TOKEN_CHUNK_SIZE / int(token_chunk_size))
userInfo = user_info()
userInfo.chunk_limits= chunk_to_be_created
userInfo.readonly= False
userInfo.rate_limit= 60000
userInfo.remaining_limit = 40000
userInfo.is_chunk_limit_applicable = True
end = time.time()
elapsed_time = end - start
logger.log_struct(userInfo, "INFO")
return create_api_response('Success', data=userInfo,message=f"Total elapsed API time {elapsed_time:.2f}")
except Exception as e:
message="Unable to get the details of user in DB"
error_message = str(e)
logging.info(message)
logging.exception(f'Exception:{error_message}')
return create_api_response("Failed", message=message, error=error_message)
finally:
gc.collect()

if __name__ == "__main__":
uvicorn.run(app)
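For context, a minimal sketch of how a client might exercise the two new endpoints. The base URL, credentials, and form values below are placeholders; the field names follow the Form parameters defined above.

import requests

BASE_URL = "http://localhost:8000"  # placeholder deployment URL
conn = {
    "uri": "neo4j://localhost:7687",
    "userName": "neo4j",
    "password": "password",
    "database": "neo4j",
}

# Record whether the user is a neo4j.com account and whether the database grants write access.
resp = requests.post(f"{BASE_URL}/set_user_info", data={**conn, "email": "alice@example.com"})
print(resp.json())

# Fetch the per-user limits; chunk_limits is derived from MAX_TOKEN_CHUNK_SIZE / token_chunk_size.
resp = requests.post(f"{BASE_URL}/get_user_info",
                     data={**conn, "email": "alice@example.com", "token_chunk_size": 200})
print(resp.json())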
5 changes: 3 additions & 2 deletions backend/src/create_chunks.py
@@ -1,5 +1,6 @@
from langchain_text_splitters import TokenTextSplitter
from langchain.docstore.document import Document
+from src.entities.user import user_info
from langchain_neo4j import Neo4jGraph
import logging
from src.document_sources.youtube import get_chunks_with_timestamps, get_calculated_timestamps
@@ -14,7 +15,7 @@ def __init__(self, pages: list[Document], graph: Neo4jGraph):
self.pages = pages
self.graph = graph

-def split_file_into_chunks(self,token_chunk_size, chunk_overlap):
+def split_file_into_chunks(self,token_chunk_size, chunk_overlap, user_obj: user_info):
"""
Split a list of documents(file pages) into chunks of fixed size.

@@ -33,7 +34,7 @@ def split_file_into_chunks(self,token_chunk_size, chunk_overlap):
chunks = []
for i, document in enumerate(self.pages):
page_number = i + 1
-if len(chunks) >= chunk_to_be_created:
+if user_obj.is_chunk_limit_applicable and (len(chunks) >= user_obj.chunk_limits or user_obj.remaining_limit <= 0) or len(chunks) >= chunk_to_be_created:
break
else:
for chunk in text_splitter.split_documents([document]):
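As a reading aid (not part of the diff), the new break condition in split_file_into_chunks can be factored into a small predicate. Since Python's and binds tighter than or, the per-user limits are only consulted when is_chunk_limit_applicable is set, while the global cap derived from MAX_TOKEN_CHUNK_SIZE always applies:

from src.entities.user import user_info

def should_stop_chunking(chunks_created: int, chunk_to_be_created: int, user_obj: user_info) -> bool:
    # Per-user cap: trips when the user's chunk allowance or remaining rate budget is exhausted.
    user_cap_hit = user_obj.is_chunk_limit_applicable and (
        chunks_created >= user_obj.chunk_limits or user_obj.remaining_limit <= 0
    )
    # Global cap computed from MAX_TOKEN_CHUNK_SIZE / token_chunk_size.
    global_cap_hit = chunks_created >= chunk_to_be_created
    return user_cap_hit or global_cap_hit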
9 changes: 9 additions & 0 deletions backend/src/entities/user.py
@@ -0,0 +1,9 @@
from pydantic import BaseModel
from typing import Optional

class user_info(BaseModel):
    chunk_limits: Optional[int] = 50
    readonly: Optional[bool] = False
    rate_limit: Optional[int] = 100000
    remaining_limit: Optional[int] = 100000
    is_chunk_limit_applicable: Optional[bool] = True
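A quick illustration of how the defaults behave and how /get_user_info overrides them; the 10000 and 200 figures stand in for MAX_TOKEN_CHUNK_SIZE and token_chunk_size and are only example values.

from src.entities.user import user_info

# Every field is optional and falls back to the defaults declared above.
defaults = user_info()
print(defaults.chunk_limits, defaults.rate_limit)  # 50 100000

# /get_user_info derives chunk_limits from MAX_TOKEN_CHUNK_SIZE / token_chunk_size.
limits = user_info(chunk_limits=int(10000 / 200), rate_limit=60000, remaining_limit=40000)
print(limits.chunk_limits, limits.is_chunk_limit_applicable)  # 50 True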
8 changes: 7 additions & 1 deletion backend/src/graphDB_dataAccess.py
@@ -583,4 +583,10 @@ def get_websource_url(self,file_name):
RETURN d.url AS url
"""
param = {"file_name" : file_name}
-return self.execute_query(query, param)
+return self.execute_query(query, param)

def save_user_info(self, email, database):
    domain = "@neo4j.com"
    is_neo4j_user = domain in email
    write_access = self.check_account_access(database=database)
    return {"is_neo4j_user": is_neo4j_user, "write_access": write_access}