Skip to content

Commit c383c29

Browse files
committedFeb 7, 2024
Question Suggestion features
1 parent 41ae1ee commit c383c29

File tree

12 files changed

+598
-53
lines changed

12 files changed

+598
-53
lines changed
 

‎README.md

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ This sample demonstrates building a pitch book from public, private and paid dat
44

55
## Updates
66

7+
* 2/7/2024 - Added capability to suggest questions for Earnings Calls & SEC Filings
78
* 1/28/2024 - Additional details on all Cognitive Search indexes used
89
* pibec - Index to store the earnings calls raw content
910
* pibpr - Index to store the Press Releases raw content (PR Date, Title, Content)

‎api/Python/Pib/__init__.py

+2-20
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ def getProfileAndBio(pibIndexName, cik, step, symbol, temperature, llm, today):
113113
s1Data.append(sData)
114114
mergeDocs(SearchService, SearchKey, pibIndexName, step1Biography)
115115
return s1Data
116-
117116
def processStep1(pibIndexName, cik, step, symbol, temperature, llm, today, reProcess):
118117
s1Data = []
119118

@@ -234,7 +233,6 @@ def processStep1(pibIndexName, cik, step, symbol, temperature, llm, today, rePro
234233
s1Data = getProfileAndBio(pibIndexName, cik, step, symbol, temperature, llm, today)
235234

236235
return s1Data
237-
238236
def getEarningCalls(totalYears, historicalYear, symbol, today):
239237
# Call the paid data (FMP) API
240238
# Get the earnings call transcripts for the last 3 years and merge documents into the index.
@@ -294,7 +292,6 @@ def getEarningCalls(totalYears, historicalYear, symbol, today):
294292
return earningsData[-1]
295293
except Exception as e:
296294
logging.error(f"Error occured while processing {symbol} : {e}")
297-
298295
def getPressReleases(today, symbol):
299296
# For now we call the API to get the data; otherwise we would need to check whether the data is already persisted in our
300297
# index repository before calling again and, if it is persisted, delete it first
@@ -323,7 +320,6 @@ def getPressReleases(today, symbol):
323320

324321
mergeDocs(SearchService, SearchKey, pressReleaseIndexName, pressReleasesList)
325322
return pressReleasesList
326-
327323
# Helper function to find the answer to a question
328324
def findAnswer(chainType, topK, symbol, quarter, year, question, indexName, embeddingModelType, llm):
329325
# Since we already index our document, we can perform the search on the query to retrieve "TopK" documents
@@ -397,7 +393,6 @@ def findAnswer(chainType, topK, symbol, quarter, year, question, indexName, embe
397393
outputAnswer = answer['output_text']
398394

399395
return outputAnswer
400-
401396
def summarizeTopic(llm, query, embeddingModelType, indexName, symbol):
402397

403398
promptTemplate = """You are an AI assistant tasked with summarizing documents from
@@ -432,8 +427,7 @@ def summarizeTopic(llm, query, embeddingModelType, indexName, symbol):
432427
map_prompt=customPrompt, combine_prompt=customPrompt)
433428
summary = summaryChain({"input_documents": resultsDoc}, return_only_outputs=True)
434429
outputAnswer = summary['output_text']
435-
return outputAnswer
436-
430+
return outputAnswer
437431
def processTopicSummary(llm, symbol, cik, step, pibSummaryIndex, embeddingModelType, selectedTopics,
438432
earningVectorIndexName, docType):
439433
topicSummary = []
@@ -466,7 +460,6 @@ def processTopicSummary(llm, symbol, cik, step, pibSummaryIndex, embeddingModelT
466460
})
467461
mergeDocs(SearchService, SearchKey, pibSummaryIndex, topicSummary)
468462
return topicSummary
469-
470463
def processSecTopicSummary(llm, symbol, cik, step, pibSummaryIndex, embeddingModelType, selectedTopics,
471464
earningVectorIndexName, docType, secFilingList):
472465
topicSummary = []
@@ -521,7 +514,6 @@ def processSecTopicSummary(llm, symbol, cik, step, pibSummaryIndex, embeddingMod
521514
})
522515
mergeDocs(SearchService, SearchKey, pibSummaryIndex, topicSummary)
523516
return topicSummary
524-
525517
def processStep2(pibIndexName, cik, step, symbol, llm, today, embeddingModelType, totalYears,
526518
historicalYear, reProcess, selectedTopics):
527519
r = findPibData(SearchService, SearchKey, pibIndexName, cik, step, returnFields=['id', 'symbol', 'cik', 'step', 'description', 'insertedDate',
@@ -750,7 +742,6 @@ def processStep2(pibIndexName, cik, step, symbol, llm, today, embeddingModelType
750742
content = df.iloc[0]['content']
751743

752744
return s2Data, content, latestCallDate
753-
754745
def summarizePressReleases(llm, docs):
755746
promptTemplate = """You are an AI assistant tasked with summarizing company's press releases and performing sentiments on those.
756747
Your summary should accurately capture the key information in the press-releases while avoiding the omission of any domain-specific words.
@@ -767,7 +758,6 @@ def summarizePressReleases(llm, docs):
767758
summary = summaryChain({"input_documents": docs}, return_only_outputs=True)
768759
outputAnswer = summary['output_text']
769760
return outputAnswer
770-
771761
def processStep3(symbol, cik, step, llm, pibIndexName, today, reProcess):
772762
# With the data indexed, let's summarize the information
773763
s3Data = []
@@ -837,7 +827,6 @@ def processStep3(symbol, cik, step, llm, pibIndexName, today, reProcess):
837827
'pibData' : s['pibData']
838828
})
839829
return s3Data
840-
841830
def generateSummaries(llm, docs):
842831
# With the data indexed, let's summarize the information
843832
promptTemplate = """You are an AI assistant tasked with summarizing sections from the financial document like 10-K and 10-Q report.
@@ -854,7 +843,6 @@ def generateSummaries(llm, docs):
854843
summaryChain = load_summarize_chain(llm, chain_type=chainType)
855844
summary = summaryChain({"input_documents": docs}, return_only_outputs=True)
856845
return summary
857-
858846
def processStep4(symbol, cik, filingType, historicalYear, currentYear, embeddingModelType, llm, pibIndexName,
859847
step, today, reProcess, selectedTopics):
860848

@@ -1101,7 +1089,6 @@ def processStep4(symbol, cik, filingType, historicalYear, currentYear, embedding
11011089
mergeDocs(SearchService, SearchKey, pibIndexName, s4Data)
11021090

11031091
return s4Data
1104-
11051092
def processStep5(pibIndexName, cik, step, symbol, today, reProcess):
11061093
s5Data = []
11071094

@@ -1260,7 +1247,6 @@ def processStep5(pibIndexName, cik, step, symbol, today, reProcess):
12601247
'pibData' : s['pibData']
12611248
})
12621249
return s5Data
1263-
12641250
def PibSteps(step, symbol, embeddingModelType, reProcess, overrides):
12651251
logging.info("Calling PibSteps Open AI for symbol " + symbol)
12661252

@@ -1349,8 +1335,6 @@ def PibSteps(step, symbol, embeddingModelType, reProcess, overrides):
13491335
return {"data_points": "", "answer": "Exception during finding answers - Error : " + str(e), "thoughts": "", "sources": "", "nextQuestions": "", "error": str(e)}
13501336

13511337
#return answer
1352-
1353-
13541338
def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse:
13551339
logging.info(f'{context.function_name} HTTP trigger function processed a request.')
13561340
if hasattr(context, 'retry_context'):
@@ -1382,7 +1366,6 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse:
13821366
"Invalid body",
13831367
status_code=400
13841368
)
1385-
13861369
def ComposeResponse(step, symbol, embeddingModelType, reProcess, jsonData):
13871370
values = json.loads(jsonData)['values']
13881371

@@ -1396,7 +1379,6 @@ def ComposeResponse(step, symbol, embeddingModelType, reProcess, jsonData):
13961379
if outputRecord != None:
13971380
results["values"].append(outputRecord)
13981381
return json.dumps(results, ensure_ascii=False)
1399-
14001382
def TransformValue(step, symbol, embeddingModelType, reProcess, record):
14011383
logging.info("Calling Transform Value")
14021384
try:
@@ -1444,4 +1426,4 @@ def TransformValue(step, symbol, embeddingModelType, reProcess, record):
14441426
{
14451427
"recordId": recordId,
14461428
"errors": [ { "message": "Could not complete operation for record." } ]
1447-
})
1429+
})

‎api/Python/PibChat/__init__.py

+27-24
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@
3333
CosmosKey = os.environ['CosmosKey']
3434
CosmosDatabase = os.environ['CosmosDatabase']
3535
CosmosContainer = os.environ['CosmosContainer']
36+
PibEarningsCallIndex = os.environ['PibEarningsCallIndex']
37+
PibPressReleaseIndex = os.environ['PibPressReleaseIndex']
38+
PibEarningsCallVectorIndex = os.environ['PibEarningsCallVectorIndex']
39+
PibSummariesIndex = os.environ['PibSummariesIndex']
40+
PibSecDataIndex = os.environ['PibSecDataIndex']
41+
PibSecDataVectorIndex = os.environ['PibSecDataVectorIndex']
42+
PibDataIndex = os.environ['PibDataIndex']
3643

3744

3845
def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse:
@@ -116,7 +123,7 @@ def insertMessage(sessionId, type, role, totalTokens, tokens, response, cosmosCo
116123
}
117124
cosmosContainer.create_item(body=aiMessage)
118125

119-
def GetRrrAnswer(history, approach, overrides, symbol, indexName):
126+
def GetRrrAnswer(history, approach, overrides, symbol, pibChatType):
120127
embeddingModelType = overrides.get('embeddingModelType') or 'azureopenai'
121128
topK = overrides.get("top") or 5
122129
temperature = overrides.get("temperature") or 0.3
@@ -128,6 +135,15 @@ def GetRrrAnswer(history, approach, overrides, symbol, indexName):
128135
overrideChain = overrides.get("chainType") or 'stuff'
129136
searchType = overrides.get('searchType') or 'similarity'
130137

138+
if pibChatType == "earningCalls":
139+
indexName = PibEarningsCallVectorIndex
140+
filterData = "symbol eq '" + symbol + "'"
141+
returnFields=['id', 'content', 'callDate']
142+
elif pibChatType == "secFiling":
143+
indexName = PibSecDataVectorIndex
144+
filterData = "symbol eq '" + symbol + "' and filingType eq '" + "10-K" + "'"
145+
returnFields=['id', 'content', 'latestFilingDate']
146+
131147
logging.info("Search for Top " + str(topK))
132148
try:
133149
cosmosClient = CosmosClient(url=CosmosEndpoint, credential=CosmosKey)
@@ -442,29 +458,16 @@ def GetRrrAnswer(history, approach, overrides, symbol, indexName):
442458

443459
logging.info("Final Prompt created")
444460

445-
if indexName == "latestsecfilings":
446-
filterData = "symbol eq '" + symbol + "' and filingType eq '" + "10-K" + "'"
447-
r = performLatestPibDataSearch(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, SearchKey, embeddingModelType,
448-
OpenAiEmbedding, filterData, q, indexName, topK, returnFields=['id', 'content', 'latestFilingDate'])
449-
450-
if r == None:
451-
docs = [Document(page_content="No results found")]
452-
else :
453-
docs = [
454-
Document(page_content=doc['content'], metadata={"id": doc['id'], "source": doc['latestFilingDate']})
455-
for doc in r
456-
]
457-
elif indexName == "latestearningcalls":
458-
filterData = "symbol eq '" + symbol + "'"
459-
r = performLatestPibDataSearch(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, SearchKey, embeddingModelType,
460-
OpenAiEmbedding, filterData, q, indexName, topK, returnFields=['id', 'content', 'callDate'])
461-
if r == None:
462-
docs = [Document(page_content="No results found")]
463-
else :
464-
docs = [
465-
Document(page_content=doc['content'], metadata={"id": doc['id'], "source":doc['callDate']})
466-
for doc in r
467-
]
461+
r = performLatestPibDataSearch(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, SearchKey, embeddingModelType,
462+
OpenAiEmbedding, filterData, q, indexName, topK, returnFields=returnFields)
463+
464+
if r == None:
465+
docs = [Document(page_content="No results found")]
466+
else :
467+
docs = [
468+
Document(page_content=doc['content'], metadata={"id": doc['id'], "source": ''})
469+
for doc in r
470+
]
468471

469472
rawDocs = []
470473
for doc in docs:

0 commit comments

Comments
 (0)