akshata29 · Feb 7, 2024
diff --git a/‎README.md
+1 b/‎README.md
+1
diff --git a/‎api/Python/Pib/__init__.py
+2-20 b/‎api/Python/Pib/__init__.py
+2-20
diff --git a/‎api/Python/PibChat/__init__.py
+27-24 b/‎api/Python/PibChat/__init__.py
+27-24
@@ -4,6 +4,7 @@ This sample demonstrates building a pitch book from public, private and paid dat
 
 ## Updates
 
+* 2/7/2024 - Add capability to suggest questions for Earnings Calls & SEC Filings
 * 1/28/2024 - Additional details on all Cognitive Search indexes used
   * pibec - Index to store the raw content of the earnings calls
   * pibpr - Index to store the raw content of the Press Releases (PR Date, Title, Content)
 
@@ -113,7 +113,6 @@ def getProfileAndBio(pibIndexName, cik, step, symbol, temperature, llm, today):
     s1Data.append(sData)
     mergeDocs(SearchService, SearchKey, pibIndexName, step1Biography)
     return s1Data
-
 def processStep1(pibIndexName, cik, step, symbol, temperature, llm, today, reProcess):
     s1Data = []
 
@@ -234,7 +233,6 @@ def processStep1(pibIndexName, cik, step, symbol, temperature, llm, today, rePro
         s1Data = getProfileAndBio(pibIndexName, cik, step, symbol, temperature, llm, today)
 
     return s1Data
-
 def getEarningCalls(totalYears, historicalYear, symbol, today):
     # Call the paid data (FMP) API
     # Get the earning call transcripts for the last 3 years and merge documents into the index.
@@ -294,7 +292,6 @@ def getEarningCalls(totalYears, historicalYear, symbol, today):
         return earningsData[-1]
     except Exception as e:
         logging.error(f"Error occured while processing {symbol} : {e}")
-
 def getPressReleases(today, symbol):
     # For now we always call the API to fetch the data. Otherwise, we would first need to check whether the
     # data is already persisted in our index repository and, if it is, delete it before calling the API again.
@@ -323,7 +320,6 @@ def getPressReleases(today, symbol):
 
     mergeDocs(SearchService, SearchKey, pressReleaseIndexName, pressReleasesList)
     return pressReleasesList
-
 # Helper function to find the answer to a question
 def findAnswer(chainType, topK, symbol, quarter, year, question, indexName, embeddingModelType, llm):
     # Since we already index our document, we can perform the search on the query to retrieve "TopK" documents
@@ -397,7 +393,6 @@ def findAnswer(chainType, topK, symbol, quarter, year, question, indexName, embe
         outputAnswer = answer['output_text']
 
     return outputAnswer
-
 def summarizeTopic(llm, query, embeddingModelType, indexName, symbol):
 
     promptTemplate = """You are an AI assistant tasked with summarizing documents from 
@@ -432,8 +427,7 @@ def summarizeTopic(llm, query, embeddingModelType, indexName, symbol):
                                             map_prompt=customPrompt, combine_prompt=customPrompt)
         summary = summaryChain({"input_documents": resultsDoc}, return_only_outputs=True)
         outputAnswer = summary['output_text']
-        return outputAnswer 
-    
+        return outputAnswer   
 def processTopicSummary(llm, symbol, cik, step, pibSummaryIndex, embeddingModelType, selectedTopics,
                         earningVectorIndexName, docType):
     topicSummary = []
@@ -466,7 +460,6 @@ def processTopicSummary(llm, symbol, cik, step, pibSummaryIndex, embeddingModelT
                     })
     mergeDocs(SearchService, SearchKey, pibSummaryIndex, topicSummary)
     return topicSummary
-
 def processSecTopicSummary(llm, symbol, cik, step, pibSummaryIndex, embeddingModelType, selectedTopics,
                         earningVectorIndexName, docType, secFilingList):
     topicSummary = []
@@ -521,7 +514,6 @@ def processSecTopicSummary(llm, symbol, cik, step, pibSummaryIndex, embeddingMod
                     })
     mergeDocs(SearchService, SearchKey, pibSummaryIndex, topicSummary)
     return topicSummary
-
 def processStep2(pibIndexName, cik, step, symbol, llm, today, embeddingModelType, totalYears, 
                  historicalYear, reProcess, selectedTopics):
     r = findPibData(SearchService, SearchKey, pibIndexName, cik, step, returnFields=['id', 'symbol', 'cik', 'step', 'description', 'insertedDate',
@@ -750,7 +742,6 @@ def processStep2(pibIndexName, cik, step, symbol, llm, today, embeddingModelType
             content = df.iloc[0]['content']
 
     return s2Data, content, latestCallDate
-
 def summarizePressReleases(llm, docs):
     promptTemplate = """You are an AI assistant tasked with summarizing company's press releases and performing sentiments on those. 
                 Your summary should accurately capture the key information in the press-releases while avoiding the omission of any domain-specific words. 
@@ -767,7 +758,6 @@ def summarizePressReleases(llm, docs):
     summary = summaryChain({"input_documents": docs}, return_only_outputs=True)
     outputAnswer = summary['output_text']
     return outputAnswer
-
 def processStep3(symbol, cik, step, llm, pibIndexName, today, reProcess):
     # With the data indexed, let's summarize the information
     s3Data = []
@@ -837,7 +827,6 @@ def processStep3(symbol, cik, step, llm, pibIndexName, today, reProcess):
                     'pibData' : s['pibData']
                 })
     return s3Data
-
 def generateSummaries(llm, docs):
     # With the data indexed, let's summarize the information
     promptTemplate = """You are an AI assistant tasked with summarizing sections from the financial document like 10-K and 10-Q report. 
@@ -854,7 +843,6 @@ def generateSummaries(llm, docs):
     summaryChain = load_summarize_chain(llm, chain_type=chainType)
     summary = summaryChain({"input_documents": docs}, return_only_outputs=True)
     return summary
-
 def processStep4(symbol, cik, filingType, historicalYear, currentYear, embeddingModelType, llm, pibIndexName, 
                  step, today, reProcess, selectedTopics):
 
@@ -1101,7 +1089,6 @@ def processStep4(symbol, cik, filingType, historicalYear, currentYear, embedding
         mergeDocs(SearchService, SearchKey, pibIndexName, s4Data)
 
     return s4Data
-
 def processStep5(pibIndexName, cik, step, symbol, today, reProcess):
     s5Data = []
 
@@ -1260,7 +1247,6 @@ def processStep5(pibIndexName, cik, step, symbol, today, reProcess):
                     'pibData' : s['pibData']
                 })
     return s5Data
-
 def PibSteps(step, symbol, embeddingModelType, reProcess, overrides):
     logging.info("Calling PibSteps Open AI for symbol " + symbol)
 
@@ -1349,8 +1335,6 @@ def PibSteps(step, symbol, embeddingModelType, reProcess, overrides):
       return {"data_points": "", "answer": "Exception during finding answers - Error : " + str(e), "thoughts": "", "sources": "", "nextQuestions": "", "error":  str(e)}
 
     #return answer
-
-
 def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse:
     logging.info(f'{context.function_name} HTTP trigger function processed a request.')
     if hasattr(context, 'retry_context'):
@@ -1382,7 +1366,6 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse:
              "Invalid body",
              status_code=400
         )
-
 def ComposeResponse(step, symbol, embeddingModelType, reProcess, jsonData):
     values = json.loads(jsonData)['values']
 
@@ -1396,7 +1379,6 @@ def ComposeResponse(step, symbol, embeddingModelType, reProcess, jsonData):
         if outputRecord != None:
             results["values"].append(outputRecord)
     return json.dumps(results, ensure_ascii=False)
-
 def TransformValue(step, symbol, embeddingModelType, reProcess, record):
     logging.info("Calling Transform Value")
     try:
@@ -1444,4 +1426,4 @@ def TransformValue(step, symbol, embeddingModelType, reProcess, record):
             {
             "recordId": recordId,
             "errors": [ { "message": "Could not complete operation for record." }   ]
-            })
+            })
@@ -33,6 +33,13 @@
 CosmosKey = os.environ['CosmosKey']
 CosmosDatabase = os.environ['CosmosDatabase']
 CosmosContainer = os.environ['CosmosContainer']
+PibEarningsCallIndex = os.environ['PibEarningsCallIndex']
+PibPressReleaseIndex = os.environ['PibPressReleaseIndex']
+PibEarningsCallVectorIndex = os.environ['PibEarningsCallVectorIndex']
+PibSummariesIndex = os.environ['PibSummariesIndex']
+PibSecDataIndex = os.environ['PibSecDataIndex']
+PibSecDataVectorIndex = os.environ['PibSecDataVectorIndex']
+PibDataIndex = os.environ['PibDataIndex']
 
 
 def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse:
@@ -116,7 +123,7 @@ def insertMessage(sessionId, type, role, totalTokens, tokens, response, cosmosCo
     }
     cosmosContainer.create_item(body=aiMessage)
 
-def GetRrrAnswer(history, approach, overrides, symbol, indexName):
+def GetRrrAnswer(history, approach, overrides, symbol, pibChatType):
     embeddingModelType = overrides.get('embeddingModelType') or 'azureopenai'
     topK = overrides.get("top") or 5
     temperature = overrides.get("temperature") or 0.3
@@ -128,6 +135,15 @@ def GetRrrAnswer(history, approach, overrides, symbol, indexName):
     overrideChain = overrides.get("chainType") or 'stuff'
     searchType = overrides.get('searchType') or 'similarity'
 
+    if pibChatType == "earningCalls":
+        indexName = PibEarningsCallVectorIndex
+        filterData = "symbol eq '" + symbol + "'"
+        returnFields=['id', 'content', 'callDate']
+    elif pibChatType == "secFiling":
+        indexName = PibSecDataVectorIndex
+        filterData = "symbol eq '" + symbol + "' and filingType eq '" + "10-K" + "'"
+        returnFields=['id', 'content', 'latestFilingDate']
+
     logging.info("Search for Top " + str(topK))
     try:
         cosmosClient = CosmosClient(url=CosmosEndpoint, credential=CosmosKey)
@@ -442,29 +458,16 @@ def GetRrrAnswer(history, approach, overrides, symbol, indexName):
 
         logging.info("Final Prompt created")
 
-        if indexName == "latestsecfilings":
-            filterData = "symbol eq '" + symbol + "' and filingType eq '" + "10-K" + "'"
-            r = performLatestPibDataSearch(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, SearchKey, embeddingModelType, 
-                               OpenAiEmbedding, filterData, q, indexName, topK, returnFields=['id', 'content', 'latestFilingDate'])
-            
-            if r == None:
-                docs = [Document(page_content="No results found")]
-            else :
-                docs = [
-                    Document(page_content=doc['content'], metadata={"id": doc['id'], "source": doc['latestFilingDate']})
-                    for doc in r
-                    ]
-        elif indexName == "latestearningcalls":
-            filterData = "symbol eq '" + symbol + "'"
-            r = performLatestPibDataSearch(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, SearchKey, embeddingModelType, 
-                               OpenAiEmbedding, filterData, q, indexName, topK, returnFields=['id', 'content', 'callDate'])
-            if r == None:
-                docs = [Document(page_content="No results found")]
-            else :
-                docs = [
-                    Document(page_content=doc['content'], metadata={"id": doc['id'], "source":doc['callDate']})
-                    for doc in r
-                    ]
+        r = performLatestPibDataSearch(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, SearchKey, embeddingModelType, 
+                            OpenAiEmbedding, filterData, q, indexName, topK, returnFields=returnFields)
+        
+        if r == None:
+            docs = [Document(page_content="No results found")]
+        else :
+            docs = [
+                Document(page_content=doc['content'], metadata={"id": doc['id'], "source": ''})
+                for doc in r
+                ]
 
         rawDocs = []
         for doc in docs: