From e0923adc1c5d0305dfe3773e41aad79dd51482a8 Mon Sep 17 00:00:00 2001 From: Yue Fei Date: Thu, 20 Jun 2024 15:21:10 +0800 Subject: [PATCH] Add fast bm25 (#66) * Add fast bm25 * Fix bm25 bug * Fix bug * Fix test --- src/pai_rag/data/rag_dataloader.py | 1 - src/pai_rag/modules/index/pai_bm25_index.py | 1 - tests/core/test_rag_application.py | 7 ++----- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/pai_rag/data/rag_dataloader.py b/src/pai_rag/data/rag_dataloader.py index 92d901ce..ee5775a0 100644 --- a/src/pai_rag/data/rag_dataloader.py +++ b/src/pai_rag/data/rag_dataloader.py @@ -135,4 +135,3 @@ async def aload(self, file_directory: str, enable_qa_extraction: bool): def load(self, file_directory: str, enable_qa_extraction: bool): loop = asyncio.get_event_loop() loop.run_until_complete(self.aload(file_directory, enable_qa_extraction)) - return diff --git a/src/pai_rag/modules/index/pai_bm25_index.py b/src/pai_rag/modules/index/pai_bm25_index.py index d4ee9df4..8a4e9b8e 100644 --- a/src/pai_rag/modules/index/pai_bm25_index.py +++ b/src/pai_rag/modules/index/pai_bm25_index.py @@ -293,7 +293,6 @@ def query(self, query_str: str, top_n: int = 5) -> List[NodeWithScore]: doc_scores = self.index_matrix.multiply(query_vec).sum(axis=1).getA1() doc_indexes = doc_scores.argsort()[::-1][:top_n] - text_nodes = self.load_docs_with_index(doc_indexes) for i, node in enumerate(text_nodes): results.append(NodeWithScore(node=node, score=doc_scores[doc_indexes[i]])) diff --git a/tests/core/test_rag_application.py b/tests/core/test_rag_application.py index 83006385..b0fb0b5d 100644 --- a/tests/core/test_rag_application.py +++ b/tests/core/test_rag_application.py @@ -24,14 +24,11 @@ def rag_app(): rag_app = RagApplication() rag_app.initialize(config) - return rag_app - - -# Test load knowledge file -def test_add_knowledge_file(rag_app: RagApplication): data_dir = os.path.join(BASE_DIR, "tests/testdata/paul_graham") rag_app.load_knowledge(data_dir) + return rag_app + # Test rag query async def test_query(rag_app: RagApplication):