From 0d6db85dcd4dfd0eb15644ec4df31f6d539bfb4f Mon Sep 17 00:00:00 2001 From: Changyong Um Date: Mon, 10 Feb 2025 22:56:37 +0900 Subject: [PATCH] community: Add configurable text key for indexing and the retriever in Pinecone Hybrid Search (#29697) **issue** In LangChain, the original content is generally stored under the `text` key. However, the `PineconeHybridSearchRetriever` looks for the content under the `context` field in the metadata, and this key cannot be changed. To address this, I have modified the code to allow changing the key to something other than `context`. In my opinion, following LangChain's conventions, the `text` key seems more appropriate than `context`. However, since I wasn't sure about the author's intent, I have left the default value as `context`. --- .../retrievers/pinecone_hybrid_search.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py b/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py index a6e0f68002dde..cd3e3e96d080a 100644 --- a/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py +++ b/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py @@ -31,6 +31,7 @@ def create_index( ids: Optional[List[str]] = None, metadatas: Optional[List[dict]] = None, namespace: Optional[str] = None, + text_key: str = "context", ) -> None: """Create an index from a list of contexts. 
@@ -69,7 +70,7 @@ def create_index( ) # add context passages as metadata meta = [ - {"context": context, **metadata} + {text_key: context, **metadata} for context, metadata in zip(context_batch, metadata_batch) ] @@ -114,7 +115,7 @@ class PineconeHybridSearchRetriever(BaseRetriever): """Alpha value for hybrid search.""" namespace: Optional[str] = None """Namespace value for index partition.""" - + text_key: str = "context" model_config = ConfigDict( arbitrary_types_allowed=True, extra="forbid", @@ -135,6 +136,7 @@ def add_texts( ids=ids, metadatas=metadatas, namespace=namespace, + text_key=self.text_key, ) @pre_init @@ -174,7 +176,7 @@ def _get_relevant_documents( ) final_result = [] for res in result["matches"]: - context = res["metadata"].pop("context") + context = res["metadata"].pop(self.text_key) metadata = res["metadata"] if "score" not in metadata and "score" in res: metadata["score"] = res["score"]