Skip to content

Commit

Permalink
Fix Chroma DB if multiple docs are inserted
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkbrnd committed Feb 16, 2025
1 parent 4d08521 commit 0cbdcee
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 9 deletions.
22 changes: 13 additions & 9 deletions libs/agno/agno/vectordb/chroma/chromadb.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,19 @@ def doc_exists(self, document: Document) -> bool:
Returns:
bool: True if document exists, False otherwise.
"""
if self.client:
try:
collection: Collection = self.client.get_collection(name=self.collection_name)
collection_data: GetResult = collection.get(include=[IncludeEnum.documents])
if collection_data.get("documents") != []:
return True
except Exception as e:
logger.error(f"Document does not exist: {e}")
return False
if not self.client:
logger.warning("Client not initialized")
return False

try:
collection: Collection = self.client.get_collection(name=self.collection_name)
collection_data: GetResult = collection.get(include=[IncludeEnum.documents])
existing_documents = collection_data.get("documents", [])
cleaned_content = document.content.replace("\x00", "\ufffd")
if cleaned_content in existing_documents:
return True
except Exception as e:
logger.error(f"Document does not exist: {e}")

def name_exists(self, name: str) -> bool:
"""Check if a document with a given name exists in the collection.
Expand Down
18 changes: 18 additions & 0 deletions libs/agno/tests/unit/vectordb/test_chromadb.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,21 @@ def test_custom_embedder(mock_embedder):
finally:
if os.path.exists(TEST_PATH):
shutil.rmtree(TEST_PATH)


def test_multiple_document_operations(chroma_db, sample_documents):
    """Insert documents in two separate calls, then check the count and a search.

    Args:
        chroma_db: Fixture providing the vector store under test.
        sample_documents: Fixture providing at least three documents; one of the
            first three is expected to mention "curry" in its content.
    """
    # First call: a batch holding the first two sample documents.
    initial_docs = sample_documents[:2]
    chroma_db.insert(initial_docs)
    assert chroma_db.get_count() == 2

    # Second call: a single-document batch; the total should now be three.
    chroma_db.insert([sample_documents[2]])
    assert chroma_db.get_count() == 3

    # Searching for "curry" with limit=1 should yield one hit mentioning it.
    hits = chroma_db.search("curry", limit=1)
    assert len(hits) == 1
    top_hit = hits[0]
    assert "curry" in top_hit.content.lower()

0 comments on commit 0cbdcee

Please sign in to comment.