-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinit_db.py
62 lines (53 loc) · 1.89 KB
/
init_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from pymongo.operations import SearchIndexModel
from config import get_mongo_collection
from generate_listings_and_embeddings import generate_mock_properties, generate_embeddings
import json
def init_vector_search():
    """
    Set up the "vector_index" vector search index on the "properties" collection.

    The collection is first forced into existence by inserting and then
    deleting a placeholder document. Errors from either step are caught and
    printed rather than raised, so this function does not propagate
    exceptions from the placeholder write or from the index creation call.
    """
    properties = get_mongo_collection("properties")

    # Writing and immediately removing a placeholder document creates the
    # collection when it does not exist yet.
    placeholder_id = "dummy"
    try:
        properties.insert_one({"_id": placeholder_id})
        properties.delete_one({"_id": placeholder_id})
    except Exception as e:
        print(f"Warning when creating collection: {e}")

    # Specification of the single vector field covered by the index.
    embedding_field = {
        "type": "vector",
        "path": "embedding",
        "similarity": "dotProduct",
        "numDimensions": 1536,
    }
    index_model = SearchIndexModel(
        definition={"fields": [embedding_field]},
        name="vector_index",
        type="vectorSearch",
    )

    try:
        properties.create_search_index(model=index_model)
        print("Search index created successfully!")
    except Exception as e:
        print(f"Error creating search index: {e}")
def init_database(num_properties=100, output_path='mock_data.json'):
    """
    Initializes database with mock data and vector search index

    Steps, in order: generate mock properties, dump them to a JSON file,
    create the vector search index, then generate embeddings for the mock
    data via generate_embeddings.

    Args:
        num_properties: Number of mock properties requested from
            generate_mock_properties. Defaults to 100.
        output_path: Path of the JSON file the mock data is written to.
            Defaults to 'mock_data.json'.
    """
    # Generate mock properties
    mock_data = generate_mock_properties(num_properties)

    # Save mock data to file (UTF-8, non-ASCII characters kept as-is)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(mock_data, f, ensure_ascii=False, indent=2)
    print(f"Mock data generated and saved to {output_path}")

    # Initialize vector search index
    # NOTE: init_vector_search catches and prints its own errors, so the
    # message below is printed even when index creation failed.
    init_vector_search()
    print("Vector search index initialized")

    # Generate embeddings and save to MongoDB
    generate_embeddings(mock_data)
    print("Embeddings generated and saved to MongoDB")
# Run the full initialization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    init_database()