Skip to content

Commit 2b5b697

Browse files
authored
Merge pull request #231 from poissoncorp/v7.0
RDBC-889 7.0 Python client with Vector API
2 parents f9b2c95 + 736839d commit 2b5b697

File tree

32 files changed

+872
-37
lines changed

32 files changed

+872
-37
lines changed

.github/workflows/RavenClient.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ name: tests/python
22

33
on:
44
push:
5-
branches: [v5.2]
5+
branches: [v7.0]
66
pull_request:
7-
branches: [v5.2]
7+
branches: [v7.0]
88
schedule:
99
- cron: '0 10 * * *'
1010
workflow_dispatch:
@@ -29,8 +29,8 @@ jobs:
2929

3030
strategy:
3131
matrix:
32-
python-version: [ '3.8', '3.9', '3.10' ,'3.11', '3.12']
33-
serverVersion: [ '5.4', '6.2', '7.0' ]
32+
python-version: [ '3.9', '3.10' ,'3.11', '3.12']
33+
serverVersion: [ '7.0' ]
3434
fail-fast: false
3535

3636
steps:

README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Install from [PyPi](https://pypi.python.org/pypi), as [ravendb](https://pypi.org
77
pip install ravendb
88
````
99
## Introduction and changelog
10-
Python client API (v5.2) for [RavenDB](https://ravendb.net/) , a NoSQL document database.
10+
Python client API (v7.0) for [RavenDB](https://ravendb.net/), a NoSQL document database.
1111

1212
Although the new API isn't compatible with the previous one, it comes with **many improvements and new features**.
1313
@@ -17,8 +17,6 @@ Although new API isn't compatible with the previous one, it comes with **many im
1717
1818
## Releases
1919
20-
* All client versions 5.2.x are fully compatible with and support RavenDB server releases 5.4 and 6.0.
21-
2220
* [Click here](https://github.com/ravendb/ravendb-python-client/releases) to view all Releases and Changelog.
2321
2422
---

README_pypi.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ pip install ravendb
88
```
99

1010
## Introduction
11-
Python client API (v5.2) for [RavenDB](https://ravendb.net/) , a NoSQL document database.
11+
Python client API (v7.0) for [RavenDB](https://ravendb.net/), a NoSQL document database.
1212

1313
Although the new API isn't compatible with the previous one, it comes with **many improvements and new features**.
1414

@@ -19,8 +19,6 @@ Although new API isn't compatible with the previous one, it comes with **many im
1919

2020
## Releases
2121

22-
* All client versions 5.2.x are fully compatible with and support RavenDB server releases 5.4 and 6.0.
23-
2422
* [Click here](https://github.com/ravendb/ravendb-python-client/releases) to view all Releases and Changelog.
2523

2624
## What's new?

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[tool.black]
22
line-length = 120
3-
target-version = ['py37']
3+
target-version = ['py39']
44
include = '\.pyi?$'

ravendb/documents/indexes/abstract_index_creation_tasks.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from abc import abstractmethod, ABC
2-
from typing import Generic, TypeVar, Union, Dict, Set, Callable, Optional, List, TYPE_CHECKING
2+
from typing import Generic, TypeVar, Union, Dict, Set, Callable, Optional, List, TYPE_CHECKING, Any
33

44
from ravendb.documents.conventions import DocumentConventions
55
from ravendb.documents.indexes.definitions import (
@@ -16,8 +16,10 @@
1616
SpatialOptions,
1717
IndexFieldOptions,
1818
IndexType,
19+
SearchEngineType,
1920
)
2021
from ravendb.documents.indexes.spatial.configuration import SpatialOptionsFactory
22+
from ravendb.documents.indexes.vector.options import VectorOptions
2123
from ravendb.documents.operations.indexes import PutIndexesOperation
2224
from ravendb.documents.store.definition import DocumentStoreBase
2325
from ravendb.primitives import constants
@@ -40,13 +42,15 @@ def __init__(
4042
lock_mode: IndexLockMode = None,
4143
deployment_mode: IndexDeploymentMode = None,
4244
state: IndexState = None,
45+
search_engine_type: SearchEngineType = None,
4346
):
4447
super().__init__()
4548
self.conventions = conventions
4649
self.priority = priority
4750
self.lock_mode = lock_mode
4851
self.deployment_mode = deployment_mode
4952
self.state = state
53+
self.search_engine_type = search_engine_type
5054

5155
def execute(self, store: "DocumentStore", conventions: DocumentConventions = None, database: str = None):
5256
old_conventions = self.conventions
@@ -68,6 +72,11 @@ def execute(self, store: "DocumentStore", conventions: DocumentConventions = Non
6872
if self.deployment_mode is not None:
6973
index_definition.deployment_mode = self.deployment_mode
7074

75+
if self.search_engine_type is not None:
76+
if not index_definition.configuration:
77+
index_definition.configuration = {}
78+
index_definition.configuration["Indexing.Static.SearchEngineType"] = self.search_engine_type.__str__()
79+
7180
store.maintenance.for_database(database).send(PutIndexesOperation(index_definition))
7281

7382
finally:
@@ -88,6 +97,7 @@ def __init__(self):
8897
self._index_suggestions: Set[str] = set()
8998
self._term_vectors_strings: Dict[str, FieldTermVector] = {}
9099
self._spatial_options_strings: Dict[str, SpatialOptions] = {}
100+
self._vector_indexes_strings: Dict[str, VectorOptions] = {}
91101

92102
self._output_reduce_to_collection: Union[None, str] = None
93103
self._pattern_for_output_reduce_to_collection_references: Union[None, str] = None
@@ -150,6 +160,9 @@ def _add_assembly(self, assembly: AdditionalAssembly) -> None:
150160

151161
self.additional_assemblies.add(assembly)
152162

163+
def _vector(self, field: str, vector_options: VectorOptions) -> None:
164+
self._vector_indexes_strings[field] = vector_options
165+
153166

154167
class AbstractIndexDefinitionBuilder(Generic[_T_IndexDefinition]):
155168
def __init__(self, index_name: str):
@@ -165,6 +178,7 @@ def __init__(self, index_name: str):
165178
self.suggestions_options: Set[str] = set()
166179
self.term_vectors_strings: Dict[str, FieldTermVector] = {}
167180
self.spatial_indexes_strings: Dict[str, SpatialOptions] = {}
181+
self.vector_indexes_strings: Dict[str, VectorOptions] = {}
168182

169183
self.lock_mode: Optional[IndexLockMode] = None
170184
self.priority: Optional[IndexLockMode] = None
@@ -191,7 +205,7 @@ def __apply_values(
191205
self,
192206
index_definition: IndexDefinition,
193207
values: Dict[str, object],
194-
action: Callable[[IndexFieldOptions, object], None],
208+
action: Callable[[IndexFieldOptions, Any], None],
195209
) -> None:
196210
for key, value in values.items():
197211
field = index_definition.fields.get(key, IndexFieldOptions())
@@ -216,29 +230,33 @@ def to_index_definition(self, conventions: DocumentConventions, validate_map: bo
216230
for suggestions_option in self.suggestions_options:
217231
suggestions[suggestions_option] = True
218232

219-
def __set_indexing(options, value):
233+
def __set_indexing(options: IndexFieldOptions, value: FieldIndexing):
220234
options.indexing = value
221235

222-
def __set_storage(options, value):
236+
def __set_storage(options: IndexFieldOptions, value: FieldStorage):
223237
options.storage = value
224238

225-
def __set_analyzer(options, value):
239+
def __set_analyzer(options: IndexFieldOptions, value: str):
226240
options.analyzer = value
227241

228-
def __set_term_vector(options, value):
242+
def __set_term_vector(options: IndexFieldOptions, value: FieldTermVector):
229243
options.term_vector = value
230244

231-
def __set_spatial(options, value):
245+
def __set_spatial(options: IndexFieldOptions, value: SpatialOptions):
232246
options.spatial = value
233247

234-
def __set_suggestions(options, value):
248+
def __set_vector(options: IndexFieldOptions, value: VectorOptions):
249+
options.vector = value
250+
251+
def __set_suggestions(options: IndexFieldOptions, value: bool):
235252
options.suggestions = value
236253

237254
self.__apply_values(index_definition, self.indexes_strings, __set_indexing)
238255
self.__apply_values(index_definition, self.stores_strings, __set_storage)
239256
self.__apply_values(index_definition, self.analyzers_strings, __set_analyzer)
240257
self.__apply_values(index_definition, self.term_vectors_strings, __set_term_vector)
241258
self.__apply_values(index_definition, self.spatial_indexes_strings, __set_spatial)
259+
self.__apply_values(index_definition, self.vector_indexes_strings, __set_vector)
242260
self.__apply_values(index_definition, suggestions, __set_suggestions)
243261

244262
index_definition.additional_sources = self.additional_sources
@@ -302,6 +320,7 @@ def create_index_definition(self) -> IndexDefinition:
302320
index_definition_builder.suggestions_options = self._index_suggestions
303321
index_definition_builder.term_vectors_strings = self._term_vectors_strings
304322
index_definition_builder.spatial_indexes_strings = self._spatial_options_strings
323+
index_definition_builder.vector_indexes_strings = self._vector_indexes_strings
305324
index_definition_builder.output_reduce_to_collection = self._output_reduce_to_collection
306325
index_definition_builder.pattern_for_output_reduce_to_collection_references = (
307326
self._pattern_for_output_reduce_to_collection_references

ravendb/documents/indexes/definitions.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from abc import ABC
77
from typing import Union, Optional, List, Dict, Set, Iterable
88
from ravendb.documents.indexes.spatial.configuration import SpatialOptions, AutoSpatialOptions
9+
from ravendb.documents.indexes.vector.options import VectorOptions, AutoVectorOptions
910
from ravendb.tools.utils import Utils
1011

1112

@@ -60,6 +61,14 @@ def __str__(self):
6061
return self.value
6162

6263

64+
class SearchEngineType(Enum):
    """Search engine selectable for an index.

    The member values are the strings exchanged with the server through the
    ``Indexing.Static.SearchEngineType`` index configuration entry.
    """

    LUCENE = "Lucene"
    CORAX = "Corax"

    def __str__(self) -> str:
        # Render as the raw wire value ("Lucene" / "Corax") rather than the
        # default "SearchEngineType.LUCENE" representation.
        return self.value
70+
71+
6372
class FieldStorage(Enum):
6473
YES = "Yes"
6574
NO = "No"
@@ -144,13 +153,15 @@ def __init__(
144153
indexing: Optional[FieldIndexing] = None,
145154
term_vector: Optional[FieldTermVector] = None,
146155
spatial: Optional[SpatialOptions] = None,
156+
vector: Optional[VectorOptions] = None,
147157
analyzer: Optional[str] = None,
148158
suggestions: Optional[bool] = None,
149159
):
150160
self.storage = storage
151161
self.indexing = indexing
152162
self.term_vector = term_vector
153163
self.spatial = spatial
164+
self.vector = vector
154165
self.analyzer = analyzer
155166
self.suggestions = suggestions
156167

@@ -160,6 +171,7 @@ def to_json(self):
160171
"Indexing": self.indexing,
161172
"TermVector": self.term_vector,
162173
"Spatial": self.spatial.to_json() if self.spatial else None,
174+
"Vector": self.vector.to_json() if self.vector else None,
163175
"Analyzer": self.analyzer,
164176
"Suggestions": self.suggestions,
165177
}
@@ -192,6 +204,7 @@ def __init__(
192204
pattern_for_output_reduce_to_collection_references: Optional[str] = None,
193205
pattern_references_collection_name: Optional[str] = None,
194206
deployment_mode: Optional[IndexDeploymentMode] = None,
207+
search_engine_type: Optional[SearchEngineType] = None,
195208
):
196209
super(IndexDefinition, self).__init__(name, priority, state)
197210
self.lock_mode = lock_mode
@@ -208,6 +221,7 @@ def __init__(
208221
self.pattern_for_output_reduce_to_collection_references = pattern_for_output_reduce_to_collection_references
209222
self.pattern_references_collection_name = pattern_references_collection_name
210223
self.deployment_mode = deployment_mode
224+
self.search_engine_type = search_engine_type
211225

212226
@classmethod
213227
def from_json(cls, json_dict: dict) -> IndexDefinition:
@@ -230,6 +244,8 @@ def from_json(cls, json_dict: dict) -> IndexDefinition:
230244
index_type = json_dict.get("IndexType", None)
231245
if index_type is not None:
232246
result.__index_type = IndexType(index_type)
247+
if json_dict["Configuration"] and "Indexing.Static.SearchEngineType" in json_dict["Configuration"]:
248+
result.search_engine_type = SearchEngineType(json_dict["Configuration"]["Indexing.Static.SearchEngineType"])
233249
result.output_reduce_to_collection = json_dict["OutputReduceToCollection"]
234250
result.reduce_output_index = json_dict["ReduceOutputIndex"]
235251
result.pattern_for_output_reduce_to_collection_references = json_dict[
@@ -370,6 +386,7 @@ def __init__(
370386
indexing: Optional[AutoFieldIndexing] = None,
371387
aggregation: Optional[AggregationOperation] = None,
372388
spatial: Optional[AutoSpatialOptions] = None,
389+
vector: Optional[AutoVectorOptions] = None,
373390
group_by_array_behavior: Optional[GroupByArrayBehavior] = None,
374391
suggestions: Optional[bool] = None,
375392
is_name_quoted: Optional[bool] = None,
@@ -378,6 +395,7 @@ def __init__(
378395
self.indexing = indexing
379396
self.aggregation = aggregation
380397
self.spatial = spatial
398+
self.vector = vector
381399
self.group_by_array_behavior = group_by_array_behavior
382400
self.suggestions = suggestions
383401
self.is_name_quoted = is_name_quoted
@@ -389,6 +407,7 @@ def from_json(cls, json_dict: Dict) -> AutoIndexFieldOptions:
389407
AutoFieldIndexing(json_dict.get("Indexing")),
390408
AggregationOperation(json_dict.get("Aggregation")) if json_dict.get("Aggregation", None) else None,
391409
AutoSpatialOptions.from_json(json_dict.get("Spatial")) if json_dict.get("Spatial", None) else None,
410+
AutoVectorOptions.from_json(json_dict.get("Vector")) if json_dict.get("Vector", None) else None,
392411
GroupByArrayBehavior(json_dict.get("GroupByArrayBehavior")),
393412
json_dict.get("Suggestions"),
394413
json_dict.get("IsNameQuoted"),
@@ -400,6 +419,9 @@ def to_json(self) -> Dict:
400419
"Indexing": self.indexing.value,
401420
"Aggregation": self.aggregation.value if self.aggregation is not None else None,
402421
"Spatial": self.spatial.type if self.spatial is not None else None,
422+
"Vector": (
423+
self.vector.to_json() if self.vector is not None else None
424+
), # todo; check if vector.to_json() is valid here
403425
"GroupByArrayBehavior": self.group_by_array_behavior.value,
404426
"Suggestions": self.suggestions,
405427
"IsNameQuoted": self.is_name_quoted,

ravendb/documents/indexes/vector/__init__.py

Whitespace-only changes.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from enum import Enum
2+
3+
4+
class VectorEmbeddingType(Enum):
    """Kinds of embedding a vector field can hold.

    Each member's value is the string used for that embedding type in JSON.
    """

    # Floating point values.
    SINGLE = "Single"
    # Quantized integers.
    INT8 = "Int8"
    # Quantized integers restricted to 1/0.
    BINARY = "Binary"
    # Plain text (str).
    TEXT = "Text"
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
from __future__ import annotations
2+
3+
from typing import Dict, Any
4+
5+
from ravendb.documents.indexes.vector.embedding import VectorEmbeddingType
6+
from ravendb.primitives import constants
7+
8+
9+
class VectorOptions:
    """Options describing a vector field of an index.

    Instances are serialized with :meth:`to_json` and rebuilt with
    :meth:`from_json`; both use the PascalCase keys ``SourceEmbeddingType``,
    ``DestinationEmbeddingType``, ``Dimensions``, ``NumberOfEdges`` and
    ``NumberOfCandidatesForIndexing``.
    """

    def __init__(
        self,
        source_embedding_type: VectorEmbeddingType = constants.VectorSearch.DEFAULT_EMBEDDING_TYPE,
        destination_embedding_type: VectorEmbeddingType = constants.VectorSearch.DEFAULT_EMBEDDING_TYPE,
        dimensions: int = None,
        number_of_edges: int = None,
        number_of_candidates_for_indexing: int = None,
    ):
        """Store the given options.

        :param source_embedding_type: embedding type of the source data.
        :param destination_embedding_type: embedding type of the destination
            (presumably the representation kept in the index - confirm
            against the server documentation).
        :param dimensions: optional number of vector dimensions.
        :param number_of_edges: optional value serialized as ``NumberOfEdges``.
        :param number_of_candidates_for_indexing: optional value serialized
            as ``NumberOfCandidatesForIndexing``.
        """
        self.dimensions = dimensions
        self.source_embedding_type = source_embedding_type
        self.destination_embedding_type = destination_embedding_type
        # The attribute name (plural "numbers") does not match the parameter
        # name; it is kept because other code reads it. The property below
        # exposes the same value under the parameter's spelling.
        self.numbers_of_candidates_for_indexing = number_of_candidates_for_indexing
        self.number_of_edges = number_of_edges

    @property
    def number_of_candidates_for_indexing(self) -> int:
        """Alias of ``numbers_of_candidates_for_indexing`` matching the constructor parameter name."""
        return self.numbers_of_candidates_for_indexing

    @number_of_candidates_for_indexing.setter
    def number_of_candidates_for_indexing(self, value: int) -> None:
        self.numbers_of_candidates_for_indexing = value

    @staticmethod
    def _to_embedding_type(raw: Any) -> VectorEmbeddingType:
        """Turn a JSON value into a ``VectorEmbeddingType``.

        ``None`` (key absent or null) maps to the same default the constructor
        uses; an existing member is returned unchanged; anything else is looked
        up by value and raises ``ValueError`` when it is not a known type.
        """
        if raw is None:
            return constants.VectorSearch.DEFAULT_EMBEDDING_TYPE
        if isinstance(raw, VectorEmbeddingType):
            return raw
        return VectorEmbeddingType(raw)

    @classmethod
    def from_json(cls, json_dict: Dict[str, Any]) -> VectorOptions:
        """Build an instance of ``cls`` from its JSON dictionary.

        Embedding types arrive as plain strings and are converted to
        ``VectorEmbeddingType`` members so that :meth:`to_json`, which reads
        ``.value``, works on the result. Missing keys fall back to the
        constructor defaults instead of raising ``KeyError``.
        """
        return cls(
            source_embedding_type=cls._to_embedding_type(json_dict.get("SourceEmbeddingType")),
            destination_embedding_type=cls._to_embedding_type(json_dict.get("DestinationEmbeddingType")),
            dimensions=json_dict.get("Dimensions"),
            number_of_edges=json_dict.get("NumberOfEdges"),
            number_of_candidates_for_indexing=json_dict.get("NumberOfCandidatesForIndexing"),
        )

    def to_json(self) -> Dict[str, Any]:
        """Return the options as a JSON-ready dictionary (enum members are emitted as their string values)."""
        return {
            "SourceEmbeddingType": self.source_embedding_type.value,
            "DestinationEmbeddingType": self.destination_embedding_type.value,
            "Dimensions": self.dimensions,
            "NumberOfCandidatesForIndexing": self.numbers_of_candidates_for_indexing,
            "NumberOfEdges": self.number_of_edges,
        }
42+
43+
44+
class AutoVectorOptions(VectorOptions):
    """``VectorOptions`` extended with the name of the source field.

    Adds ``source_field_name``, serialized under the ``SourceFieldName`` key.
    """

    def __init__(
        self,
        source_embedding_type: VectorEmbeddingType = constants.VectorSearch.DEFAULT_EMBEDDING_TYPE,
        destination_embedding_type: VectorEmbeddingType = constants.VectorSearch.DEFAULT_EMBEDDING_TYPE,
        dimensions: int = None,
        number_of_edges: int = None,
        number_of_candidates_for_indexing: int = None,
        source_field_name: str = None,
    ):
        """Store the base vector options plus ``source_field_name``."""
        super().__init__(
            source_embedding_type,
            destination_embedding_type,
            dimensions,
            number_of_edges,
            number_of_candidates_for_indexing,
        )
        self.source_field_name = source_field_name

    @classmethod
    def from_vector_options(cls, vector_options: VectorOptions) -> AutoVectorOptions:
        """Copy the fields of ``vector_options`` into a new instance; ``source_field_name`` is left as ``None``."""
        return cls(
            source_embedding_type=vector_options.source_embedding_type,
            destination_embedding_type=vector_options.destination_embedding_type,
            dimensions=vector_options.dimensions,
            number_of_edges=vector_options.number_of_edges,
            number_of_candidates_for_indexing=vector_options.numbers_of_candidates_for_indexing,
        )

    @classmethod
    def from_json(cls, json_dict: Dict[str, Any]) -> AutoVectorOptions:
        """Build an instance from its JSON dictionary.

        The inherited ``from_json`` already instantiates ``cls``, so its result
        is used directly instead of being copied a second time.
        """
        auto_vect_options = super().from_json(json_dict)

        # to_json() reads ``.value`` on both embedding types, so make sure they
        # are enum members even if the inherited parser left the raw JSON
        # strings in place. Members and None are left untouched.
        for attribute in ("source_embedding_type", "destination_embedding_type"):
            raw = getattr(auto_vect_options, attribute)
            if raw is not None and not isinstance(raw, VectorEmbeddingType):
                setattr(auto_vect_options, attribute, VectorEmbeddingType(raw))

        # A missing key yields None (the constructor default) rather than KeyError.
        auto_vect_options.source_field_name = json_dict.get("SourceFieldName")
        return auto_vect_options

    def to_json(self) -> Dict[str, Any]:
        """Return the base JSON dictionary with ``SourceFieldName`` added."""
        json_dict = super().to_json()
        json_dict["SourceFieldName"] = self.source_field_name
        return json_dict

0 commit comments

Comments
 (0)