From b58b1ac256d8fa572498ce34d26ecb16cef64169 Mon Sep 17 00:00:00 2001 From: vga91 Date: Tue, 2 Apr 2024 17:20:26 +0200 Subject: [PATCH] wip --- build.gradle | 2 +- .../main/java/apoc/ml/vectordb/Chroma.java | 110 +++++++++ .../main/java/apoc/ml/vectordb/Pinecone.java | 212 ++++++++++++++++++ 3 files changed, 323 insertions(+), 1 deletion(-) create mode 100644 extended/src/main/java/apoc/ml/vectordb/Chroma.java diff --git a/build.gradle b/build.gradle index c47c09a0c9..2d93febbd2 100644 --- a/build.gradle +++ b/build.gradle @@ -131,7 +131,7 @@ subprojects { ext { // NB: due to version.json generation by parsing this file, the next line must not have any if/then/else logic - neo4jVersion = "5.17.0" + neo4jVersion = "5.18.1" // instead we apply the override logic here neo4jVersionEffective = project.hasProperty("neo4jVersionOverride") ? project.getProperty("neo4jVersionOverride") : neo4jVersion testContainersVersion = '1.18.3' diff --git a/extended/src/main/java/apoc/ml/vectordb/Chroma.java b/extended/src/main/java/apoc/ml/vectordb/Chroma.java new file mode 100644 index 0000000000..1b5a0306f8 --- /dev/null +++ b/extended/src/main/java/apoc/ml/vectordb/Chroma.java @@ -0,0 +1,110 @@ +package apoc.ml.vectordb; + +public class Chroma { + /* + /api/v1 +Root + + +POST +/api/v1/reset +Reset + + +GET +/api/v1/version +Version + + +GET +/api/v1/heartbeat +Heartbeat + + +GET +/api/v1/pre-flight-checks +Pre Flight Checks + + +POST +/api/v1/databases +Create Database + + +GET +/api/v1/databases/{database} +Get Database + + +POST +/api/v1/tenants +Create Tenant + + +GET +/api/v1/tenants/{tenant} +Get Tenant + + +GET +/api/v1/collections +List Collections + + +POST +/api/v1/collections +Create Collection + + +GET +/api/v1/count_collections +Count Collections + + +POST +/api/v1/collections/{collection_id}/add +Add + + +POST +/api/v1/collections/{collection_id}/update +Update + + +POST +/api/v1/collections/{collection_id}/upsert +Upsert + + +POST 
+/api/v1/collections/{collection_id}/get +Get + + +POST +/api/v1/collections/{collection_id}/delete +Delete + + +GET +/api/v1/collections/{collection_id}/count +Count + + +POST +/api/v1/collections/{collection_id}/query +Get Nearest Neighbors + + +GET +/api/v1/collections/{collection_name} +Get Collection + + +DELETE +/api/v1/collections/{collection_name} +Delete Collection + +Parameters + */ +} diff --git a/extended/src/main/java/apoc/ml/vectordb/Pinecone.java b/extended/src/main/java/apoc/ml/vectordb/Pinecone.java index e1607d1173..49b0e35a9e 100644 --- a/extended/src/main/java/apoc/ml/vectordb/Pinecone.java +++ b/extended/src/main/java/apoc/ml/vectordb/Pinecone.java @@ -1,6 +1,218 @@ package apoc.ml.vectordb; +import org.neo4j.graphdb.security.URLAccessChecker; +import org.neo4j.procedure.Context; + + +/* +TODO: +TODO - Qdrant. VECTOR DATABASE.. —> + —> read data + :score and metadata, + Context —> yield —> lookup —> transaction.findNodes —> .. primary key.. + —> topology information + Neo4j Vector index integrations —> !! + */ + public class Pinecone { + /* + +curl -s -X POST "https://api.pinecone.io/indexes" \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -H "Api-Key: $PINECONE_API_KEY" \ + -d '{ + "name": "quickstart", + "dimension": 1536, + "metric": "cosine", + "spec": { + "serverless": { + "cloud": "aws", + "region": "us-west-2" + } + } + }' + + +# The `POST` requests below use the unique endpoint for an index. +# See https://docs.pinecone.io/guides/data/get-an-index-endpoint for details. 
+PINECONE_API_KEY="YOUR_API_KEY" +INDEX_HOST="INDEX_HOST" + +curl -X POST "https://$INDEX_HOST/vectors/upsert" \ + -H "Api-Key: $PINECONE_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "vectors": [ + { + "id": "vec1", + "values": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + }, + { + "id": "vec2", + "values": [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2] + }, + { + "id": "vec3", + "values": [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3] + }, + { + "id": "vec4", + "values": [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4] + } + ], + "namespace": "ns1" + }' + +curl -X POST "https://$INDEX_HOST/vectors/upsert" \ + -H "Api-Key: $PINECONE_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "vectors": [ + { + "id": "vec5", + "values": [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] + }, + { + "id": "vec6", + "values": [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6] + }, + { + "id": "vec7", + "values": [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + }, + { + "id": "vec8", + "values": [0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8] + } + ], + "namespace": "ns2" + }' + + + +# The `POST` request below uses the unique endpoint for an index. +# See https://docs.pinecone.io/guides/data/get-an-index-endpoint for details. +PINECONE_API_KEY="YOUR_API_KEY" +INDEX_HOST="INDEX_HOST" + +curl -X POST "https://$INDEX_HOST/describe_index_stats" \ + -H "Api-Key: $PINECONE_API_KEY" \ + +# Output: +# { +# "namespaces": { +# "ns1": { +# "vectorCount": 4 +# }, +# "ns2": { +# "vectorCount": 4 +# } +# }, +# "dimension": 8, +# "indexFullness": 0.00008, +# "totalVectorCount": 8 +# } + + + + +# The `POST` requests below uses the unique endpoint for an index. +# See https://docs.pinecone.io/guides/data/get-an-index-endpoint for details. 
+PINECONE_API_KEY="YOUR_API_KEY" +INDEX_HOST="INDEX_HOST" + +curl -X POST "https://$INDEX_HOST/query" \ + -H "Api-Key: $PINECONE_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "namespace": "ns1", + "vector": [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3], + "topK": 3, + "includeValues": true + }' + +curl -X POST "https://$INDEX_HOST/query" \ + \ + -H "Api-Key: $PINECONE_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "namespace": "ns2", + "vector": [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7], + "topK": 3, + "includeValues": true + }' +# Output: +# { +# "matches":[ +# { +# "id": "vec3", +# "score": 0, +# "values": [0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3] +# }, +# { +# "id": "vec2", +# "score": 0.0800000429, +# "values": [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2] +# }, +# { +# "id": "vec4", +# "score": 0.0799999237, +# "values": [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4] +# } +# ], +# "namespace": "ns1", +# "usage": {"read_units": 6} +# } +# { +# "matches": [ +# { +# "id": "vec7", +# "score": 0, +# "values": [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] +# }, +# { +# "id": "vec6", +# "score": 0.0799999237, +# "values": [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6] +# }, +# { +# "id": "vec8", +# "score": 0.0799999237, +# "values": [0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8] +# } +# ], +# "namespace": "ns2", +# "usage": {"read_units": 6} +# } + + + +PINECONE_API_KEY="YOUR_API_KEY" + +curl -s -v -X DELETE "https://api.pinecone.io/indexes/quickstart" \ + -H "Accept: application/json" \ + -H "Api-Key: $PINECONE_API_KEY" + + */ + + @Context + public URLAccessChecker urlAccessChecker; + + + /* + TODO: build the Chroma and Pinecone APIs with the same signature? + - Data + - Upsert data + - Query data + - Fetch data + - Update data + - Delete data + - List record IDs + - Get an index endpoint + */ + + // todo - embeddingResult with metadata, id, score... //