Skip to content

Commit ce41847

Browse files
authored
Support vector search on Cosmos DB (#33991)
* Support vector search on Cosmos DB Fixes #33783 * Allow array types
1 parent b988989 commit ce41847

38 files changed

+1917
-181
lines changed

src/EFCore.Analyzers/EFDiagnostics.cs

+1
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@ public static class EFDiagnostics
1818
public const string PrecompiledQueryExperimental = "EF9100";
1919
public const string MetricsExperimental = "EF9101";
2020
public const string PagingExperimental = "EF9102";
21+
public const string CosmosVectorSearchExperimental = "EF9103";
2122
}

src/EFCore.Cosmos/EFCore.Cosmos.csproj

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
<ImplicitUsings>true</ImplicitUsings>
1212
<NoWarn>$(NoWarn);EF9101</NoWarn> <!-- Metrics is experimental -->
1313
<NoWarn>$(NoWarn);EF9102</NoWarn> <!-- Paging is experimental -->
14+
<NoWarn>$(NoWarn);EF9103</NoWarn> <!-- Vector search is experimental -->
1415
</PropertyGroup>
1516

1617
<ItemGroup>

src/EFCore.Cosmos/Extensions/CosmosDbFunctionsExtensions.cs

+143
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System.Diagnostics.CodeAnalysis;
5+
46
namespace Microsoft.EntityFrameworkCore.Cosmos.Extensions;
57

68
/// <summary>
@@ -47,4 +49,145 @@ public static T CoalesceUndefined<T>(
4749
T expression1,
4850
T expression2)
4951
=> throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(CoalesceUndefined)));
52+
53+
/// <summary>
///     Returns the distance between two <see cref="byte"/> vectors. The distance function and data type are the ones defined using
///     <see cref="CosmosPropertyBuilderExtensions.IsVector(Microsoft.EntityFrameworkCore.Metadata.Builders.PropertyBuilder,Microsoft.Azure.Cosmos.DistanceFunction,int)"/>.
/// </summary>
/// <remarks>
///     This method has no client-side implementation; invoking it directly always throws.
/// </remarks>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance(
    this DbFunctions _,
    ReadOnlyMemory<byte> vector1,
    ReadOnlyMemory<byte> vector2)
{
    var message = CoreStrings.FunctionOnClient(nameof(VectorDistance));
    throw new InvalidOperationException(message);
}
63+
64+
/// <summary>
///     Returns the distance between two <see cref="byte"/> vectors, with <paramref name="useBruteForce"/> controlling
///     whether brute force is used.
/// </summary>
/// <remarks>
///     This method has no client-side implementation; invoking it directly always throws.
/// </remarks>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">
///     A <see langword="bool"/> specifying how the computed value is used in an ORDER BY expression.
///     If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector property is leveraged.
/// </param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance(
    this DbFunctions _,
    ReadOnlyMemory<byte> vector1,
    ReadOnlyMemory<byte> vector2,
    [NotParameterized] bool useBruteForce)
{
    var message = CoreStrings.FunctionOnClient(nameof(VectorDistance));
    throw new InvalidOperationException(message);
}
80+
81+
/// <summary>
///     Returns the distance between two <see cref="byte"/> vectors, given a distance function (aka similarity measure).
/// </summary>
/// <remarks>
///     This method has no client-side implementation; invoking it directly always throws.
/// </remarks>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">
///     A <see langword="bool"/> specifying how the computed value is used in an ORDER BY expression.
///     If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector property is leveraged.
/// </param>
/// <param name="distanceFunction">The distance function to use.</param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance(
    this DbFunctions _,
    ReadOnlyMemory<byte> vector1,
    ReadOnlyMemory<byte> vector2,
    [NotParameterized] bool useBruteForce,
    [NotParameterized] DistanceFunction distanceFunction)
{
    var message = CoreStrings.FunctionOnClient(nameof(VectorDistance));
    throw new InvalidOperationException(message);
}
99+
100+
/// <summary>
///     Returns the distance between two <see cref="sbyte"/> vectors. The distance function and data type are the ones defined using
///     <see cref="CosmosPropertyBuilderExtensions.IsVector(Microsoft.EntityFrameworkCore.Metadata.Builders.PropertyBuilder,Microsoft.Azure.Cosmos.DistanceFunction,int)"/>.
/// </summary>
/// <remarks>
///     This method has no client-side implementation; invoking it directly always throws.
/// </remarks>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance(
    this DbFunctions _,
    ReadOnlyMemory<sbyte> vector1,
    ReadOnlyMemory<sbyte> vector2)
{
    var message = CoreStrings.FunctionOnClient(nameof(VectorDistance));
    throw new InvalidOperationException(message);
}
110+
111+
/// <summary>
///     Returns the distance between two <see cref="sbyte"/> vectors, with <paramref name="useBruteForce"/> controlling
///     whether brute force is used.
/// </summary>
/// <remarks>
///     This method has no client-side implementation; invoking it directly always throws.
/// </remarks>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">
///     A <see langword="bool"/> specifying how the computed value is used in an ORDER BY expression.
///     If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector property is leveraged.
/// </param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance(
    this DbFunctions _,
    ReadOnlyMemory<sbyte> vector1,
    ReadOnlyMemory<sbyte> vector2,
    [NotParameterized] bool useBruteForce)
{
    var message = CoreStrings.FunctionOnClient(nameof(VectorDistance));
    throw new InvalidOperationException(message);
}
127+
128+
/// <summary>
///     Returns the distance between two <see cref="sbyte"/> vectors, given a distance function (aka similarity measure).
/// </summary>
/// <remarks>
///     This method has no client-side implementation; invoking it directly always throws.
/// </remarks>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">
///     A <see langword="bool"/> specifying how the computed value is used in an ORDER BY expression.
///     If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector property is leveraged.
/// </param>
/// <param name="distanceFunction">The distance function to use.</param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance(
    this DbFunctions _,
    ReadOnlyMemory<sbyte> vector1,
    ReadOnlyMemory<sbyte> vector2,
    [NotParameterized] bool useBruteForce,
    [NotParameterized] DistanceFunction distanceFunction)
{
    var message = CoreStrings.FunctionOnClient(nameof(VectorDistance));
    throw new InvalidOperationException(message);
}
146+
147+
/// <summary>
///     Returns the distance between two <see cref="float"/> vectors. The distance function and data type are the ones defined using
///     <see cref="CosmosPropertyBuilderExtensions.IsVector(Microsoft.EntityFrameworkCore.Metadata.Builders.PropertyBuilder,Microsoft.Azure.Cosmos.DistanceFunction,int)"/>.
/// </summary>
/// <remarks>
///     This method has no client-side implementation; invoking it directly always throws.
/// </remarks>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance(
    this DbFunctions _,
    ReadOnlyMemory<float> vector1,
    ReadOnlyMemory<float> vector2)
{
    var message = CoreStrings.FunctionOnClient(nameof(VectorDistance));
    throw new InvalidOperationException(message);
}
157+
158+
/// <summary>
///     Returns the distance between two <see cref="float"/> vectors, with <paramref name="useBruteForce"/> controlling
///     whether brute force is used.
/// </summary>
/// <remarks>
///     This method has no client-side implementation; invoking it directly always throws.
/// </remarks>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">
///     A <see langword="bool"/> specifying how the computed value is used in an ORDER BY expression.
///     If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector property is leveraged.
/// </param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance(
    this DbFunctions _,
    ReadOnlyMemory<float> vector1,
    ReadOnlyMemory<float> vector2,
    [NotParameterized] bool useBruteForce)
{
    var message = CoreStrings.FunctionOnClient(nameof(VectorDistance));
    throw new InvalidOperationException(message);
}
174+
175+
/// <summary>
///     Returns the distance between two <see cref="float"/> vectors, given a distance function (aka similarity measure).
/// </summary>
/// <remarks>
///     This method has no client-side implementation; invoking it directly always throws.
/// </remarks>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">
///     A <see langword="bool"/> specifying how the computed value is used in an ORDER BY expression.
///     If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector property is leveraged.
/// </param>
/// <param name="distanceFunction">The distance function to use.</param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance(
    this DbFunctions _,
    ReadOnlyMemory<float> vector1,
    ReadOnlyMemory<float> vector2,
    [NotParameterized] bool useBruteForce,
    [NotParameterized] DistanceFunction distanceFunction)
{
    var message = CoreStrings.FunctionOnClient(nameof(VectorDistance));
    throw new InvalidOperationException(message);
}
50193
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics.CodeAnalysis;
5+
using Microsoft.EntityFrameworkCore.Cosmos.Metadata.Internal;
6+
7+
// ReSharper disable once CheckNamespace
8+
namespace Microsoft.EntityFrameworkCore;
9+
10+
/// <summary>
///     Extension methods for <see cref="IndexBuilder"/> that are specific to Azure Cosmos DB.
/// </summary>
/// <remarks>
///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static class CosmosIndexBuilderExtensions
{
    /// <summary>
    ///     Configures the index as a vector index of the given type, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <remarks>
    ///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    ///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The type of vector index to create.</param>
    /// <returns>The same builder, so that further configuration can be chained.</returns>
    public static IndexBuilder ForVectors(this IndexBuilder indexBuilder, VectorIndexType? indexType)
    {
        var index = indexBuilder.Metadata;
        index.SetVectorIndexType(indexType);

        return indexBuilder;
    }

    /// <summary>
    ///     Configures the index as a vector index of the given type, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <remarks>
    ///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    ///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <typeparam name="TEntity">The type argument of the generic index builder being configured.</typeparam>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The type of vector index to create.</param>
    /// <returns>A builder to further configure the index.</returns>
    public static IndexBuilder<TEntity> ForVectors<TEntity>(
        this IndexBuilder<TEntity> indexBuilder,
        VectorIndexType? indexType)
    {
        // Delegate to the non-generic overload, then restore the generic builder type for chaining.
        var configured = ForVectors((IndexBuilder)indexBuilder, indexType);

        return (IndexBuilder<TEntity>)configured;
    }

    /// <summary>
    ///     Configures the index as a vector index of the given type, such as "flat", "diskANN", or "quantizedFlat",
    ///     provided the configuration source is allowed to set it.
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <remarks>
    ///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    ///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The type of vector index to create.</param>
    /// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
    /// <returns>
    ///     The same builder instance if the configuration was applied,
    ///     <see langword="null" /> otherwise.
    /// </returns>
    public static IConventionIndexBuilder? ForVectors(
        this IConventionIndexBuilder indexBuilder,
        VectorIndexType? indexType,
        bool fromDataAnnotation = false)
    {
        if (!indexBuilder.CanSetVectorIndexType(indexType, fromDataAnnotation))
        {
            return null;
        }

        indexBuilder.Metadata.SetVectorIndexType(indexType, fromDataAnnotation);

        return indexBuilder;
    }

    /// <summary>
    ///     Returns a value indicating whether the index can be configured with the given vector index type.
    /// </summary>
    /// <remarks>
    ///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    ///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The index type to use.</param>
    /// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
    /// <returns><see langword="true" /> if the index can be configured for vectors.</returns>
    public static bool CanSetVectorIndexType(
        this IConventionIndexBuilder indexBuilder,
        VectorIndexType? indexType,
        bool fromDataAnnotation = false)
    {
        return indexBuilder.CanSetAnnotation(CosmosAnnotationNames.VectorIndexType, indexType, fromDataAnnotation);
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics.CodeAnalysis;
5+
using Microsoft.EntityFrameworkCore.Cosmos.Metadata.Internal;
6+
7+
// ReSharper disable once CheckNamespace
8+
namespace Microsoft.EntityFrameworkCore;
9+
10+
/// <summary>
///     Index extension methods for Azure Cosmos DB-specific metadata.
/// </summary>
/// <remarks>
///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static class CosmosIndexExtensions
{
    /// <summary>
    ///     Returns the vector index type to use, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <returns>The index type to use, or <see langword="null" /> if none is set.</returns>
    /// <exception cref="InvalidOperationException">
    ///     Thrown when <paramref name="index" /> is a <see cref="RuntimeIndex" />.
    /// </exception>
    public static VectorIndexType? GetVectorIndexType(this IReadOnlyIndex index)
        => (index is RuntimeIndex)
            ? throw new InvalidOperationException(CoreStrings.RuntimeModelMissingData)
            : (VectorIndexType?)index[CosmosAnnotationNames.VectorIndexType];

    /// <summary>
    ///     Sets the vector index type to use, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="indexType">The index type to use.</param>
    public static void SetVectorIndexType(this IMutableIndex index, VectorIndexType? indexType)
        => index.SetAnnotation(CosmosAnnotationNames.VectorIndexType, indexType);

    /// <summary>
    ///     Sets the vector index type to use, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="indexType">The index type to use.</param>
    /// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
    /// <returns>
    ///     The string form of the configured value, or <see langword="null" /> if no annotation was set
    ///     or its value is <see langword="null" />.
    /// </returns>
    public static string? SetVectorIndexType(
        this IConventionIndex index,
        VectorIndexType? indexType,
        bool fromDataAnnotation = false)
        // The annotation stores the VectorIndexType value itself, so a direct (string?) cast of the boxed
        // value would throw InvalidCastException for any non-null index type. The string return type is
        // kept for compatibility with existing callers, so the value is converted with ToString() instead.
        => index.SetAnnotation(
            CosmosAnnotationNames.VectorIndexType,
            indexType,
            fromDataAnnotation)?.Value?.ToString();

    /// <summary>
    ///     Returns the <see cref="ConfigurationSource" /> for the vector index type
    ///     (see <see cref="GetVectorIndexType" />).
    /// </summary>
    /// <param name="property">The index. (The parameter is named <c>property</c> for compatibility with existing callers.)</param>
    /// <returns>
    ///     The <see cref="ConfigurationSource" /> for the vector index type annotation,
    ///     or <see langword="null" /> if the annotation is not present.
    /// </returns>
    public static ConfigurationSource? GetVectorIndexTypeConfigurationSource(this IConventionIndex property)
        => property.FindAnnotation(CosmosAnnotationNames.VectorIndexType)?.GetConfigurationSource();
}

0 commit comments

Comments
 (0)