Skip to content

Commit 91b866a

Browse files
committed
Support vector search on Cosmos DB
Fixes #33783 This PR introduces: - `IsVector()` to configure a property to be configured as a vector (embedding) in the document. - The distance function and dimensions are specified. - The data type can be specified, or otherwise is inferred. - `HasIndex().ForVectors()` to configure a vector index over a vector property. - `VectorDistance()` which translates to the Cosmos `VectorDistance` function - The distance function and data type are taken from the property mapping, or can be overridden.
1 parent 3d1d324 commit 91b866a

35 files changed

+1787
-130
lines changed

src/EFCore.Analyzers/EFDiagnostics.cs

+1
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@ public static class EFDiagnostics
1818
public const string PrecompiledQueryExperimental = "EF9100";
1919
public const string MetricsExperimental = "EF9101";
2020
public const string PagingExperimental = "EF9102";
21+
public const string CosmosVectorSearchExperimental = "EF9103";
2122
}

src/EFCore.Cosmos/EFCore.Cosmos.csproj

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
<ImplicitUsings>true</ImplicitUsings>
1212
<NoWarn>$(NoWarn);EF9101</NoWarn> <!-- Metrics is experimental -->
1313
<NoWarn>$(NoWarn);EF9102</NoWarn> <!-- Paging is experimental -->
14+
<NoWarn>$(NoWarn);EF9103</NoWarn> <!-- Cosmos vector search is experimental -->
1415
</PropertyGroup>
1516

1617
<ItemGroup>

src/EFCore.Cosmos/Extensions/CosmosDbFunctionsExtensions.cs

+51
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System.Diagnostics.CodeAnalysis;
5+
46
namespace Microsoft.EntityFrameworkCore.Cosmos.Extensions;
57

68
/// <summary>
@@ -47,4 +49,53 @@ public static T CoalesceUndefined<T>(
4749
T expression1,
4850
T expression2)
4951
=> throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(CoalesceUndefined)));
52+
53+
/// <summary>
///     Computes the distance between two vectors. The distance function and data type are the ones configured through
///     <see cref="CosmosPropertyBuilderExtensions.IsVector(Microsoft.EntityFrameworkCore.Metadata.Builders.PropertyBuilder,Microsoft.Azure.Cosmos.DistanceFunction,int,System.Nullable{Microsoft.Azure.Cosmos.VectorDataType})"/>.
/// </summary>
/// <remarks>
///     This method has no client-side implementation: calling it directly always throws.
/// </remarks>
/// <typeparam name="T">The element type of the vectors.</typeparam>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <returns>Nothing when invoked directly; the call always throws.</returns>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked on the client.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance<T>(this DbFunctions _, IEnumerable<T> vector1, IEnumerable<T> vector2)
{
    throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(VectorDistance)));
}
63+
64+
/// <summary>
///     Computes the distance between two vectors, with control over whether brute force is used.
/// </summary>
/// <remarks>
///     This method has no client-side implementation: calling it directly always throws.
/// </remarks>
/// <typeparam name="T">The element type of the vectors.</typeparam>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">
///     Controls how the computed value is used in an ORDER BY expression. When <see langword="true"/>, brute force
///     is used; otherwise any index defined on the vector property is leveraged.
/// </param>
/// <returns>Nothing when invoked directly; the call always throws.</returns>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked on the client.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance<T>(
    this DbFunctions _,
    IEnumerable<T> vector1,
    IEnumerable<T> vector2,
    [NotParameterized] bool useBruteForce)
{
    throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(VectorDistance)));
}
80+
81+
/// <summary>
///     Computes the distance between two vectors using an explicitly supplied distance function
///     (aka similarity measure) and vector data type.
/// </summary>
/// <remarks>
///     This method has no client-side implementation: calling it directly always throws.
/// </remarks>
/// <typeparam name="T">The element type of the vectors.</typeparam>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">
///     Controls how the computed value is used in an ORDER BY expression. When <see langword="true"/>, brute force
///     is used; otherwise any index defined on the vector property is leveraged.
/// </param>
/// <param name="distanceFunction">The distance function to use.</param>
/// <param name="dataType">The vector data type to use.</param>
/// <returns>Nothing when invoked directly; the call always throws.</returns>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked on the client.</exception>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static double VectorDistance<T>(
    this DbFunctions _,
    IEnumerable<T> vector1,
    IEnumerable<T> vector2,
    [NotParameterized] bool useBruteForce,
    [NotParameterized] DistanceFunction distanceFunction,
    [NotParameterized] VectorDataType dataType)
{
    throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(VectorDistance)));
}
50101
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics.CodeAnalysis;
5+
using Microsoft.EntityFrameworkCore.Cosmos.Metadata.Internal;
6+
7+
// ReSharper disable once CheckNamespace
8+
namespace Microsoft.EntityFrameworkCore;
9+
10+
/// <summary>
///     Extension methods for <see cref="IndexBuilder"/> that are specific to Azure Cosmos DB.
/// </summary>
/// <remarks>
///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static class CosmosIndexBuilderExtensions
{
    /// <summary>
    ///     Configures the index as a vector index of the given type, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <remarks>
    ///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    ///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The type of vector index to create.</param>
    /// <returns>The same builder, so that further configuration calls can be chained.</returns>
    public static IndexBuilder ForVectors(this IndexBuilder indexBuilder, VectorIndexType? indexType)
    {
        var index = indexBuilder.Metadata;
        index.SetVectorIndexType(indexType);

        return indexBuilder;
    }

    /// <summary>
    ///     Configures the index as a vector index of the given type, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <remarks>
    ///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    ///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <typeparam name="TEntity">The entity type the index is defined on.</typeparam>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The type of vector index to create.</param>
    /// <returns>The same builder, so that further configuration calls can be chained.</returns>
    public static IndexBuilder<TEntity> ForVectors<TEntity>(
        this IndexBuilder<TEntity> indexBuilder,
        VectorIndexType? indexType)
    {
        // Delegate to the non-generic overload, then restore the generic builder type for chaining.
        IndexBuilder nonGenericBuilder = indexBuilder;

        return (IndexBuilder<TEntity>)ForVectors(nonGenericBuilder, indexType);
    }

    /// <summary>
    ///     Configures the index as a vector index of the given type, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <remarks>
    ///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    ///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The type of vector index to create.</param>
    /// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
    /// <returns>
    ///     The same builder instance if the configuration was applied,
    ///     <see langword="null" /> otherwise.
    /// </returns>
    public static IConventionIndexBuilder? ForVectors(
        this IConventionIndexBuilder indexBuilder,
        VectorIndexType? indexType,
        bool fromDataAnnotation = false)
    {
        if (!indexBuilder.CanSetVectorIndexType(indexType, fromDataAnnotation))
        {
            return null;
        }

        indexBuilder.Metadata.SetVectorIndexType(indexType, fromDataAnnotation);

        return indexBuilder;
    }

    /// <summary>
    ///     Returns a value indicating whether the index can be configured with the given vector index type.
    /// </summary>
    /// <remarks>
    ///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    ///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The index type to use.</param>
    /// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
    /// <returns><see langword="true" /> if the index can be configured for vectors.</returns>
    public static bool CanSetVectorIndexType(
        this IConventionIndexBuilder indexBuilder,
        VectorIndexType? indexType,
        bool fromDataAnnotation = false)
    {
        return indexBuilder.CanSetAnnotation(CosmosAnnotationNames.VectorIndexType, indexType, fromDataAnnotation);
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics.CodeAnalysis;
5+
using Microsoft.EntityFrameworkCore.Cosmos.Metadata.Internal;
6+
7+
// ReSharper disable once CheckNamespace
8+
namespace Microsoft.EntityFrameworkCore;
9+
10+
/// <summary>
///     Index extension methods for Azure Cosmos DB-specific metadata.
/// </summary>
/// <remarks>
///     See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
///     <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
[Experimental(EFDiagnostics.CosmosVectorSearchExperimental)]
public static class CosmosIndexExtensions
{
    /// <summary>
    ///     Returns the vector index type to use, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <returns>The index type to use, or <see langword="null" /> if none is set.</returns>
    /// <exception cref="InvalidOperationException">Thrown when <paramref name="index" /> is a <see cref="RuntimeIndex" />.</exception>
    public static VectorIndexType? GetVectorIndexType(this IReadOnlyIndex index)
        => (index is RuntimeIndex)
            ? throw new InvalidOperationException(CoreStrings.RuntimeModelMissingData)
            : (VectorIndexType?)index[CosmosAnnotationNames.VectorIndexType];

    /// <summary>
    ///     Sets the vector index type to use, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="indexType">The index type to use.</param>
    public static void SetVectorIndexType(this IMutableIndex index, VectorIndexType? indexType)
        => index.SetAnnotation(CosmosAnnotationNames.VectorIndexType, indexType);

    /// <summary>
    ///     Sets the vector index type to use, such as "flat", "diskANN", or "quantizedFlat".
    ///     See <see href="https://aka.ms/ef-cosmos-vectors">Vector Search in Azure Cosmos DB for NoSQL</see> for more information.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="indexType">The index type to use.</param>
    /// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
    /// <returns>
    ///     The string form of the configured value, or <see langword="null" /> if no annotation was set
    ///     or the configured value is <see langword="null" />.
    /// </returns>
    public static string? SetVectorIndexType(
        this IConventionIndex index,
        VectorIndexType? indexType,
        bool fromDataAnnotation = false)
        // The annotation stores a VectorIndexType, not a string: casting it to string would throw
        // InvalidCastException for every non-null value, so convert it instead. The declared return
        // type is kept as-is so existing callers keep compiling.
        => index.SetAnnotation(
            CosmosAnnotationNames.VectorIndexType,
            indexType,
            fromDataAnnotation)?.Value?.ToString();

    /// <summary>
    ///     Returns the <see cref="ConfigurationSource" /> for the value returned by <see cref="GetVectorIndexType"/>.
    /// </summary>
    /// <param name="property">The index.</param>
    /// <returns>
    ///     The <see cref="ConfigurationSource" /> for the vector index type, or <see langword="null" />
    ///     if the annotation is not present.
    /// </returns>
    public static ConfigurationSource? GetVectorIndexTypeConfigurationSource(this IConventionIndex property)
        => property.FindAnnotation(CosmosAnnotationNames.VectorIndexType)?.GetConfigurationSource();
}

0 commit comments

Comments
 (0)