Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MS Learn code #260

Merged
merged 3 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public class Hotel {
@VectorStoreRecordVector(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
private final List<Float> indexedEuclidean;

@VectorStoreRecordData
@VectorStoreRecordData(isFilterable = true)
private final List<String> tags;

@VectorStoreRecordData
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import com.microsoft.semantickernel.data.redis.RedisVectorStore;
import com.microsoft.semantickernel.data.redis.RedisVectorStoreOptions;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.tests.data.jdbc.Hotel;
import com.redis.testcontainers.RedisContainer;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.credential.KeyCredential;
import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService;
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
import com.microsoft.semantickernel.data.VolatileVectorStore;
import com.microsoft.semantickernel.data.VolatileVectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.data.VectorStoreTextSearch;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
Expand Down Expand Up @@ -43,7 +42,6 @@ static class GitHubFile {
@VectorStoreRecordData
private final String description;
@VectorStoreRecordData
@TextSearchResultValue
private final String link;
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE)
private final List<Float> embedding;
Expand Down Expand Up @@ -125,24 +123,27 @@ public static void inMemoryStoreAndSearch(
.then(storeData(collection, embeddingGeneration, sampleData()))
.block();

// Build a vectorized search
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
.withVectorizedSearch(collection)
.withTextEmbeddingGenerationService(embeddingGeneration)
.build();

// Search for results
// Volatile store executes an exhaustive search, for approximate search use Azure AI Search, Redis or JDBC with PostgreSQL
String query = "How to get started?";
var results = vectorStoreTextSearch.searchAsync(query, null)
.block();
var results = search("How to get started", collection, embeddingGeneration).block();

if (results == null || results.getTotalCount() == 0) {
System.out.println("No search results found.");
return;
}
var searchResult = results.getResults().get(0);
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
searchResult.getScore(), searchResult.getRecord().link,
searchResult.getRecord().description);
}

System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
/**
 * Runs a vector search for the given text.
 *
 * <p>Requests an embedding for {@code searchText} from {@code embeddingGeneration} and, once it
 * is available, passes that embedding's vector to {@code recordCollection.searchAsync}.
 *
 * @param searchText text to embed and search for
 * @param recordCollection collection that is searched with the generated vector
 * @param embeddingGeneration service used to generate the embedding for {@code searchText}
 * @return the {@code Mono} obtained by chaining the embedding request into the collection search
 */
private static Mono<VectorSearchResults<GitHubFile>> search(
    String searchText,
    VectorStoreRecordCollection<String, GitHubFile> recordCollection,
    OpenAITextEmbeddingGenerationService embeddingGeneration) {
    // Step 1: ask the embedding service for the vector representation of the query text.
    var queryEmbedding = embeddingGeneration.generateEmbeddingAsync(searchText);

    // Step 2: feed that vector into the collection search.
    // The second argument is null; presumably this means "no search options" (confirm against searchAsync).
    return queryEmbedding.flatMap(
        embedding -> recordCollection.searchAsync(embedding.getVector(), null));
}

private static Mono<List<String>> storeData(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
import com.azure.ai.openai.OpenAIClientBuilder;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.credential.KeyCredential;
import com.azure.core.util.ClientOptions;
import com.azure.core.util.MetricsOptions;
import com.azure.core.util.TracingOptions;
import com.azure.search.documents.indexes.SearchIndexAsyncClient;
import com.azure.search.documents.indexes.SearchIndexClientBuilder;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService;
import com.microsoft.semantickernel.data.azureaisearch.AzureAISearchVectorStore;
import com.microsoft.semantickernel.data.azureaisearch.AzureAISearchVectorStoreOptions;
import com.microsoft.semantickernel.data.azureaisearch.AzureAISearchVectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
import com.microsoft.semantickernel.data.VectorStoreTextSearch;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
Expand Down Expand Up @@ -49,12 +50,11 @@ public class VectorStoreWithAzureAISearch {
private static final int EMBEDDING_DIMENSIONS = 1536;

static class GitHubFile {
@VectorStoreRecordKey()
@VectorStoreRecordKey
private final String id;
@VectorStoreRecordData()
@VectorStoreRecordData
private final String description;
@VectorStoreRecordData
@TextSearchResultValue
private final String link;
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_SIMILARITY)
private final List<Float> embedding;
Expand All @@ -64,10 +64,10 @@ public GitHubFile() {
}

public GitHubFile(
@JsonProperty("fileId") String id,
@JsonProperty("description") String description,
@JsonProperty("link") String link,
@JsonProperty("embedding") List<Float> embedding) {
String id,
String description,
String link,
List<Float> embedding) {
this.id = id;
this.description = description;
this.link = link;
Expand Down Expand Up @@ -108,6 +108,7 @@ public static void main(String[] args) {
var searchClient = new SearchIndexClientBuilder()
.endpoint(AZURE_AI_SEARCH_ENDPOINT)
.credential(new AzureKeyCredential(AZURE_AISEARCH_KEY))
.clientOptions(clientOptions())
.buildAsyncClient();

storeAndSearch(searchClient, embeddingGeneration);
Expand Down Expand Up @@ -137,24 +138,27 @@ public static void storeAndSearch(
.then(storeData(collection, embeddingGeneration, sampleData()))
.block();

// Build a vectorized search
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
.withVectorizedSearch(collection)
.withTextEmbeddingGenerationService(embeddingGeneration)
.build();

// Search for results
// Might need to wait for the data to be indexed
String query = "How to get started?";
var results = vectorStoreTextSearch.searchAsync(query, null)
.block();
var results = search("How to get started", collection, embeddingGeneration).block();

if (results == null || results.getTotalCount() == 0) {
System.out.println("No search results found.");
return;
}
var searchResult = results.getResults().get(0);
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
searchResult.getScore(), searchResult.getRecord().link,
searchResult.getRecord().description);
}

System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
/**
 * Runs a vector search for the given text.
 *
 * <p>Requests an embedding for {@code searchText} from {@code embeddingGeneration} and, once it
 * is available, passes that embedding's vector to {@code recordCollection.searchAsync}.
 *
 * @param searchText text to embed and search for
 * @param recordCollection collection that is searched with the generated vector
 * @param embeddingGeneration service used to generate the embedding for {@code searchText}
 * @return the {@code Mono} obtained by chaining the embedding request into the collection search
 */
private static Mono<VectorSearchResults<GitHubFile>> search(
    String searchText,
    VectorStoreRecordCollection<String, GitHubFile> recordCollection,
    OpenAITextEmbeddingGenerationService embeddingGeneration) {
    // Step 1: ask the embedding service for the vector representation of the query text.
    var queryEmbedding = embeddingGeneration.generateEmbeddingAsync(searchText);

    // Step 2: feed that vector into the collection search.
    // The second argument is null; presumably this means "no search options" (confirm against searchAsync).
    return queryEmbedding.flatMap(
        embedding -> recordCollection.searchAsync(embedding.getVector(), null));
}

private static Mono<List<String>> storeData(
Expand Down Expand Up @@ -197,4 +201,11 @@ private static Map<String, String> sampleData() {
"README: README associated with a sample chat summary react-based webapp" },
}).collect(Collectors.toMap(element -> element[0], element -> element[1]));
}

/**
 * Builds the {@code ClientOptions} handed to the search client builder.
 *
 * <p>The options carry a default-constructed {@code TracingOptions}, a default-constructed
 * {@code MetricsOptions}, and the application id {@code "Semantic-Kernel"}.
 *
 * @return the configured client options
 */
private static ClientOptions clientOptions() {
    ClientOptions options = new ClientOptions();
    // Setters are applied in the same order as a fluent chain would apply them.
    options.setTracingOptions(new TracingOptions());
    options.setMetricsOptions(new MetricsOptions());
    options.setApplicationId("Semantic-Kernel");
    return options;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStore;
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStoreOptions;
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.data.jdbc.mysql.MySQLVectorStoreQueryProvider;
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
import com.microsoft.semantickernel.data.jdbc.postgres.PostgreSQLVectorStoreQueryProvider;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
import com.microsoft.semantickernel.data.VectorStoreTextSearch;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
import com.mysql.cj.jdbc.MysqlDataSource;

import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.util.Arrays;
Expand All @@ -27,6 +26,7 @@
import java.util.Map;
import java.util.stream.Collectors;

import org.postgresql.ds.PGSimpleDataSource;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

Expand All @@ -42,12 +42,11 @@ public class VectorStoreWithJDBC {
private static final int EMBEDDING_DIMENSIONS = 1536;

static class GitHubFile {
@VectorStoreRecordKey()
@VectorStoreRecordKey
private final String id;
@VectorStoreRecordData()
@VectorStoreRecordData
private final String description;
@VectorStoreRecordData
@TextSearchResultValue
private final String link;
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, distanceFunction = DistanceFunction.COSINE_DISTANCE)
private final List<Float> embedding;
Expand Down Expand Up @@ -89,8 +88,8 @@ static String encodeId(String realId) {
}
}

// Run a MySQL server with:
// docker run -d --name mysql-container -e MYSQL_ROOT_PASSWORD=root -e MYSQL_DATABASE=sk -p 3306:3306 mysql:latest
// Run a PostgreSQL server with:
// docker run -d --name pgvector-container -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=root -e POSTGRES_DB=sk -p 5432:5432 pgvector/pgvector:pg17

public static void main(String[] args) throws SQLException {
System.out.println("==============================================================");
Expand Down Expand Up @@ -123,14 +122,14 @@ public static void main(String[] args) throws SQLException {

public static void storeAndSearch(OpenAITextEmbeddingGenerationService embeddingGeneration) {
// Configure the data source
var dataSource = new MysqlDataSource();
dataSource.setUrl("jdbc:mysql://localhost:3306/sk");
PGSimpleDataSource dataSource = new PGSimpleDataSource();
dataSource.setUrl("jdbc:postgresql://localhost:5432/sk");
dataSource.setUser("postgres");
dataSource.setPassword("root");
dataSource.setUser("root");

// Build a query provider
// Other available query providers are MySQLVectorStoreQueryProvider and SQLiteVectorStoreQueryProvider
var queryProvider = MySQLVectorStoreQueryProvider.builder()
var queryProvider = PostgreSQLVectorStoreQueryProvider.builder()
.withDataSource(dataSource)
.build();

Expand All @@ -155,23 +154,26 @@ public static void storeAndSearch(OpenAITextEmbeddingGenerationService embedding
.then(storeData(collection, embeddingGeneration, sampleData()))
.block();

// Build a vectorized search
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
.withVectorizedSearch(collection)
.withTextEmbeddingGenerationService(embeddingGeneration)
.build();

// Search for results
String query = "How to get started?";
var results = vectorStoreTextSearch.searchAsync(query, null)
.block();
var results = search("How to get started", collection, embeddingGeneration).block();

if (results == null || results.getTotalCount() == 0) {
System.out.println("No search results found.");
return;
}
var searchResult = results.getResults().get(0);
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
searchResult.getScore(), searchResult.getRecord().link,
searchResult.getRecord().description);
}

System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
/**
 * Runs a vector search for the given text.
 *
 * <p>Requests an embedding for {@code searchText} from {@code embeddingGeneration} and, once it
 * is available, passes that embedding's vector to {@code recordCollection.searchAsync}.
 *
 * @param searchText text to embed and search for
 * @param recordCollection collection that is searched with the generated vector
 * @param embeddingGeneration service used to generate the embedding for {@code searchText}
 * @return the {@code Mono} obtained by chaining the embedding request into the collection search
 */
private static Mono<VectorSearchResults<GitHubFile>> search(
    String searchText,
    VectorStoreRecordCollection<String, GitHubFile> recordCollection,
    OpenAITextEmbeddingGenerationService embeddingGeneration) {
    // Step 1: ask the embedding service for the vector representation of the query text.
    var queryEmbedding = embeddingGeneration.generateEmbeddingAsync(searchText);

    // Step 2: feed that vector into the collection search.
    // The second argument is null; presumably this means "no search options" (confirm against searchAsync).
    return queryEmbedding.flatMap(
        embedding -> recordCollection.searchAsync(embedding.getVector(), null));
}

private static Mono<List<String>> storeData(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
import com.microsoft.semantickernel.data.redis.RedisStorageType;
import com.microsoft.semantickernel.data.redis.RedisVectorStore;
import com.microsoft.semantickernel.data.redis.RedisVectorStoreOptions;
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
import com.microsoft.semantickernel.data.VectorStoreTextSearch;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
Expand Down Expand Up @@ -42,12 +41,11 @@ public class VectorStoreWithRedis {
private static final int EMBEDDING_DIMENSIONS = 1536;

public static class GitHubFile {
@VectorStoreRecordKey()
@VectorStoreRecordKey
private final String id;
@VectorStoreRecordData()
@VectorStoreRecordData
private final String description;
@VectorStoreRecordData
@TextSearchResultValue
private final String link;
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE)
private final List<Float> embedding;
Expand Down Expand Up @@ -146,23 +144,27 @@ public static void storeAndSearch(
.then(storeData(collection, embeddingGeneration, sampleData()))
.block();

// Build a vectorized search
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
.withVectorizedSearch(collection)
.withTextEmbeddingGenerationService(embeddingGeneration)
.build();

// Search for results
String query = "How to get started?";
var results = vectorStoreTextSearch.searchAsync(query, null)
.block();
// Might need to wait for the data to be indexed
var results = search("How to get started", collection, embeddingGeneration).block();

if (results == null || results.getTotalCount() == 0) {
System.out.println("No search results found.");
return;
}
var searchResult = results.getResults().get(0);
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
searchResult.getScore(), searchResult.getRecord().link,
searchResult.getRecord().description);
}

System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
/**
 * Runs a vector search for the given text.
 *
 * <p>Requests an embedding for {@code searchText} from {@code embeddingGeneration} and, once it
 * is available, passes that embedding's vector to {@code recordCollection.searchAsync}.
 *
 * @param searchText text to embed and search for
 * @param recordCollection collection that is searched with the generated vector
 * @param embeddingGeneration service used to generate the embedding for {@code searchText}
 * @return the {@code Mono} obtained by chaining the embedding request into the collection search
 */
private static Mono<VectorSearchResults<GitHubFile>> search(
    String searchText,
    VectorStoreRecordCollection<String, GitHubFile> recordCollection,
    OpenAITextEmbeddingGenerationService embeddingGeneration) {
    // Step 1: ask the embedding service for the vector representation of the query text.
    var queryEmbedding = embeddingGeneration.generateEmbeddingAsync(searchText);

    // Step 2: feed that vector into the collection search.
    // The second argument is null; presumably this means "no search options" (confirm against searchAsync).
    return queryEmbedding.flatMap(
        embedding -> recordCollection.searchAsync(embedding.getVector(), null));
}

private static Mono<List<String>> storeData(
Expand Down
13 changes: 13 additions & 0 deletions samples/semantickernel-learn-resources/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@
<artifactId>semantickernel-api</artifactId>
</dependency>

<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-data-azureaisearch</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-data-jdbc</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-data-redis</artifactId>
</dependency>

<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
Expand Down
Loading