diff --git a/examples/mysql-vector/README.md b/examples/mysql-vector/README.md new file mode 100644 index 0000000..cd266bd --- /dev/null +++ b/examples/mysql-vector/README.md @@ -0,0 +1,95 @@ +# MySQL 9 Vector Store Integration for LLM Chain + +This directory contains example files for using MySQL 9's native vector capabilities with LLM Chain. + +## Overview + +MySQL 9 introduces built-in vector support through the `VECTOR` data type and functions like `VECTOR_COSINE_DISTANCE` for similarity operations. This allows using MySQL as a vector store for RAG (Retrieval-Augmented Generation) applications without requiring a separate vector database service. + +## Requirements + +- PHP 8.2 or higher +- MySQL 9.0.0 or higher +- PDO PHP extension with MySQL driver +- llm-chain library + +## Quick Start with Docker + +1. Ensure you have Docker and Docker Compose installed +2. Start MySQL 9: + ```bash + docker-compose up -d + ``` +3. Wait for the MySQL server to be ready (check with `docker-compose logs -f`) +4. Configure your `.env` file: + ``` + MYSQL_DSN=mysql:host=localhost;port=3306;dbname=llm_chain;charset=utf8mb4 + MYSQL_USERNAME=root + MYSQL_PASSWORD=password + OPENAI_API_KEY=sk-your-openai-key + ``` +5. Run the example: + ```bash + php ../store-mysql-similarity-search.php + ``` + +## Manual Setup + +If you're not using Docker, you'll need to: + +1. Install MySQL 9 +2. Create a database: + ```sql + CREATE DATABASE llm_chain; + ``` +3. The example will automatically create the necessary table with a VECTOR column + +## How It Works + +The MySQL 9 Store implementation: + +1. Automatically creates a table with the necessary structure when first used +2. Converts vector data from JSON to MySQL's native VECTOR type during storage +3. Uses MySQL's `VECTOR_COSINE_DISTANCE` function for similarity search +4. 
Converts distance scores to similarity scores (1 - distance) for compatibility with other vector stores + +## Vector Table Schema + +The automatically created table has the following structure: + +```sql +CREATE TABLE vector_documents ( + id VARCHAR(36) PRIMARY KEY, + vector_data JSON NOT NULL, + metadata JSON, + VECTOR USING vector_data(1536) -- the dimension count is configurable +); +``` + +## Advanced Configuration + +You can customize the Store behavior through constructor parameters: + +```php +$store = new Store( + $pdo, // PDO connection + 'custom_table_name', // Custom table name (default: vector_documents) + 'embedding_vector', // Custom vector column name (default: vector_data) + 'document_metadata', // Custom metadata column name (default: metadata) + [], // Additional options + 768, // Vector dimensions (default: 1536 for OpenAI) + 5 // Default query result limit (default: 3) +); +``` + +## Performance Considerations + +For production use: +- Consider adding indexes based on your specific query patterns +- Monitor memory usage, especially with large vector collections +- Adjust MySQL server configuration for vector operations + +## Further Reading + +- [MySQL 9 Vector Documentation](https://dev.mysql.com/doc/refman/9.0/en/vector.html) +- [LLM Chain Documentation](https://github.com/php-llm/llm-chain) \ No newline at end of file diff --git a/examples/mysql-vector/docker-compose.yaml b/examples/mysql-vector/docker-compose.yaml new file mode 100644 index 0000000..911359f --- /dev/null +++ b/examples/mysql-vector/docker-compose.yaml @@ -0,0 +1,22 @@ +version: '3.8' + +services: + mysql: + image: mysql:9.0.0 + restart: always + environment: + - MYSQL_ROOT_PASSWORD=password + - MYSQL_DATABASE=llm_chain + ports: + - "3306:3306" + volumes: + - mysql_data:/var/lib/mysql + command: --default-authentication-plugin=mysql_native_password + healthcheck: + test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + 
mysql_data: \ No newline at end of file diff --git a/examples/store-mysql-similarity-search.php b/examples/store-mysql-similarity-search.php new file mode 100644 index 0000000..55378b4 --- /dev/null +++ b/examples/store-mysql-similarity-search.php @@ -0,0 +1,109 @@ +bootEnv(dirname(__DIR__).'/.env'); + +// Establish MySQL connection +$dsn = $_ENV['MYSQL_DSN'] ?? 'mysql:host=localhost;port=3306;dbname=llm_chain;charset=utf8mb4'; +$username = $_ENV['MYSQL_USERNAME'] ?? 'root'; +$password = $_ENV['MYSQL_PASSWORD'] ?? 'password'; +$pdo = new PDO($dsn, $username, $password, [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]); + +// Initialize Platform & Models +$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']); +$embeddingsModel = new Embeddings($platform); +$llm = new GPT(GPT::GPT_4O_MINI); + +// Initialize Store +$store = new Store( + $pdo, + 'vector_documents', // Table name + 'vector_data', // Vector column name + 'metadata', // Metadata column name + [], // Additional options + 1536, // Vector dimensions for OpenAI Embeddings + 3 // Default limit for results +); + +// Create demo data for the store +$examples = [ + [ + 'question' => 'What is PHP?', + 'answer' => 'PHP (recursive acronym and backronym for "PHP: Hypertext Preprocessor", originally "Personal Home Page Tools") is a scripting language with a syntax similar to C and Perl, mainly used for creating dynamic web pages.', + ], + [ + 'question' => 'What is MySQL?', + 'answer' => 'MySQL is a relational open-source database management system that uses SQL as its query language. MySQL version 9 provides native vector support for AI applications, enabling efficient similarity search operations.', + ], + [ + 'question' => 'What is LLM Chain?', + 'answer' => 'LLM Chain is a PHP library for developing LLM-based and AI-based features and applications. 
It supports various language models, platforms, and vector stores for building intelligent applications.', + ], + [ + 'question' => 'What is a vector store?', + 'answer' => 'A vector store is a specialized type of database optimized for storing and querying vector data, typically derived from embedding models. They enable similarity searches based on the semantic meaning of texts, making them ideal for RAG (Retrieval-Augmented Generation) systems.', + ], + [ + 'question' => 'How does MySQL 9 vector support work?', + 'answer' => 'MySQL 9 introduces native vector support through the VECTOR data type and specialized functions like VECTOR_COSINE_DISTANCE. This enables efficient storage and similarity search operations directly within the database, simplifying the architecture for AI applications by eliminating the need for separate vector database services.', + ], +]; + +// Insert data into the store +$embedder = new Embedder($platform, $embeddingsModel, $store); +$documents = []; + +echo "Creating and storing embeddings for the example data...\n"; +foreach ($examples as $index => $example) { + $content = sprintf("Question: %s\nAnswer: %s", $example['question'], $example['answer']); + $documents[] = new TextDocument( + id: Symfony\Component\Uid\Uuid::v4(), + content: $content, + metadata: new Metadata(['index' => $index, 'question' => $example['question']]), + ); +} + +$embedder->embed($documents); +echo "Embeddings successfully stored.\n\n"; + +// Create Chain with SimilaritySearch tool +$similaritySearch = new SimilaritySearch($embeddingsModel, $store); +$toolbox = Toolbox::create($similaritySearch); +$processor = new ChainProcessor($toolbox); +$chain = new Chain($platform, $llm, [$processor], [$processor]); + +// Simulate user request +$userQuestion = 'Explain what a vector store is and how it works with MySQL 9.'; + +echo "User query: {$userQuestion}\n"; +$messages = new MessageBag( + Message::forSystem(<<call($messages); +echo "Answer: 
{$response->getContent()}\n"; \ No newline at end of file diff --git a/src/Bridge/MySQL/Store.php b/src/Bridge/MySQL/Store.php new file mode 100644 index 0000000..55525f2 --- /dev/null +++ b/src/Bridge/MySQL/Store.php @@ -0,0 +1,132 @@ + $options + */
+    public function __construct(
+        private PDO $connection,
+        private string $tableName = 'vector_documents',
+        private string $vectorColumnName = 'vector_data',
+        private string $metadataColumnName = 'metadata',
+        private array $options = [],
+        private int $dimensions = 1536,
+        private int $limit = 3,
+    ) {
+        // The table and column names are interpolated into SQL statements as-is,
+        // so they must come from trusted configuration, never from user input.
+        $this->ensureTableExists();
+    }
+
+    /**
+     * Inserts the given documents inside a single transaction.
+     *
+     * Each vector is JSON-encoded and passed through JSON_ARRAY_PACK() in SQL;
+     * the metadata is stored as a JSON document. Nothing is executed when no
+     * documents are passed. Any failure rolls the whole batch back.
+     *
+     * @throws \JsonException when a vector or the metadata cannot be JSON-encoded
+     * @throws PDOException   when preparing or executing the INSERT fails
+     */
+    public function add(VectorDocument ...$documents): void
+    {
+        if ([] === $documents) {
+            return;
+        }
+
+        $sql = sprintf(
+            'INSERT INTO %s (id, %s, %s) VALUES (?, JSON_ARRAY_PACK(?), ?)',
+            $this->tableName,
+            $this->vectorColumnName,
+            $this->metadataColumnName
+        );
+
+        $statement = $this->connection->prepare($sql);
+        $this->connection->beginTransaction();
+
+        try {
+            foreach ($documents as $document) {
+                $statement->execute([
+                    (string) $document->id,
+                    json_encode($document->vector->getData(), JSON_THROW_ON_ERROR),
+                    json_encode($document->metadata->getArrayCopy(), JSON_THROW_ON_ERROR),
+                ]);
+            }
+            $this->connection->commit();
+        } catch (\Throwable $e) {
+            // Roll back on every failure (not only PDOException) so that an
+            // encoding error cannot leave the connection inside an open transaction.
+            if ($this->connection->inTransaction()) {
+                $this->connection->rollBack();
+            }
+            throw $e;
+        }
+    }
+
+    /**
+     * Returns the stored documents closest to $vector, best match first.
+     *
+     * The SQL computes a cosine distance (smaller means closer) and orders by it;
+     * every returned document carries the similarity score `1 - distance`.
+     *
+     * @param Vector     $vector   query vector
+     * @param array      $options  supports 'limit' (maximum number of rows); defaults to the constructor limit
+     * @param float|null $minScore minimum similarity (1 - distance) a row must reach; null disables the filter
+     *
+     * @return array list of VectorDocument instances with their similarity score set
+     *
+     * @throws \JsonException when the query vector cannot be JSON-encoded
+     * @throws PDOException   when the query fails
+     */
+    public function query(Vector $vector, array $options = [], ?float $minScore = null): array
+    {
+        $limit = max(0, (int) ($options['limit'] ?? $this->limit));
+        $parameters = [json_encode($vector->getData(), JSON_THROW_ON_ERROR)];
+
+        // "score" in the SQL below is a distance, while $minScore is a similarity.
+        // similarity >= minScore  <=>  distance <= 1 - minScore.
+        $distanceCondition = '';
+        if (null !== $minScore) {
+            $distanceCondition = 'HAVING score <= ?';
+            $parameters[] = 1.0 - $minScore;
+        }
+
+        // NOTE(review): JSON_ARRAY_PACK() and VECTOR_COSINE_DISTANCE() are kept from the
+        // original implementation; confirm they exist on the targeted MySQL 9 server.
+        $sql = sprintf(
+            'SELECT
+                id,
+                %s as vector_data,
+                %s as metadata,
+                VECTOR_COSINE_DISTANCE(%s, JSON_ARRAY_PACK(?)) as score
+            FROM %s
+            %s
+            ORDER BY score
+            LIMIT %d',
+            $this->vectorColumnName,
+            $this->metadataColumnName,
+            $this->vectorColumnName,
+            $this->tableName,
+            $distanceCondition,
+            $limit
+        );
+
+        $statement = $this->connection->prepare($sql);
+        $statement->execute($parameters);
+        $results = $statement->fetchAll(PDO::FETCH_ASSOC);
+
+        $documents = [];
+        foreach ($results as $result) {
+            $vectorData = json_decode($result['vector_data'], true);
+            // The metadata column is nullable; treat a missing value as empty metadata.
+            $metadataArray = null !== $result['metadata'] ? json_decode($result['metadata'], true) : null;
+
+            // The database returns a cosine distance; convert it into a
+            // similarity score (1 - distance) before handing it to the caller.
+            $similarityScore = 1 - (float) $result['score'];
+
+            $documents[] = new VectorDocument(
+                id: Uuid::fromString($result['id']),
+                vector: new Vector($vectorData),
+                metadata: new Metadata($metadataArray ?? []),
+                score: $similarityScore,
+            );
+        }
+
+        return $documents;
+    }
+
+    /**
+     * Creates the backing table when it does not exist yet.
+     *
+     * Uses CREATE TABLE IF NOT EXISTS instead of a separate SHOW TABLES LIKE
+     * probe: LIKE treats "_" in the table name as a wildcard, and the probe
+     * followed by a CREATE is not atomic.
+     */
+    private function ensureTableExists(): void
+    {
+        // NOTE(review): the "VECTOR USING column(dimensions)" clause is kept from the
+        // original DDL; verify it against the MySQL 9 CREATE TABLE / VECTOR documentation.
+        $sql = sprintf(
+            'CREATE TABLE IF NOT EXISTS %s (
+                id VARCHAR(36) PRIMARY KEY,
+                %s JSON NOT NULL,
+                %s JSON,
+                VECTOR USING %s(%d)
+            )',
+            $this->tableName,
+            $this->vectorColumnName,
+            $this->metadataColumnName,
+            $this->vectorColumnName,
+            $this->dimensions
+        );
+        $this->connection->exec($sql);
+    }
+}
\ No newline at end of file