From dbea310538343d048e515ee460d6180b19256217 Mon Sep 17 00:00:00 2001 From: Daniele Barbaro Date: Thu, 28 May 2026 11:32:35 +0200 Subject: [PATCH 1/6] chore: pin ezimuel/phpvector dev-feat/is-persistent until 0.3.0 ships --- composer.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index f204d13..7e8df19 100644 --- a/composer.json +++ b/composer.json @@ -1,6 +1,6 @@ { "name": "neuron-core/php-vector", - "description": "Conversational Data Collection.", + "description": "PHPVector adapter for the Neuron AI framework.", "minimum-stability": "stable", "authors": [ { @@ -11,7 +11,7 @@ "license": "MIT", "require": { "php": "^8.1", - "ezimuel/phpvector": "^0.2.0", + "ezimuel/phpvector": "dev-feat/is-persistent as 0.3.0", "neuron-core/neuron-ai": "^3.0" }, "require-dev": { From bcce1c2476597933865672bfc5d54f25d768f128 Mon Sep 17 00:00:00 2001 From: Daniele Barbaro Date: Wed, 27 May 2026 22:48:21 +0200 Subject: [PATCH 2/6] feat: round-trip sourceType and sourceName through metadata --- src/PHPVector.php | 57 +++++++++++++++++++++++++++++++---------- tests/PHPVectorTest.php | 23 +++++++++++++++++ 2 files changed, 67 insertions(+), 13 deletions(-) diff --git a/src/PHPVector.php b/src/PHPVector.php index f75132b..c43acf2 100644 --- a/src/PHPVector.php +++ b/src/PHPVector.php @@ -9,6 +9,7 @@ use NeuronAI\RAG\VectorStore\VectorStoreInterface; use NeuronAI\StaticConstructor; use PHPVector\Document; +use PHPVector\Metadata\MetadataFilter; use PHPVector\SearchResult; use PHPVector\VectorDatabase; @@ -18,6 +19,9 @@ class PHPVector implements VectorStoreInterface { use StaticConstructor; + private const SOURCE_TYPE_KEY = 'sourceType'; + private const SOURCE_NAME_KEY = 'sourceName'; + public function __construct( protected VectorDatabase $database, protected int $topK = 5, @@ -26,14 +30,7 @@ public function __construct( public function addDocument(NeuronDocument $document): VectorStoreInterface { - $this->database->addDocument( - new Document( - id: $document->id, - vector: $document->embedding, - text: $document->content, - metadata: $document->metadata, - ) - ); + $this->write($document); return $this; } @@ -44,12 +41,36 @@ public function addDocument(NeuronDocument $document): VectorStoreInterface public function addDocuments(array $documents): VectorStoreInterface { foreach ($documents as $document) { - $this->addDocument($document); + $this->write($document); } return $this; } + /** + * Persist a Neuron document into PHPVector. + * + * Neuron's `sourceType`/`sourceName` are top-level Document properties, but + * PHPVector only stores `metadata`. They are folded into metadata under the + * reserved keys so `deleteBy()` can filter on them; `similaritySearch()` + * restores them and strips the reserved keys back out. + */ + private function write(NeuronDocument $document): void + { + $this->database->addDocument( + new Document( + id: $document->id, + vector: $document->embedding, + text: $document->content, + metadata: [ + ...$document->metadata, + self::SOURCE_TYPE_KEY => $document->sourceType, + self::SOURCE_NAME_KEY => $document->sourceName, + ], + ) + ); + } + /** * @throws VectorStoreException */ @@ -79,11 +100,21 @@ public function similaritySearch(array $embedding): iterable ); return array_map(function (SearchResult $result): NeuronDocument { - $document = new NeuronDocument($result->document->text); - $document->id = $result->document->id; - $document->embedding = $result->document->vector; - $document->metadata = $result->document->metadata; + $phpDoc = $result->document; + + $metadata = $phpDoc->metadata; + $sourceType = $metadata[self::SOURCE_TYPE_KEY] ?? 'manual'; + $sourceName = $metadata[self::SOURCE_NAME_KEY] ?? 'manual'; + unset($metadata[self::SOURCE_TYPE_KEY], $metadata[self::SOURCE_NAME_KEY]); + + $document = new NeuronDocument($phpDoc->text); + $document->id = $phpDoc->id; + $document->embedding = $phpDoc->vector; + $document->sourceType = $sourceType; + $document->sourceName = $sourceName; + $document->metadata = $metadata; $document->score = $result->score; + return $document; }, $results); } diff --git a/tests/PHPVectorTest.php b/tests/PHPVectorTest.php index 2ebf73f..2378d8f 100644 --- a/tests/PHPVectorTest.php +++ b/tests/PHPVectorTest.php @@ -238,6 +238,29 @@ public function testAddDocumentsReturnsAdapterInstance(): void $this->assertSame($adapter, $result); } + public function testSourceTypeAndNameRoundTripWithoutLeakingIntoMetadata(): void + { + $database = new VectorDatabase(); + $adapter = new PHPVector($database); + + $document = new NeuronDocument('Round trip content'); + $document->id = 'rt1'; + $document->embedding = $this->createTestEmbedding(); + $document->sourceType = 'pdf'; + $document->sourceName = 'manual.pdf'; + $document->metadata = ['author' => 'jane', 'pages' => 12, 'published' => true]; + + $adapter->addDocument($document); + + $results = $adapter->similaritySearch($document->embedding); + $resultsArray = is_array($results) ? $results : iterator_to_array($results); + $first = $resultsArray[0]; + + self::assertSame('pdf', $first->sourceType); + self::assertSame('manual.pdf', $first->sourceName); + self::assertSame(['author' => 'jane', 'pages' => 12, 'published' => true], $first->metadata); + } + private function createDocumentWithEmbedding(string $content): NeuronDocument { $document = new NeuronDocument($content); From 04ca67bacd69fc56c4628cfc654f641355b1cd56 Mon Sep 17 00:00:00 2001 From: Daniele Barbaro Date: Wed, 27 May 2026 22:56:59 +0200 Subject: [PATCH 3/6] feat: add configurable batched auto-save --- src/PHPVector.php | 10 ++++++++++ tests/PHPVectorTest.php | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/src/PHPVector.php b/src/PHPVector.php index c43acf2..9487875 100644 --- a/src/PHPVector.php +++ b/src/PHPVector.php @@ -25,12 +25,14 @@ class PHPVector implements VectorStoreInterface public function __construct( protected VectorDatabase $database, protected int $topK = 5, + protected bool $autoSave = true, ) { } public function addDocument(NeuronDocument $document): VectorStoreInterface { $this->write($document); + $this->persist(); return $this; } @@ -43,6 +45,7 @@ public function addDocuments(array $documents): VectorStoreInterface foreach ($documents as $document) { $this->write($document); } + $this->persist(); return $this; } @@ -71,6 +74,13 @@ private function write(NeuronDocument $document): void ); } + private function persist(): void + { + if ($this->autoSave && $this->database->isPersistent()) { + $this->database->save(); + } + } + /** * @throws VectorStoreException */ diff --git a/tests/PHPVectorTest.php b/tests/PHPVectorTest.php index 2378d8f..c297bd5 100644 --- a/tests/PHPVectorTest.php +++ b/tests/PHPVectorTest.php @@ -261,6 +261,39 @@ public function testSourceTypeAndNameRoundTripWithoutLeakingIntoMetadata(): void self::assertSame(['author' => 'jane', 'pages' => 12, 'published' => true], $first->metadata); } + public function testMutationsPersistWhenAutoSaveEnabled(): void + { + $database = new VectorDatabase(path: $this->tempDir); + $adapter = new PHPVector($database); + + $adapter->addDocuments([ + $this->createDocumentWithEmbedding('Auto 1'), + $this->createDocumentWithEmbedding('Auto 2'), + ]); + + // No explicit save(): auto-save should have persisted the index. + $reopened = VectorDatabase::open($this->tempDir); + self::assertSame(2, $reopened->count()); + } + + public function testAutoSaveDisabledDoesNotPersistUntilManualSave(): void + { + $database = new VectorDatabase(path: $this->tempDir); + $adapter = new PHPVector($database, autoSave: false); + + $adapter->addDocuments([ + $this->createDocumentWithEmbedding('Manual 1'), + $this->createDocumentWithEmbedding('Manual 2'), + ]); + + // Index not yet persisted: meta.json must not exist on disk. + self::assertFileDoesNotExist($this->tempDir . '/meta.json'); + + $database->save(); + $afterSave = VectorDatabase::open($this->tempDir); + self::assertSame(2, $afterSave->count()); + } + private function createDocumentWithEmbedding(string $content): NeuronDocument { $document = new NeuronDocument($content); From 4402c89c26057aa60452f49cbe93e94b2347e353 Mon Sep 17 00:00:00 2001 From: Daniele Barbaro Date: Wed, 27 May 2026 23:03:32 +0200 Subject: [PATCH 4/6] feat: implement deleteBy via metadata filtering --- src/PHPVector.php | 23 +++++++---- tests/PHPVectorTest.php | 86 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 8 deletions(-) diff --git a/src/PHPVector.php b/src/PHPVector.php index 9487875..87991ff 100644 --- a/src/PHPVector.php +++ b/src/PHPVector.php @@ -4,7 +4,6 @@ namespace NeuronAI\PHPVector; -use NeuronAI\Exceptions\VectorStoreException; use NeuronAI\RAG\Document as NeuronDocument; use NeuronAI\RAG\VectorStore\VectorStoreInterface; use NeuronAI\StaticConstructor; @@ -81,21 +80,29 @@ private function persist(): void } } - /** - * @throws VectorStoreException - */ public function deleteBy(string $sourceType, ?string $sourceName = null): VectorStoreInterface { - throw new VectorStoreException('Deletion not supported.'); + $filters = [MetadataFilter::eq(self::SOURCE_TYPE_KEY, $sourceType)]; + + if ($sourceName !== null) { + $filters[] = MetadataFilter::eq(self::SOURCE_NAME_KEY, $sourceName); + } + + foreach ($this->database->metadataSearch(filters: $filters) as $result) { + $this->database->deleteDocument($result->document->id); + } + + $this->persist(); + + return $this; } /** - * @throws VectorStoreException + * @deprecated Use deleteBy() instead. */ public function deleteBySource(string $sourceType, string $sourceName): VectorStoreInterface { - $this->deleteBy($sourceType, $sourceName); - return $this; + return $this->deleteBy($sourceType, $sourceName); } /** diff --git a/tests/PHPVectorTest.php b/tests/PHPVectorTest.php index c297bd5..2c186db 100644 --- a/tests/PHPVectorTest.php +++ b/tests/PHPVectorTest.php @@ -294,10 +294,96 @@ public function testAutoSaveDisabledDoesNotPersistUntilManualSave(): void self::assertSame(2, $afterSave->count()); } + public function testDeleteByRemovesMatchingSourceType(): void + { + $database = new VectorDatabase(); + $adapter = new PHPVector($database); + + $adapter->addDocuments([ + $this->makeSourcedDocument('a', 'pdf', 'one.pdf'), + $this->makeSourcedDocument('b', 'pdf', 'two.pdf'), + $this->makeSourcedDocument('c', 'web', 'site'), + ]); + self::assertSame(3, $database->count()); + + $adapter->deleteBy('pdf'); + + self::assertSame(1, $database->count()); + } + + public function testDeleteByRemovesOnlyExactTypeAndName(): void + { + $database = new VectorDatabase(); + $adapter = new PHPVector($database); + + $adapter->addDocuments([ + $this->makeSourcedDocument('a', 'pdf', 'one.pdf'), + $this->makeSourcedDocument('b', 'pdf', 'two.pdf'), + ]); + + $adapter->deleteBy('pdf', 'one.pdf'); + + self::assertSame(1, $database->count()); + } + + public function testDeleteByWithNoMatchIsNoop(): void + { + $database = new VectorDatabase(); + $adapter = new PHPVector($database); + + $adapter->addDocument($this->makeSourcedDocument('a', 'pdf', 'one.pdf')); + + $result = $adapter->deleteBy('missing'); + + self::assertSame(1, $database->count()); + self::assertSame($adapter, $result); + } + + public function testDeleteBySourceDelegatesToDeleteBy(): void + { + $database = new VectorDatabase(); + $adapter = new PHPVector($database); + + $adapter->addDocuments([ + $this->makeSourcedDocument('a', 'pdf', 'one.pdf'), + $this->makeSourcedDocument('b', 'web', 'site'), + ]); + + $adapter->deleteBySource('pdf', 'one.pdf'); + + self::assertSame(1, $database->count()); + } + + public function testDeleteByPersistsWhenAutoSaveEnabled(): void + { + $database = new VectorDatabase(path: $this->tempDir); + $adapter = new PHPVector($database); + + $adapter->addDocuments([ + $this->makeSourcedDocument('a', 'pdf', 'one.pdf'), + $this->makeSourcedDocument('b', 'web', 'site'), + ]); + + $adapter->deleteBy('pdf'); + + $reopened = VectorDatabase::open($this->tempDir); + self::assertSame(1, $reopened->count()); + } + private function createDocumentWithEmbedding(string $content): NeuronDocument { $document = new NeuronDocument($content); $document->embedding = $this->createTestEmbedding(); return $document; } + + private function makeSourcedDocument(string $id, string $sourceType, string $sourceName): NeuronDocument + { + $document = new NeuronDocument('content ' . $id); + $document->id = $id; + $document->embedding = $this->createTestEmbedding(); + $document->sourceType = $sourceType; + $document->sourceName = $sourceName; + return $document; + } } From 6bccd531a47b30a2adf41a9fd26b33e88f0e594a Mon Sep 17 00:00:00 2001 From: Daniele Barbaro Date: Thu, 28 May 2026 00:13:15 +0200 Subject: [PATCH 5/6] docs: document usage, persistence, and deletion --- README.md | 102 ++++++++++++++++++++-------------------- tests/PHPVectorTest.php | 16 ++++++- 2 files changed, 66 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 25722c9..4a79ebd 100644 --- a/README.md +++ b/README.md @@ -1,70 +1,72 @@ -# PHPVector adapter for Neuron AI framework +# neuron-core/php-vector -This is the [PHPVector](https://github.com/ezimuel/PHPVector) adapter for the [Neuron AI framework](https://neuron-ai.dev/). +PHPVector adapter for the [Neuron AI](https://neuron-ai.dev) framework. Implements +`NeuronAI\RAG\VectorStore\VectorStoreInterface` on top of `ezimuel/phpvector`. -## Install +## Installation -``` +```bash composer require neuron-core/php-vector ``` -## Use in RAG +## Usage ```php use NeuronAI\PHPVector\PHPVector; +use PHPVector\VectorDatabase; + +// Persistent database: pass a path to enable on-disk storage. +$store = new PHPVector( + database: new VectorDatabase(path: '/var/data/mydb'), + topK: 5, +); +``` + +Inside a Neuron RAG class: -class MyRAG extends RAG +```php +protected function vectorStore(): VectorStoreInterface { - ... - - protected function vectorStore(): VectorStoreInterface - { - return new PHPVector( - database: new VectorDatabase(path: '/var/data/mydb'), - topK: 5 - ); - } + return new PHPVector( + database: new VectorDatabase(path: '/var/data/mydb'), + topK: 5, + ); } ``` -## Use in Retrieval components +## Persistence + +PHPVector separates document storage from index storage: + +- `new VectorDatabase(path: '...')` creates (or targets) a database directory. +- `VectorDatabase::open('...')` loads an existing database from disk. +- `addDocument()` writes the document file to disk on each call (asynchronously via `pcntl_fork` when available, otherwise synchronously). +- `save()` persists the HNSW + BM25 index and finalizes deletions. + +By default this adapter auto-saves after every mutation (`addDocument`, `addDocuments`, +`deleteBy`), batched to a single `save()` per call, so persistence "just works". Disable it +to manage `save()` yourself: ```php -use NeuronAI\PHPVector\PHPVector; +$store = new PHPVector(database: $db, autoSave: false); +// ... many addDocument() calls ... +$db->save(); +``` -class MyAgent extends Agent -{ - ... - - protected function tools(): array - { - return [ - RetrievalTool::make( - new SimilarityRetrieval( - $this->vectorStore(), - $this->embeddings() - ) - ), - ]; - } - - protected function vectorStore(): VectorStoreInterface - { - return new PHPVector( - database: new VectorDatabase(path: '/var/data/mydb'), - topK: 5 - ); - } - - protected function embeddings(): EmbeddingsProviderInterface - { - return new OllamaEmbeddingsProvider( - model: 'OLLAMA_EMBEDDINGS_MODEL' - ); - } -} +Auto-save is skipped for in-memory databases (no path), so it never throws. + +## Deletion + +`deleteBy()` removes documents by Neuron's `sourceType` / `sourceName`, which this adapter +stores as PHPVector metadata: + +```php +$store->deleteBy('pdf'); // all documents from sourceType "pdf" +$store->deleteBy('pdf', 'manual.pdf'); // only that exact source ``` -## Official documentation +## Requirements -**[Go to the official documentation](https://neuron.inspector.dev/)** +- PHP 8.1+ +- `ezimuel/phpvector` ^0.3.0 +- `neuron-core/neuron-ai` ^3.0 diff --git a/tests/PHPVectorTest.php b/tests/PHPVectorTest.php index 2c186db..234c367 100644 --- a/tests/PHPVectorTest.php +++ b/tests/PHPVectorTest.php @@ -9,6 +9,19 @@ use PHPVector\VectorDatabase; use PHPUnit\Framework\TestCase; +use function array_diff; +use function array_fill; +use function is_array; +use function is_dir; +use function iterator_to_array; +use function mt_getrandmax; +use function mt_rand; +use function rmdir; +use function scandir; +use function sys_get_temp_dir; +use function uniqid; +use function unlink; + class PHPVectorTest extends TestCase { private string $tempDir; @@ -85,7 +98,7 @@ public function testPersistDocumentsAcrossInstances(): void { // Create and persist documents with first instance $database = new VectorDatabase(path: $this->tempDir); - $adapter = new PHPVector($database); + $adapter = new PHPVector($database, autoSave: false); $documents = [ $this->createDocumentWithEmbedding('Persisted document 1'), @@ -139,7 +152,6 @@ public function testSimilaritySearchReturnsResults(): void $results = $adapter->similaritySearch($queryEmbedding); $this->assertNotEmpty($results); - $this->assertIsIterable($results); $resultsArray = is_array($results) ? $results : iterator_to_array($results); $this->assertCount(3, $resultsArray); From 1d2fa8711d8d1964188c9dc6ecc561778f53cc71 Mon Sep 17 00:00:00 2001 From: Daniele Barbaro Date: Thu, 28 May 2026 16:46:15 +0200 Subject: [PATCH 6/6] chore: unpin ezimuel/phpvector to ^0.3.0 --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 7e8df19..af04d0a 100644 --- a/composer.json +++ b/composer.json @@ -11,7 +11,7 @@ "license": "MIT", "require": { "php": "^8.1", - "ezimuel/phpvector": "dev-feat/is-persistent as 0.3.0", + "ezimuel/phpvector": "^0.3.0", "neuron-core/neuron-ai": "^3.0" }, "require-dev": {