From 383b94a3c79516a5de2cce747343a20b760396b9 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Fri, 29 May 2026 19:52:28 -0400 Subject: [PATCH 01/11] ai bug fixes --- .gitignore | 2 + .../MemoryVectorDatabase.cs | 14 ++- .../VectorDatabaseBase.cs | 24 +++- .../VectorStore/BasicDiskVectorStore.cs | 10 +- src/SharpVectorTest/BugDiscoveryTests.cs | 113 ++++++++++++++++++ src/test.sh | 38 ++++++ 6 files changed, 186 insertions(+), 15 deletions(-) create mode 100644 src/SharpVectorTest/BugDiscoveryTests.cs create mode 100755 src/test.sh diff --git a/.gitignore b/.gitignore index f5d883a..59ef96b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ TestResults/ docs/site .cache + +coveragereport diff --git a/src/Build5Nines.SharpVector/MemoryVectorDatabase.cs b/src/Build5Nines.SharpVector/MemoryVectorDatabase.cs index e08b425..22c3258 100644 --- a/src/Build5Nines.SharpVector/MemoryVectorDatabase.cs +++ b/src/Build5Nines.SharpVector/MemoryVectorDatabase.cs @@ -59,8 +59,11 @@ public override async Task DeserializeFromBinaryStreamAsync(Stream stream) { await base.DeserializeFromBinaryStreamAsync(stream); - // Re-initialize the IdGenerator with the max Id value from the VectorStore - _idGenerator = new IntIdGenerator(VectorStore.GetIds().Max()); + // Re-initialize the IdGenerator with the max Id value from the VectorStore when items exist. + var ids = VectorStore.GetIds().ToArray(); + _idGenerator = ids.Length > 0 + ? new IntIdGenerator(ids.Max()) + : new IntIdGenerator(); } /// @@ -71,7 +74,10 @@ public override void DeserializeFromBinaryStream(Stream stream) { base.DeserializeFromBinaryStream(stream); - // Re-initialize the IdGenerator with the max Id value from the VectorStore - _idGenerator = new IntIdGenerator(VectorStore.GetIds().Max()); + // Re-initialize the IdGenerator with the max Id value from the VectorStore when items exist. + var ids = VectorStore.GetIds().ToArray(); + _idGenerator = ids.Length > 0 + ? new IntIdGenerator(ids.Max()) + : new IntIdGenerator(); } } diff --git a/src/Build5Nines.SharpVector/VectorDatabaseBase.cs b/src/Build5Nines.SharpVector/VectorDatabaseBase.cs index 203cb58..9c40435 100644 --- a/src/Build5Nines.SharpVector/VectorDatabaseBase.cs +++ b/src/Build5Nines.SharpVector/VectorDatabaseBase.cs @@ -249,14 +249,20 @@ public async Task> SearchAsync var totalCountFoundInSearch = similarities.Count(); IEnumerable> resultsToReturn; - if (pageCount != null && pageCount >= 0 && pageIndex >= 0) { + int totalPages; + if (pageCount != null && pageCount >= 0 && pageIndex >= 0) + { resultsToReturn = similarities.Skip(pageIndex * pageCount.Value).Take(pageCount.Value); - } else { + totalPages = pageCount.Value == 0 ? 0 : (int)Math.Ceiling(totalCountFoundInSearch / (double)pageCount.Value); + } + else + { // no paging specified, return all results resultsToReturn = similarities; + totalPages = totalCountFoundInSearch > 0 ? 1 : 0; } - return new VectorTextResult(totalCountFoundInSearch, pageIndex, pageCount.HasValue ? pageCount.Value : 1, resultsToReturn); + return new VectorTextResult(totalCountFoundInSearch, pageIndex, totalPages, resultsToReturn); } private async Task>> CalculateVectorComparisonAsync(TVocabularyKey queryText, float? threshold = null, Func>? filter = null) @@ -673,14 +679,20 @@ public async Task> SearchAsync(string var totalCountFoundInSearch = similarities.Count(); IEnumerable> resultsToReturn; - if (pageCount != null && pageCount >= 0 && pageIndex >= 0) { + int totalPages; + if (pageCount != null && pageCount >= 0 && pageIndex >= 0) + { resultsToReturn = similarities.Skip(pageIndex * pageCount.Value).Take(pageCount.Value); - } else { + totalPages = pageCount.Value == 0 ? 0 : (int)Math.Ceiling(totalCountFoundInSearch / (double)pageCount.Value); + } + else + { // no paging specified, return all results resultsToReturn = similarities; + totalPages = totalCountFoundInSearch > 0 ? 1 : 0; } - return new VectorTextResult(totalCountFoundInSearch, pageIndex, pageCount.HasValue ? pageCount.Value : 1, resultsToReturn); + return new VectorTextResult(totalCountFoundInSearch, pageIndex, totalPages, resultsToReturn); } private async Task>> CalculateVectorComparisonAsync(string queryText, float? threshold = null, Func>? filter = null) diff --git a/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs b/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs index 137d725..fa48453 100644 --- a/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs +++ b/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs @@ -34,7 +34,7 @@ public class BasicDiskVectorStore _cache.Count; + public int Count => _index.Count; public BasicDiskVectorStore(string rootPath, TVocabularyStore vocabularyStore) { @@ -48,7 +48,7 @@ public BasicDiskVectorStore(string rootPath, TVocabularyStore vocabularyStore) _backgroundFlushTask = Task.Run(BackgroundFlusherAsync); } - public IEnumerable GetIds() => _cache.Keys; + public IEnumerable GetIds() => _index.Keys; public IVectorTextItem Get(TId id) { @@ -116,7 +116,7 @@ public IVectorTextItem Delete(TId id) return existing; } - public bool ContainsKey(TId id) => _cache.ContainsKey(id); + public bool ContainsKey(TId id) => _index.ContainsKey(id); public async Task SerializeToJsonStreamAsync(Stream stream) { @@ -134,7 +134,7 @@ public async Task DeserializeFromJsonStreamAsync(Stream stream) public IEnumerator>> GetEnumerator() { - foreach (var key in _cache.Keys) + foreach (var key in _index.Keys) { yield return new KeyValuePair>(key, (VectorTextItem)Get(key)); } @@ -144,7 +144,7 @@ public IEnumerator>> public async IAsyncEnumerator>> GetAsyncEnumerator(CancellationToken cancellationToken = default) { - foreach (var key in _cache.Keys) + foreach (var key in _index.Keys) { yield return new KeyValuePair>(key, (VectorTextItem)Get(key)); await Task.Yield(); diff --git a/src/SharpVectorTest/BugDiscoveryTests.cs b/src/SharpVectorTest/BugDiscoveryTests.cs new file mode 100644 index 0000000..4572a7b --- /dev/null +++ b/src/SharpVectorTest/BugDiscoveryTests.cs @@ -0,0 +1,113 @@ +namespace SharpVectorTest; + +using System.Diagnostics; +using Build5Nines.SharpVector; + +[TestClass] +public class BugDiscoveryTests +{ + private static string CreateTempDir() + { + var dir = Path.Combine(Path.GetTempPath(), "SharpVectorTests", Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(dir); + return dir; + } + + private static async Task WaitForDiskCheckpointAsync(string rootPath) + { + var indexPath = Path.Combine(rootPath, "index.json"); + var itemsPath = Path.Combine(rootPath, "items.bin"); + var walPath = Path.Combine(rootPath, "wal.log"); + var timeout = Stopwatch.StartNew(); + + while (timeout.Elapsed < TimeSpan.FromSeconds(5)) + { + if (File.Exists(indexPath) + && File.Exists(itemsPath) + && new FileInfo(itemsPath).Length > 0 + && File.Exists(walPath) + && new FileInfo(walPath).Length == 0) + { + return; + } + + await Task.Delay(25); + } + + Assert.Fail("Timed out waiting for disk checkpoint to complete."); + } + + [TestMethod] + public async Task SearchAsync_PageMetadata_ComputesTotalPagesFromTotalCount() + { + var vdb = new BasicMemoryVectorDatabase(); + + await vdb.AddTextsAsync( + [ + ("alpha one", "m1"), + ("alpha two", "m2"), + ("alpha three", "m3"), + ("alpha four", "m4"), + ("alpha five", "m5") + ]); + + var results = await vdb.SearchAsync("alpha", pageIndex: 0, pageCount: 2); + + Assert.AreEqual(5, results.TotalCount); + Assert.AreEqual(2, results.Texts.Count()); + Assert.AreEqual(3, results.TotalPages, "TotalPages should be the ceiling of TotalCount / pageCount."); + } + + [TestMethod] + public async Task EmptyMemoryDatabase_CanRoundTripThroughBinaryStream() + { + var original = new MemoryVectorDatabase(); + await using var stream = new MemoryStream(); + + await original.SerializeToBinaryStreamAsync(stream); + stream.Position = 0; + + var reloaded = new MemoryVectorDatabase(); + await reloaded.DeserializeFromBinaryStreamAsync(stream); + + var id = await reloaded.AddTextAsync("hello world", "meta1"); + var item = reloaded.GetText(id); + + Assert.AreEqual(1, id); + Assert.AreEqual("hello world", item.Text); + Assert.AreEqual("meta1", item.Metadata); + } + + [TestMethod] + public async Task ReopenedDiskDatabase_GetIds_ShouldIncludePersistedItemsAfterCheckpoint() + { + var root = CreateTempDir(); + var db = new BasicDiskVectorDatabase(root); + var id = await db.AddTextAsync("persisted text", "meta1"); + + await WaitForDiskCheckpointAsync(root); + + var reopened = new BasicDiskVectorDatabase(root); + + CollectionAssert.AreEqual(new[] { id }, reopened.GetIds().OrderBy(x => x).ToArray()); + Assert.AreEqual("persisted text", reopened.GetText(id).Text); + } + + [TestMethod] + public async Task ReopenedDiskDatabase_SearchAsync_ShouldNotTreatPersistedDatabaseAsEmpty() + { + var root = CreateTempDir(); + var db = new BasicDiskVectorDatabase(root); + var id = await db.AddTextAsync("persisted search text", "meta-search"); + + await WaitForDiskCheckpointAsync(root); + + var reopened = new BasicDiskVectorDatabase(root); + var results = await reopened.SearchAsync("persisted"); + + Assert.AreEqual(1, results.TotalCount); + Assert.AreEqual(id, results.Texts.Single().Id); + Assert.AreEqual("persisted search text", results.Texts.Single().Text); + Assert.AreEqual("meta-search", results.Texts.Single().Metadata); + } +} diff --git a/src/test.sh b/src/test.sh new file mode 100755 index 0000000..b82780d --- /dev/null +++ b/src/test.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +RESULTS_DIR="$SCRIPT_DIR/TestResults/Coverage" +REPORT_DIR="$SCRIPT_DIR/coveragereport" + +rm -rf "$RESULTS_DIR" "$REPORT_DIR" +mkdir -p "$RESULTS_DIR" "$REPORT_DIR" + +cd "$SCRIPT_DIR" + +echo "Running unit tests with code coverage..." +dotnet test SharpVector.sln \ + --configuration Debug \ + --results-directory "$RESULTS_DIR" \ + --logger "trx;LogFileName=test_results.trx" \ + --collect:"XPlat Code Coverage" + +echo "Installing/updating ReportGenerator..." +if ! dotnet tool update --global dotnet-reportgenerator-globaltool >/dev/null 2>&1; then + dotnet tool install --global dotnet-reportgenerator-globaltool >/dev/null 2>&1 +fi + +REPORTGENERATOR_CMD="${HOME}/.dotnet/tools/reportgenerator" + +echo "Generating coverage report..." +"$REPORTGENERATOR_CMD" \ + -reports:"$RESULTS_DIR/**/coverage.cobertura.xml" \ + -targetdir:"$REPORT_DIR" \ + -reporttypes:"Html;MarkdownSummary;TextSummary" + +echo +echo "Coverage report generated:" +echo " HTML: $REPORT_DIR/index.html" +echo " Markdown: $REPORT_DIR/Summary.md" +echo +cat "$REPORT_DIR/Summary.txt" From 613e630f286b623e5e23c3bc780c6fa86d17d94f Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:17:04 -0400 Subject: [PATCH 02/11] Update DiskVectorDatabaseTests.cs --- src/SharpVectorTest/DiskVectorDatabaseTests.cs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/SharpVectorTest/DiskVectorDatabaseTests.cs b/src/SharpVectorTest/DiskVectorDatabaseTests.cs index 458831f..7ccc0f4 100644 --- a/src/SharpVectorTest/DiskVectorDatabaseTests.cs +++ b/src/SharpVectorTest/DiskVectorDatabaseTests.cs @@ -50,6 +50,21 @@ public async Task Search_ReturnsSimilarResults() Assert.IsTrue(results.Texts.Any(r => r.Text.Contains("quick", StringComparison.OrdinalIgnoreCase))); } + [TestMethod] + public async Task AddText_IsImmediatelyVisibleToSearchAndIds() + { + var root = CreateTempDir(); + var db = new BasicDiskVectorDatabase(root); + + await db.AddTextAsync("The quick brown fox", "a"); + + Assert.AreEqual(1, db.GetIds().Count()); + + var results = await db.SearchAsync("quick fox", threshold: null, pageIndex: 0, pageCount: null); + Assert.AreEqual(1, results.Texts.Count()); + Assert.AreEqual("The quick brown fox", results.Texts.Single().Text); + } + [TestMethod] public async Task Delete_RemovesFromIndexButKeepsFile() { From b1b1ce11137c15b07f4f532ad52093c7f17556e0 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:17:06 -0400 Subject: [PATCH 03/11] Update BasicDiskVectorStore.cs --- .../VectorStore/BasicDiskVectorStore.cs | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs b/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs index fa48453..8d98aeb 100644 --- a/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs +++ b/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs @@ -26,6 +26,7 @@ public class BasicDiskVectorStore _index = new(); + private readonly ConcurrentDictionary _visibleIds = new(); private readonly ConcurrentDictionary> _cache = new(); private readonly ConcurrentQueue<(TId id, VectorTextItem? item, bool isDelete)> _pending = new(); @@ -34,7 +35,7 @@ public class BasicDiskVectorStore _index.Count; + public int Count => _visibleIds.Count; public BasicDiskVectorStore(string rootPath, TVocabularyStore vocabularyStore) { @@ -48,7 +49,7 @@ public BasicDiskVectorStore(string rootPath, TVocabularyStore vocabularyStore) _backgroundFlushTask = Task.Run(BackgroundFlusherAsync); } - public IEnumerable GetIds() => _index.Keys; + public IEnumerable GetIds() => _visibleIds.Keys; public IVectorTextItem Get(TId id) { @@ -58,6 +59,8 @@ public IVectorTextItem Get(TId id) _rwLock.EnterReadLock(); try { + if (!_visibleIds.ContainsKey(id)) throw new KeyNotFoundException(); + if (_cache.TryGetValue(id, out cached)) return cached; if (!_index.TryGetValue(id, out var offset)) throw new KeyNotFoundException(); using var fs = File.OpenRead(_itemsPath); fs.Seek(offset, SeekOrigin.Begin); @@ -76,11 +79,12 @@ public void Set(TId id, VectorTextItem item) // Write-Ahead Log entry to ensure durability (A in ACID) AppendWalRecord(id, item, isDelete: false); - // Update memory state atomically + // Update memory state atomically so reads observe writes immediately _rwLock.EnterWriteLock(); try { _cache[id] = item; + _visibleIds[id] = 0; _pending.Enqueue((id, item, false)); } finally @@ -106,6 +110,7 @@ public IVectorTextItem Delete(TId id) try { _cache.TryRemove(id, out _); + _visibleIds.TryRemove(id, out _); _pending.Enqueue((id, null, true)); } finally @@ -116,7 +121,7 @@ public IVectorTextItem Delete(TId id) return existing; } - public bool ContainsKey(TId id) => _index.ContainsKey(id); + public bool ContainsKey(TId id) => _visibleIds.ContainsKey(id); public async Task SerializeToJsonStreamAsync(Stream stream) { @@ -128,13 +133,17 @@ public async Task DeserializeFromJsonStreamAsync(Stream stream) var loaded = await JsonSerializer.DeserializeAsync>(stream); if (loaded != null) { - foreach (var kv in loaded) _index[kv.Key] = kv.Value; + foreach (var kv in loaded) + { + _index[kv.Key] = kv.Value; + _visibleIds[kv.Key] = 0; + } } } public IEnumerator>> GetEnumerator() { - foreach (var key in _index.Keys) + foreach (var key in _visibleIds.Keys) { yield return new KeyValuePair>(key, (VectorTextItem)Get(key)); } @@ -144,7 +153,7 @@ public IEnumerator>> public async IAsyncEnumerator>> GetAsyncEnumerator(CancellationToken cancellationToken = default) { - foreach (var key in _index.Keys) + foreach (var key in _visibleIds.Keys) { yield return new KeyValuePair>(key, (VectorTextItem)Get(key)); await Task.Yield(); @@ -180,7 +189,11 @@ private void LoadIndexIfExists() var loaded = JsonSerializer.Deserialize>(fs); if (loaded != null) { - foreach (var kv in loaded) _index[kv.Key] = kv.Value; + foreach (var kv in loaded) + { + _index[kv.Key] = kv.Value; + _visibleIds[kv.Key] = 0; + } } } @@ -201,6 +214,7 @@ private void RecoverFromWalOrIndex() if (isDelete) { _index.TryRemove(id, out _); + _visibleIds.TryRemove(id, out _); _cache.TryRemove(id, out _); } else @@ -215,6 +229,7 @@ private void RecoverFromWalOrIndex() WriteItem(ofs, item); ofs.Flush(true); _index[id] = offset; + _visibleIds[id] = 0; _cache[id] = item; } } From ba38dd86e839e097eb8451f2f25ed52f1e7af0da Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:18:57 -0400 Subject: [PATCH 04/11] Update CHANGELOG.md --- CHANGELOG.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81e87a9..66b6474 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.2.1 + +Add: + +- Added `src/test.sh` helper script to run the solution tests with XPlat code coverage and generate HTML, Markdown, and text coverage reports. +- Added regression tests in `BugDiscoveryTests` and `DiskVectorDatabaseTests` to cover paging metadata, empty database stream round-tripping, disk persistence reload behavior, and immediate visibility of disk-backed writes. + +Fixed: + +- Fixed `.Search()` / `.SearchAsync()` result paging metadata so `TotalPages` is calculated from the total result count instead of echoing the requested page size. +- Fixed `MemoryVectorDatabase` deserialization from an empty binary stream so ID generation resets correctly instead of failing when no items exist. +- Fixed `BasicDiskVectorStore` read-after-write behavior so added items are immediately visible to `Count`, `GetIds()`, `ContainsKey()`, enumeration, and search before the background disk flush completes. +- Fixed reopened disk-backed databases to correctly expose persisted IDs and search results after checkpoint recovery. + ## v2.2.0 Add: From 7113b6164622903483ccf50885c4bea6124fa0de Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:18:59 -0400 Subject: [PATCH 05/11] Update Build5Nines.SharpVector.csproj --- src/Build5Nines.SharpVector/Build5Nines.SharpVector.csproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Build5Nines.SharpVector/Build5Nines.SharpVector.csproj b/src/Build5Nines.SharpVector/Build5Nines.SharpVector.csproj index 269cf34..611b595 100644 --- a/src/Build5Nines.SharpVector/Build5Nines.SharpVector.csproj +++ b/src/Build5Nines.SharpVector/Build5Nines.SharpVector.csproj @@ -9,9 +9,9 @@ Build5Nines.SharpVector https://sharpvector.build5nines.com https://github.com/Build5Nines/SharpVector - 2.2.0 + 2.2.1 Lightweight In-memory Vector Database to embed in any .NET Applications - Copyright (c) 2025 Build5Nines LLC + Copyright (c) 2024-2026 Build5Nines LLC README.md LICENSE Chris Pietschmann From d33f0941dfdb75bc2d6dbfcc8889adecf433e39e Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:22:30 -0400 Subject: [PATCH 06/11] Update DiskVectorDatabaseTests.cs --- src/SharpVectorTest/DiskVectorDatabaseTests.cs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/SharpVectorTest/DiskVectorDatabaseTests.cs b/src/SharpVectorTest/DiskVectorDatabaseTests.cs index 7ccc0f4..ff7a3dd 100644 --- a/src/SharpVectorTest/DiskVectorDatabaseTests.cs +++ b/src/SharpVectorTest/DiskVectorDatabaseTests.cs @@ -83,4 +83,18 @@ public async Task Delete_RemovesFromIndexButKeepsFile() Assert.IsFalse(db2.GetIds().Contains(id)); Assert.ThrowsException(() => db2.GetText(id)); } + + [TestMethod] + public async Task Delete_IsPersistedWhenDatabaseIsImmediatelyReopened() + { + var root = CreateTempDir(); + var db = new BasicDiskVectorDatabase(root); + var id = await db.AddTextAsync("delete me now", "m"); + + db.DeleteText(id); + + var reopened = new BasicDiskVectorDatabase(root); + Assert.IsFalse(reopened.GetIds().Contains(id)); + Assert.ThrowsException(() => reopened.GetText(id)); + } } \ No newline at end of file From 7abeae903a2f2a4ed8d4a7cf95147fc8cac2bebc Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:22:32 -0400 Subject: [PATCH 07/11] Update BasicDiskVectorStore.cs --- .../VectorStore/BasicDiskVectorStore.cs | 90 +++++++++++-------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs b/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs index 8d98aeb..d8b4986 100644 --- a/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs +++ b/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs @@ -76,13 +76,13 @@ public IVectorTextItem Get(TId id) public void Set(TId id, VectorTextItem item) { - // Write-Ahead Log entry to ensure durability (A in ACID) - AppendWalRecord(id, item, isDelete: false); - - // Update memory state atomically so reads observe writes immediately _rwLock.EnterWriteLock(); try { + // Write-Ahead Log entry to ensure durability (A in ACID) + AppendWalRecord(id, item, isDelete: false); + + // Update memory state atomically so reads observe writes immediately _cache[id] = item; _visibleIds[id] = 0; _pending.Enqueue((id, item, false)); @@ -103,12 +103,12 @@ public IVectorTextItem Delete(TId id) { var existing = Get(id); - // WAL for delete - AppendWalRecord(id, item: null, isDelete: true); - _rwLock.EnterWriteLock(); try { + // WAL for delete + AppendWalRecord(id, item: null, isDelete: true); + _cache.TryRemove(id, out _); _visibleIds.TryRemove(id, out _); _pending.Enqueue((id, null, true)); @@ -268,26 +268,34 @@ private async Task BackgroundFlusherAsync() continue; } - using var itemsFs = new FileStream(_itemsPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.Read); - while (_pending.TryDequeue(out var op)) + _rwLock.EnterWriteLock(); + try { - if (op.isDelete) + using var itemsFs = new FileStream(_itemsPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.Read); + while (_pending.TryDequeue(out var op)) { - _index.TryRemove(op.id, out _); + if (op.isDelete) + { + _index.TryRemove(op.id, out _); + } + else if (op.item is not null) + { + itemsFs.Seek(0, SeekOrigin.End); + var offset = itemsFs.Position; + WriteItem(itemsFs, op.item); + _index[op.id] = offset; + } } - else if (op.item is not null) - { - itemsFs.Seek(0, SeekOrigin.End); - var offset = itemsFs.Position; - WriteItem(itemsFs, op.item); - _index[op.id] = offset; - } - } - itemsFs.Flush(true); - PersistIndex(); + itemsFs.Flush(true); + PersistIndex(); - // After index checkpoint, truncate WAL safely - File.WriteAllBytes(_walPath, Array.Empty()); + // After index checkpoint, truncate WAL safely + File.WriteAllBytes(_walPath, Array.Empty()); + } + finally + { + _rwLock.ExitWriteLock(); + } } catch (OperationCanceledException) { @@ -314,24 +322,32 @@ public void Dispose() // Attempt a final flush of pending operations synchronously try { - using var itemsFs = new FileStream(_itemsPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.Read); - while (_pending.TryDequeue(out var op)) + _rwLock.EnterWriteLock(); + try { - if (op.isDelete) + using var itemsFs = new FileStream(_itemsPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.Read); + while (_pending.TryDequeue(out var op)) { - _index.TryRemove(op.id, out _); - } - else if (op.item is not null) - { - itemsFs.Seek(0, SeekOrigin.End); - var offset = itemsFs.Position; - WriteItem(itemsFs, op.item); - _index[op.id] = offset; + if (op.isDelete) + { + _index.TryRemove(op.id, out _); + } + else if (op.item is not null) + { + itemsFs.Seek(0, SeekOrigin.End); + var offset = itemsFs.Position; + WriteItem(itemsFs, op.item); + _index[op.id] = offset; + } } + itemsFs.Flush(true); + PersistIndex(); + File.WriteAllBytes(_walPath, Array.Empty()); + } + finally + { + _rwLock.ExitWriteLock(); } - itemsFs.Flush(true); - PersistIndex(); - File.WriteAllBytes(_walPath, Array.Empty()); } catch { } From 3d3ced067e5a07ec8835763bb36ced1427d2e260 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:23:04 -0400 Subject: [PATCH 08/11] Update CHANGELOG.md --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66b6474..57ff930 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,14 +10,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Add: - Added `src/test.sh` helper script to run the solution tests with XPlat code coverage and generate HTML, Markdown, and text coverage reports. -- Added regression tests in `BugDiscoveryTests` and `DiskVectorDatabaseTests` to cover paging metadata, empty database stream round-tripping, disk persistence reload behavior, and immediate visibility of disk-backed writes. +- Added regression tests in `BugDiscoveryTests` and `DiskVectorDatabaseTests` to cover paging metadata, empty database stream round-tripping, disk persistence reload behavior, immediate visibility of disk-backed writes, and immediate reopen-after-delete persistence. Fixed: - Fixed `.Search()` / `.SearchAsync()` result paging metadata so `TotalPages` is calculated from the total result count instead of echoing the requested page size. - Fixed `MemoryVectorDatabase` deserialization from an empty binary stream so ID generation resets correctly instead of failing when no items exist. - Fixed `BasicDiskVectorStore` read-after-write behavior so added items are immediately visible to `Count`, `GetIds()`, `ContainsKey()`, enumeration, and search before the background disk flush completes. +- Fixed `BasicDiskVectorStore` delete persistence to avoid WAL truncation races when a database is reopened immediately after delete operations. - Fixed reopened disk-backed databases to correctly expose persisted IDs and search results after checkpoint recovery. +- Updated the package version to `2.2.1` and refreshed the copyright year range to `2024-2026`. + +Notes: + +- Added `coveragereport` to `.gitignore` to keep generated coverage artifacts out of source control. ## v2.2.0 From 309aa419ecd4319c3afc7dc231fb1322452dba07 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:33:00 -0400 Subject: [PATCH 09/11] Create CoverageExpansionTests.cs --- src/SharpVectorTest/CoverageExpansionTests.cs | 433 ++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 src/SharpVectorTest/CoverageExpansionTests.cs diff --git a/src/SharpVectorTest/CoverageExpansionTests.cs b/src/SharpVectorTest/CoverageExpansionTests.cs new file mode 100644 index 0000000..b81630d --- /dev/null +++ b/src/SharpVectorTest/CoverageExpansionTests.cs @@ -0,0 +1,433 @@ +namespace SharpVectorTest; + +using System.ComponentModel.DataAnnotations; +using System.IO.Compression; +using System.Text; +using Build5Nines.SharpVector; +using Build5Nines.SharpVector.Data; +using Build5Nines.SharpVector.Id; +using Build5Nines.SharpVector.VectorStore; +using Build5Nines.SharpVector.Vocabulary; + +[TestClass] +public class CoverageExpansionTests +{ + private static string CreateTempDir() + { + var dir = Path.Combine(Path.GetTempPath(), "SharpVectorTests", Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(dir); + return dir; + } + + private static MemoryStream CreateZipArchive(params (string name, string content)[] entries) + { + var stream = new MemoryStream(); + using (var archive = new ZipArchive(stream, ZipArchiveMode.Create, leaveOpen: true)) + { + foreach (var entry in entries) + { + var zipEntry = archive.CreateEntry(entry.name); + using var entryStream = zipEntry.Open(); + using var writer = new StreamWriter(entryStream, Encoding.UTF8, leaveOpen: true); + writer.Write(entry.content); + } + } + + stream.Position = 0; + return stream; + } + + [TestMethod] + public void DatabaseInfo_Constructors_AssignExpectedValues() + { + var empty = new DatabaseInfo(); + Assert.IsNull(empty.Schema); + Assert.IsNull(empty.Version); + Assert.IsNull(empty.ClassType); + + var classOnly = new DatabaseInfo("MyType"); + Assert.AreEqual("Build5Nines.SharpVector", classOnly.Schema); + Assert.AreEqual("1.0.0", classOnly.Version); + Assert.AreEqual("MyType", classOnly.ClassType); + + var custom = new DatabaseInfo("schema", "2.0", "CustomType"); + Assert.AreEqual("schema", custom.Schema); + Assert.AreEqual("2.0", custom.Version); + Assert.AreEqual("CustomType", custom.ClassType); + } + + [TestMethod] + public void DatabaseFileExceptions_PreserveMessagesAndState() + { + var inner = new InvalidOperationException("inner"); + + var baseDefault = new DatabaseFileException(); + Assert.IsNotNull(baseDefault); + + var baseWithMessage = new DatabaseFileException("base message"); + Assert.AreEqual("base message", baseWithMessage.Message); + + var baseWithInner = new DatabaseFileException("wrapped", inner); + Assert.AreEqual("wrapped", baseWithInner.Message); + Assert.AreSame(inner, baseWithInner.InnerException); + + Assert.AreEqual("info", new DatabaseFileInfoException("info").Message); + Assert.AreEqual("schema", new DatabaseFileSchemaException("schema").Message); + Assert.AreEqual("version", new DatabaseFileVersionException("version").Message); + Assert.AreEqual("class", new DatabaseFileClassTypeException("class").Message); + + var missing = new DatabaseFileMissingEntryException("missing", "vectorstore"); + Assert.AreEqual("missing", missing.Message); + Assert.AreEqual("vectorstore", missing.MissingEntry); + } + + [TestMethod] + public void VectorTextModelTypes_ExposeExpectedProperties() + { + var vector = new[] { 1f, 2f, 3f }; + var databaseItem = new VectorTextDatabaseItem(7, "hello", "meta", vector); + Assert.AreEqual(7, databaseItem.Id); + Assert.AreEqual("hello", databaseItem.Text); + Assert.AreEqual("meta", databaseItem.Metadata); + CollectionAssert.AreEqual(vector, databaseItem.Vector); + + var textItem = new VectorTextItem("doc", "m1", vector); + textItem.Text = "doc2"; + textItem.Metadata = "m2"; + textItem.Vector = new[] { 5f, 6f }; + Assert.AreEqual("doc2", textItem.Text); + Assert.AreEqual("m2", textItem.Metadata); + CollectionAssert.AreEqual(new[] { 5f, 6f }, textItem.Vector); + + var resultItem = new VectorTextResultItem(9, textItem, 0.42f); + Assert.AreEqual(9, resultItem.Id); + Assert.AreEqual("doc2", resultItem.Text); + Assert.AreEqual("m2", resultItem.Metadata); + Assert.AreEqual(0.42f, resultItem.Similarity); + CollectionAssert.AreEqual(new[] { 5f, 6f }, resultItem.Vectors.ToArray()); +#pragma warning disable CS0618 + Assert.AreEqual(0.42f, resultItem.VectorComparison); +#pragma warning restore CS0618 + } + + [TestMethod] + public void VectorTextResult_IsEmpty_TracksNullEmptyAndPopulatedCollections() + { + var nullTexts = new VectorTextResult(0, 0, 0, null!); + Assert.IsTrue(nullTexts.IsEmpty); + + var emptyTexts = new VectorTextResult(0, 0, 0, Array.Empty>()); + Assert.IsTrue(emptyTexts.IsEmpty); + Assert.AreEqual(0, emptyTexts.TotalCount); + Assert.AreEqual(0, emptyTexts.PageIndex); + Assert.AreEqual(0, emptyTexts.TotalPages); + + var item = new VectorTextResultItem(1, new VectorTextItem("alpha", "meta", new[] { 1f }), 0.9f); + var populated = new VectorTextResult(1, 2, 3, new[] { item }); + Assert.IsFalse(populated.IsEmpty); + Assert.AreEqual(1, populated.TotalCount); + Assert.AreEqual(2, populated.PageIndex); + Assert.AreEqual(3, populated.TotalPages); + } + + [TestMethod] + public void IdGenerators_GenerateExpectedIdentifiers() + { + var guidGenerator = new GuidIdGenerator(); + var firstGuid = guidGenerator.NewId(); + var secondGuid = guidGenerator.NewId(); + Assert.AreNotEqual(Guid.Empty, firstGuid); + Assert.AreNotEqual(firstGuid, secondGuid); + + var intGenerator = new IntIdGenerator(); + Assert.AreEqual(1, intGenerator.NewId()); + Assert.AreEqual(2, intGenerator.NewId()); + + var seededIntGenerator = new IntIdGenerator(10); + Assert.AreEqual(11, seededIntGenerator.NewId()); + + var numericGenerator = new NumericIdGenerator(100); + Assert.AreEqual(101L, numericGenerator.NewId()); + numericGenerator.SetMostRecent(500L); + Assert.AreEqual(501L, numericGenerator.NewId()); + } + + [TestMethod] + public async Task MemoryAndVocabularyStores_HandleNullJsonDeleteAndAsyncEnumeration() + { + var vectorStore = new MemoryDictionaryVectorStore(); + await vectorStore.SetAsync(1, new VectorTextItem("first", "m1", new[] { 1f, 2f })); + Assert.IsTrue(vectorStore.ContainsKey(1)); + Assert.AreEqual("first", vectorStore.Get(1).Text); + + var seen = new List(); + await foreach (var item in vectorStore) + { + seen.Add(item.Key); + } + CollectionAssert.AreEqual(new[] { 1 }, seen); + + var removed = vectorStore.Delete(1); + Assert.AreEqual("first", removed.Text); + Assert.IsFalse(vectorStore.ContainsKey(1)); + Assert.ThrowsException(() => vectorStore.Get(1)); + Assert.ThrowsException(() => vectorStore.Delete(1)); + await Assert.ThrowsExceptionAsync(() => vectorStore.SerializeToJsonStreamAsync(null!)); + + using var nullVectorStoreStream = new MemoryStream(Encoding.UTF8.GetBytes("null")); + await vectorStore.DeserializeFromJsonStreamAsync(nullVectorStoreStream); + Assert.AreEqual(0, vectorStore.Count); + + var vocabularyStore = new DictionaryVocabularyStore(); + vocabularyStore.Update(["alpha", "beta"]); + Assert.AreEqual(2, vocabularyStore.Count); + Assert.IsTrue(vocabularyStore.TryGetValue("alpha", out _)); + await Assert.ThrowsExceptionAsync(() => vocabularyStore.SerializeToJsonStreamAsync(null!)); + + using var nullVocabularyStream = new MemoryStream(Encoding.UTF8.GetBytes("null")); + await vocabularyStore.DeserializeFromJsonStreamAsync(nullVocabularyStream); + Assert.AreEqual(0, vocabularyStore.Count); + Assert.IsFalse(vocabularyStore.TryGetValue("alpha", out _)); + } + + [TestMethod] + public async Task TextDataLoader_CoversChunkingAndValidationBranches() + { + var loader = new InspectableTextDataLoader(new BasicMemoryVectorDatabase()); + + var sentences = loader.ExposeChunkText( + "One. Two? Three!", + new TextChunkingOptions + { + Method = TextChunkingMethod.Sentence, + RetrieveMetadata = chunk => chunk + }); + CollectionAssert.AreEqual(new[] { "One.", "Two?", "Three!" }, sentences); + + var fixedLength = loader.ExposeChunkText( + "Hello world from SharpVector tests", + new TextChunkingOptions + { + Method = TextChunkingMethod.FixedLength, + ChunkSize = 2, + RetrieveMetadata = chunk => chunk + }); + CollectionAssert.AreEqual(new[] { "hello world", "from sharpvector", "tests" }, fixedLength); + + var chineseAndEnglish = loader.ExposeChunkText( + "你好世界 alpha beta", + new TextChunkingOptions + { + Method = TextChunkingMethod.FixedLength, + ChunkSize = 6, + RetrieveMetadata = chunk => chunk + }); + CollectionAssert.AreEqual(new[] { "你好世界 alpha beta" }, chineseAndEnglish); + + Assert.ThrowsException(() => + loader.ExposeChunkText( + "one two three", + new TextChunkingOptions + { + Method = (TextChunkingMethod)999, + RetrieveMetadata = chunk => chunk + })); + + Assert.ThrowsException(() => + loader.ExposeChunkText( + "one two three four", + new TextChunkingOptions + { + Method = TextChunkingMethod.OverlappingWindow, + ChunkSize = 2, + OverlapSize = 2, + RetrieveMetadata = chunk => chunk + })); + + var syncLoader = new TextDataLoader(new BasicMemoryVectorDatabase()); + Assert.ThrowsException(() => + syncLoader.AddDocument( + "document", + new TextChunkingOptions + { + Method = TextChunkingMethod.Paragraph, + RetrieveMetadata = null! + })); + + await Assert.ThrowsExceptionAsync(() => + syncLoader.AddDocumentAsync( + "document", + new TextChunkingOptions + { + Method = TextChunkingMethod.Paragraph, + RetrieveMetadata = null! + })); + } + + [TestMethod] + public async Task DatabaseFile_HelperMethods_ValidateStreamsEntriesAndMetadata() + { + await Assert.ThrowsExceptionAsync(() => + DatabaseFile.SaveDatabaseToZipArchiveAsync(null!, new DatabaseInfo("Type"), _ => Task.CompletedTask)); + + using var nullInfoStream = new MemoryStream(Encoding.UTF8.GetBytes("null")); + await Assert.ThrowsExceptionAsync(() => DatabaseFile.LoadDatabaseInfoFromJsonAsync(nullInfoStream)); + + await Assert.ThrowsExceptionAsync(() => + DatabaseFile.LoadDatabaseFromZipArchiveAsync(null!, "Type", _ => Task.CompletedTask)); + + using (var missingDatabaseArchive = CreateZipArchive()) + using (var archive = new ZipArchive(missingDatabaseArchive, ZipArchiveMode.Read, leaveOpen: true)) + { + await Assert.ThrowsExceptionAsync(() => DatabaseFile.LoadDatabaseInfoAsync(archive)); + } + + using (var missingVectorArchiveStream = CreateZipArchive(("database.json", "{}"))) + using (var archive = new ZipArchive(missingVectorArchiveStream, ZipArchiveMode.Read, leaveOpen: true)) + { + var vectorStore = new MemoryDictionaryVectorStore(); + var ex = await Assert.ThrowsExceptionAsync(() => DatabaseFile.LoadVectorStoreAsync(archive, vectorStore)); + Assert.AreEqual("vectorstore", ex.MissingEntry); + } + + using (var missingVocabularyArchiveStream = CreateZipArchive(("database.json", "{}"))) + using (var archive = new ZipArchive(missingVocabularyArchiveStream, ZipArchiveMode.Read, leaveOpen: true)) + { + var vocabularyStore = new DictionaryVocabularyStore(); + var ex = await Assert.ThrowsExceptionAsync(() => DatabaseFile.LoadVocabularyStoreAsync(archive, vocabularyStore)); + Assert.AreEqual("vocabularystore", ex.MissingEntry); + } + + using var schemaStream = CreateZipArchive(("database.json", "{\"Schema\":\"Wrong\",\"Version\":\"1.0.0\",\"ClassType\":\"Type\"}")); + await Assert.ThrowsExceptionAsync(() => + DatabaseFile.LoadDatabaseFromZipArchiveAsync(schemaStream, "Type", _ => Task.CompletedTask)); + + using var versionStream = CreateZipArchive(("database.json", "{\"Schema\":\"Build5Nines.SharpVector\",\"Version\":\"2.0.0\",\"ClassType\":\"Type\"}")); + await Assert.ThrowsExceptionAsync(() => + DatabaseFile.LoadDatabaseFromZipArchiveAsync(versionStream, "Type", _ => Task.CompletedTask)); + + using var classTypeStream = CreateZipArchive(("database.json", "{\"Schema\":\"Build5Nines.SharpVector\",\"Version\":\"1.0.0\",\"ClassType\":\"OtherType\"}")); + await Assert.ThrowsExceptionAsync(() => + DatabaseFile.LoadDatabaseFromZipArchiveAsync(classTypeStream, "Type", _ => Task.CompletedTask)); + } + + [TestMethod] + public async Task VectorDatabaseBase_BranchesCoverNullEmptyAndEmptyDatabasePaths() + { + var bowDatabase = new BasicMemoryVectorDatabase(); + await Assert.ThrowsExceptionAsync(() => bowDatabase.AddTextsAsync(null!)); + Assert.AreEqual(0, (await bowDatabase.AddTextsAsync(Array.Empty<(string text, string? metadata)>())).Count); + await Assert.ThrowsExceptionAsync(() => bowDatabase.SearchAsync("query")); + + var embeddingsDatabase = new EmbeddingGeneratorMemoryVectorDatabase(); + await Assert.ThrowsExceptionAsync(() => embeddingsDatabase.AddTextsAsync(null!)); + Assert.AreEqual(0, (await embeddingsDatabase.AddTextsAsync(Array.Empty<(string text, string? metadata)>())).Count); + await Assert.ThrowsExceptionAsync(() => embeddingsDatabase.SearchAsync("query")); + } + + [TestMethod] + public void MemoryVectorDatabase_ObsoleteSyncSerializationMethods_RoundTripEmptyAndNonEmptyDatabases() + { +#pragma warning disable CS0618 + var populated = new MemoryVectorDatabase(); + populated.AddText("hello world", "m1"); + using var populatedStream = new MemoryStream(); + populated.SerializeToJsonStream(populatedStream); + populatedStream.Position = 0; + + var reloaded = new MemoryVectorDatabase(); + reloaded.DeserializeFromJsonStream(populatedStream); + Assert.AreEqual("hello world", reloaded.GetText(1).Text); + Assert.AreEqual("m1", reloaded.GetText(1).Metadata); + + var empty = new MemoryVectorDatabase(); + using var emptyStream = new MemoryStream(); + empty.SerializeToJsonStream(emptyStream); + emptyStream.Position = 0; + + var reloadedEmpty = new MemoryVectorDatabase(); + reloadedEmpty.DeserializeFromJsonStream(emptyStream); + var id = reloadedEmpty.AddText("new item", "m2"); + Assert.AreEqual(1, id); +#pragma warning restore CS0618 + } + + [TestMethod] + public async Task MemoryVectorDatabase_ObsoleteAsyncSerializationMethods_RoundTripDatabase() + { +#pragma warning disable CS0618 + var database = new MemoryVectorDatabase(); + await database.AddTextAsync("alpha", "m1"); + using var stream = new MemoryStream(); + await database.SerializeToJsonStreamAsync(stream); + stream.Position = 0; + + var reloaded = new MemoryVectorDatabase(); + await reloaded.DeserializeFromJsonStreamAsync(stream); + Assert.AreEqual("alpha", reloaded.GetText(1).Text); + Assert.AreEqual("m1", reloaded.GetText(1).Metadata); +#pragma warning restore CS0618 + } + + [TestMethod] + public async Task EmbeddingVectorDatabase_ObsoleteSerializationMethods_RoundTripDatabase() + { +#pragma warning disable CS0618 + var database = new EmbeddingGeneratorMemoryVectorDatabase(); + await database.AddTextAsync("alpha", "m1"); + + using var asyncStream = new MemoryStream(); + await database.SerializeToJsonStreamAsync(asyncStream); + asyncStream.Position = 0; + var asyncReloaded = new EmbeddingGeneratorMemoryVectorDatabase(); + await asyncReloaded.DeserializeFromJsonStreamAsync(asyncStream); + Assert.AreEqual("alpha", asyncReloaded.GetText(1).Text); + + using var syncStream = new MemoryStream(); + database.SerializeToJsonStream(syncStream); + syncStream.Position = 0; + var syncReloaded = new EmbeddingGeneratorMemoryVectorDatabase(); + syncReloaded.DeserializeFromJsonStream(syncStream); + Assert.AreEqual("alpha", syncReloaded.GetText(1).Text); +#pragma warning restore CS0618 + } + + [TestMethod] + public async Task BasicDiskVectorDatabase_ObsoleteDeserializeMethods_RoundTripEmptyDatabase() + { +#pragma warning disable CS0618 + var source = new BasicDiskVectorDatabase(CreateTempDir()); + + using var asyncStream = new MemoryStream(); + await source.SerializeToJsonStreamAsync(asyncStream); + asyncStream.Position = 0; + + var asyncReloaded = new BasicDiskVectorDatabase(CreateTempDir()); + await asyncReloaded.DeserializeFromJsonStreamAsync(asyncStream); + var asyncId = await asyncReloaded.AddTextAsync("alpha", "m1"); + Assert.AreEqual(1, asyncId); + + using var syncStream = new MemoryStream(); + source.SerializeToJsonStream(syncStream); + syncStream.Position = 0; + + var syncReloaded = new BasicDiskVectorDatabase(CreateTempDir()); + syncReloaded.DeserializeFromJsonStream(syncStream); + var syncId = syncReloaded.AddText("beta", "m2"); + Assert.AreEqual(1, syncId); +#pragma warning restore CS0618 + } + + private sealed class InspectableTextDataLoader : TextDataLoader + { + public InspectableTextDataLoader(IVectorDatabase vectorDatabase) + : base(vectorDatabase) + { + } + + public List ExposeChunkText(string text, TextChunkingOptions chunkingOptions) + { + return base.ChunkText(text, chunkingOptions); + } + } +} From f22fcac766bea6775ee29b4b406d5a92afc541f8 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:37:49 -0400 Subject: [PATCH 10/11] more bug fixing unit tests --- .../VectorStore/BasicDiskVectorStore.cs | 61 +++++++++++- .../Vocabulary/BasicDiskVocabularyStore.cs | 1 + .../DiskStoreRegressionTests.cs | 95 +++++++++++++++++++ 3 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 src/SharpVectorTest/DiskStoreRegressionTests.cs diff --git a/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs b/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs index d8b4986..c47fcf7 100644 --- a/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs +++ b/src/Build5Nines.SharpVector/VectorStore/BasicDiskVectorStore.cs @@ -125,18 +125,69 @@ public IVectorTextItem Delete(TId id) public async Task SerializeToJsonStreamAsync(Stream stream) { - await JsonSerializer.SerializeAsync(stream, _index); + _rwLock.EnterWriteLock(); + try + { + using var itemsFs = new FileStream(_itemsPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.Read); + while (_pending.TryDequeue(out var op)) + { + if (op.isDelete) + { + _index.TryRemove(op.id, out _); + } + else if (op.item is not null) + { + itemsFs.Seek(0, SeekOrigin.End); + var offset = itemsFs.Position; + WriteItem(itemsFs, op.item); + _index[op.id] = offset; + } + } + itemsFs.Flush(true); + PersistIndex(); + File.WriteAllBytes(_walPath, Array.Empty()); + + var items = _visibleIds.Keys + .Select(id => new KeyValuePair>(id, (VectorTextItem)Get(id))) + .ToDictionary(kv => kv.Key, kv => kv.Value); + + await JsonSerializer.SerializeAsync(stream, items); + } + finally + { + _rwLock.ExitWriteLock(); + } } public async Task DeserializeFromJsonStreamAsync(Stream stream) { - var loaded = await JsonSerializer.DeserializeAsync>(stream); + var loaded = await JsonSerializer.DeserializeAsync>>(stream); if (loaded != null) { - foreach (var kv in loaded) + _rwLock.EnterWriteLock(); + try { - _index[kv.Key] = kv.Value; - _visibleIds[kv.Key] = 0; + _index.Clear(); + _visibleIds.Clear(); + _cache.Clear(); + + using var itemsFs = new FileStream(_itemsPath, FileMode.Create, FileAccess.ReadWrite, FileShare.Read); + foreach (var kv in loaded) + { + itemsFs.Seek(0, SeekOrigin.End); + var offset = itemsFs.Position; + WriteItem(itemsFs, kv.Value); + _index[kv.Key] = offset; + _visibleIds[kv.Key] = 0; + _cache[kv.Key] = kv.Value; + } + itemsFs.Flush(true); + PersistIndex(); + File.WriteAllBytes(_walPath, Array.Empty()); + } + finally + { + _rwLock.ExitWriteLock(); } } } diff --git a/src/Build5Nines.SharpVector/Vocabulary/BasicDiskVocabularyStore.cs b/src/Build5Nines.SharpVector/Vocabulary/BasicDiskVocabularyStore.cs index 2b1c6cd..a1e0bd1 100644 --- a/src/Build5Nines.SharpVector/Vocabulary/BasicDiskVocabularyStore.cs +++ b/src/Build5Nines.SharpVector/Vocabulary/BasicDiskVocabularyStore.cs @@ -76,6 +76,7 @@ public async Task DeserializeFromJsonStreamAsync(Stream stream) if (loaded != null) { _vocab = loaded; + _cache = new ConcurrentDictionary(loaded); } } diff --git a/src/SharpVectorTest/DiskStoreRegressionTests.cs b/src/SharpVectorTest/DiskStoreRegressionTests.cs new file mode 100644 index 0000000..e3eb940 --- /dev/null +++ b/src/SharpVectorTest/DiskStoreRegressionTests.cs @@ -0,0 +1,95 @@ +namespace SharpVectorTest; + +using System.Text; +using Build5Nines.SharpVector; +using Build5Nines.SharpVector.VectorStore; +using Build5Nines.SharpVector.Vocabulary; + +[TestClass] +public class DiskStoreRegressionTests +{ + private static string CreateTempDir() + { + var dir = Path.Combine(Path.GetTempPath(), "SharpVectorTests", Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(dir); + return dir; + } + + [TestMethod] + public async Task BasicDiskVocabularyStore_DeserializeFromJsonStreamAsync_RestoresReadableCache() + { + var root = CreateTempDir(); + var vocabularyStore = new BasicDiskVocabularyStore(root); + vocabularyStore.Update(["alpha", "beta"]); + vocabularyStore.Dispose(); + + using var stream = new MemoryStream(); + await vocabularyStore.SerializeToJsonStreamAsync(stream); + stream.Position = 0; + + var reloaded = new BasicDiskVocabularyStore(CreateTempDir()); + await reloaded.DeserializeFromJsonStreamAsync(stream); + + Assert.AreEqual(2, reloaded.Count, "Deserialization should restore in-memory lookup state."); + Assert.IsTrue(reloaded.TryGetValue("alpha", out var alphaIndex)); + Assert.IsTrue(reloaded.TryGetValue("beta", out var betaIndex)); + Assert.AreEqual(0, alphaIndex); + Assert.AreEqual(1, betaIndex); + } + + [TestMethod] + public async Task BasicDiskVectorStore_SerializeToJsonStreamAsync_PersistsActualItemsForDeserialization() + { + var sourceRoot = CreateTempDir(); + using var sourceVocabulary = new BasicDiskVocabularyStore(sourceRoot); + using var sourceStore = new BasicDiskVectorStore(sourceRoot, sourceVocabulary); + sourceStore.Set(1, new VectorTextItem("alpha text", "m1", new[] { 1f, 2f })); + await sourceStore.SetAsync(2, new VectorTextItem("beta text", "m2", new[] { 3f, 4f })); + + using var stream = new MemoryStream(); + await sourceStore.SerializeToJsonStreamAsync(stream); + stream.Position = 0; + var json = Encoding.UTF8.GetString(stream.ToArray()); + StringAssert.Contains(json, "alpha text"); + StringAssert.Contains(json, "beta text"); + + var targetRoot = CreateTempDir(); + using var targetVocabulary = new BasicDiskVocabularyStore(targetRoot); + using var targetStore = new BasicDiskVectorStore(targetRoot, targetVocabulary); + stream.Position = 0; + await targetStore.DeserializeFromJsonStreamAsync(stream); + + Assert.AreEqual(2, targetStore.Count); + Assert.AreEqual("alpha text", targetStore.Get(1).Text); + Assert.AreEqual("beta text", targetStore.Get(2).Text); + CollectionAssert.AreEqual(new[] { 1f, 2f }, targetStore.Get(1).Vector); + CollectionAssert.AreEqual(new[] { 3f, 4f }, targetStore.Get(2).Vector); + } + + [TestMethod] + public async Task BasicDiskVectorStore_EnumerationAndDelete_WorkAcrossSyncAndAsyncPaths() + { + var root = CreateTempDir(); + using var vocabulary = new BasicDiskVocabularyStore(root); + using var store = new BasicDiskVectorStore(root, vocabulary); + store.Set(1, new VectorTextItem("alpha", "m1", new[] { 1f })); + store.Set(2, new VectorTextItem("beta", "m2", new[] { 2f })); + + var syncIds = store.Select(x => x.Key).OrderBy(x => x).ToArray(); + CollectionAssert.AreEqual(new[] { 1, 2 }, syncIds); + + var asyncIds = new List(); + await foreach (var item in store) + { + asyncIds.Add(item.Key); + } + asyncIds.Sort(); + CollectionAssert.AreEqual(new[] { 1, 2 }, asyncIds); + + Assert.IsTrue(store.ContainsKey(1)); + var removed = store.Delete(1); + Assert.AreEqual("alpha", removed.Text); + Assert.IsFalse(store.ContainsKey(1)); + Assert.ThrowsException(() => store.Get(1)); + } +} From 3810965c5dacb7ce812f4beb02806d91f38b5a94 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Sat, 30 May 2026 09:42:08 -0400 Subject: [PATCH 11/11] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57ff930..5b4b85f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ Add: - Added `src/test.sh` helper script to run the solution tests with XPlat code coverage and generate HTML, Markdown, and text coverage reports. - Added regression tests in `BugDiscoveryTests` and `DiskVectorDatabaseTests` to cover paging metadata, empty database stream round-tripping, disk persistence reload behavior, immediate visibility of disk-backed writes, and immediate reopen-after-delete persistence. +- Added `CoverageExpansionTests` to extend coverage across model types, exception hierarchy, ID generators, `VectorTextResult`, `TextDataLoader`, `DatabaseFile`, memory store branches, and obsolete serialization wrappers. +- Added `DiskStoreRegressionTests` to validate disk-backed vocabulary and vector store serialization, deserialization, enumeration, and persistence behavior. Fixed: @@ -19,6 +21,9 @@ Fixed: - Fixed `BasicDiskVectorStore` read-after-write behavior so added items are immediately visible to `Count`, `GetIds()`, `ContainsKey()`, enumeration, and search before the background disk flush completes. - Fixed `BasicDiskVectorStore` delete persistence to avoid WAL truncation races when a database is reopened immediately after delete operations. - Fixed reopened disk-backed databases to correctly expose persisted IDs and search results after checkpoint recovery. +- Fixed `BasicDiskVocabularyStore.DeserializeFromJsonStreamAsync` so deserialized vocabularies restore the in-memory lookup cache as well as the persisted vocabulary map. +- Fixed `BasicDiskVectorStore.SerializeToJsonStreamAsync` to flush pending operations before serialization and persist actual vector items rather than only index offsets. +- Fixed `BasicDiskVectorStore.DeserializeFromJsonStreamAsync` to rebuild item storage, index, visible IDs, and cache from serialized vector items. - Updated the package version to `2.2.1` and refreshed the copyright year range to `2024-2026`. Notes: