diff --git a/Tests/ConnectionTest/Client/Client.csproj b/Tests/ConnectionTest/Client/Client.csproj
index f18c94b..fd7fff8 100644
--- a/Tests/ConnectionTest/Client/Client.csproj
+++ b/Tests/ConnectionTest/Client/Client.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
enable
enable
diff --git a/Tests/ConnectionTest/Server/Server.csproj b/Tests/ConnectionTest/Server/Server.csproj
index 9c71c66..5470b3f 100644
--- a/Tests/ConnectionTest/Server/Server.csproj
+++ b/Tests/ConnectionTest/Server/Server.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
AnyCPU;x64;x86
diff --git a/Tests/FileTransferTest/FileTransferClient/FileTransferClient.csproj b/Tests/FileTransferTest/FileTransferClient/FileTransferClient.csproj
index 51f959c..e1b7800 100644
--- a/Tests/FileTransferTest/FileTransferClient/FileTransferClient.csproj
+++ b/Tests/FileTransferTest/FileTransferClient/FileTransferClient.csproj
@@ -2,7 +2,7 @@
WinExe
- net6.0-windows
+ net8.0-windows7.0
enable
true
AnyCPU;x64
diff --git a/Tests/FileTransferTest/FileTransferServer/FileTransferServer.csproj b/Tests/FileTransferTest/FileTransferServer/FileTransferServer.csproj
index 51f959c..e1b7800 100644
--- a/Tests/FileTransferTest/FileTransferServer/FileTransferServer.csproj
+++ b/Tests/FileTransferTest/FileTransferServer/FileTransferServer.csproj
@@ -2,7 +2,7 @@
WinExe
- net6.0-windows
+ net8.0-windows7.0
enable
true
AnyCPU;x64
diff --git a/Tests/HNSWDemo/HNSWDemo.csproj b/Tests/HNSWDemo/HNSWDemo.csproj
deleted file mode 100644
index 46fe152..0000000
--- a/Tests/HNSWDemo/HNSWDemo.csproj
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
- Exe
- net6.0
- AnyCPU;x64
-
-
-
-
-
-
-
-
-
-
-
-
- Always
-
-
-
-
diff --git a/Tests/HNSWDemo/Model/Gender.cs b/Tests/HNSWDemo/Model/Gender.cs
deleted file mode 100644
index 518e5a3..0000000
--- a/Tests/HNSWDemo/Model/Gender.cs
+++ /dev/null
@@ -1,7 +0,0 @@
-namespace HNSWDemo.Model
-{
- public enum Gender
- {
- Unknown, Male, Feemale
- }
-}
diff --git a/Tests/HNSWDemo/Model/Person.cs b/Tests/HNSWDemo/Model/Person.cs
deleted file mode 100644
index bcd031c..0000000
--- a/Tests/HNSWDemo/Model/Person.cs
+++ /dev/null
@@ -1,51 +0,0 @@
-using System;
-using System.Collections.Generic;
-using ZeroLevel.HNSW;
-
-namespace HNSWDemo.Model
-{
- public class Person
- {
- public Gender Gender { get; set; }
- public int Age { get; set; }
- public long Number { get; set; }
-
- private static (float[], Person) Generate(int vector_size)
- {
- var rnd = new Random((int)Environment.TickCount);
- var vector = new float[vector_size];
- DefaultRandomGenerator.Instance.NextFloats(vector);
- VectorUtils.NormalizeSIMD(vector);
- var p = new Person();
- p.Age = rnd.Next(15, 80);
- var gr = rnd.Next(0, 3);
- p.Gender = (gr == 0) ? Gender.Male : (gr == 1) ? Gender.Feemale : Gender.Unknown;
- p.Number = CreateNumber(rnd);
- return (vector, p);
- }
-
- public static List<(float[], Person)> GenerateRandom(int vectorSize, int vectorsCount)
- {
- var vectors = new List<(float[], Person)>();
- for (int i = 0; i < vectorsCount; i++)
- {
- vectors.Add(Generate(vectorSize));
- }
- return vectors;
- }
-
- static HashSet _exists = new HashSet();
- private static long CreateNumber(Random rnd)
- {
- long start_number;
- do
- {
- start_number = 79600000000L;
- start_number = start_number + rnd.Next(4, 8) * 10000000;
- start_number += rnd.Next(0, 1000000);
- }
- while (_exists.Add(start_number) == false);
- return start_number;
- }
- }
-}
diff --git a/Tests/HNSWDemo/Program.cs b/Tests/HNSWDemo/Program.cs
deleted file mode 100644
index 6ede5ff..0000000
--- a/Tests/HNSWDemo/Program.cs
+++ /dev/null
@@ -1,35 +0,0 @@
-using HNSWDemo.Tests;
-using System;
-using System.IO;
-using ZeroLevel.HNSW;
-
-namespace HNSWDemo
-{
- class Program
- {
- static void Main(string[] args)
- {
- //new QuantizatorTest().Run();
- //new AutoClusteringMNISTTest().Run();
- new AccuracityTest().Run();
- Console.WriteLine("Completed");
- Console.ReadKey();
- }
-
- static int GetC(string file)
- {
- var name = Path.GetFileNameWithoutExtension(file);
- var index = name.IndexOf("_M");
- if (index > 0)
- {
- index = name.IndexOf("_", index + 2);
- if (index > 0)
- {
- var num = name.Substring(index + 1, name.Length - index - 1);
- return int.Parse(num);
- }
- }
- return -1;
- }
- }
-}
diff --git a/Tests/HNSWDemo/Properties/launchSettings.json b/Tests/HNSWDemo/Properties/launchSettings.json
deleted file mode 100644
index 0246ed6..0000000
--- a/Tests/HNSWDemo/Properties/launchSettings.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
- "profiles": {
- "HNSWDemo": {
- "commandName": "Project",
- "hotReloadEnabled": false,
- "nativeDebugging": false
- }
- }
-}
\ No newline at end of file
diff --git a/Tests/HNSWDemo/Tests/AccuracityTest.cs b/Tests/HNSWDemo/Tests/AccuracityTest.cs
deleted file mode 100644
index e04ec64..0000000
--- a/Tests/HNSWDemo/Tests/AccuracityTest.cs
+++ /dev/null
@@ -1,76 +0,0 @@
-using HNSWDemo.Utils;
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Linq;
-using ZeroLevel.HNSW;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- public class AccuracityTest
- : ITest
- {
- private static int K = 200;
- private static int count = 10000;
- private static int testCount = 500;
- private static int dimensionality = 128;
-
- public void Run()
- {
- var totalHits = new List();
- var timewatchesNP = new List();
- var timewatchesHNSW = new List();
-
- var samples = VectorUtils.RandomVectors(dimensionality, count);
-
- var sw = new Stopwatch();
-
- var test = new VectorsDirectCompare(samples, Metrics.CosineDistance);
- var world = new SmallWorld(NSWOptions.Create(8, 16, 100, 100, (a, b) => (float)Metrics.DotProductDistance(a, b)));
-
- sw.Start();
- var ids = world.AddItems(samples.ToArray());
- sw.Stop();
-
- Console.WriteLine($"Insert {ids.Length} items: {sw.ElapsedMilliseconds} ms");
- Console.WriteLine("Start test");
-
- var test_vectors = VectorUtils.RandomVectors(dimensionality, testCount);
- foreach (var v in test_vectors)
- {
- sw.Restart();
- var gt = test.KNearest(v, K).ToDictionary(p => p.Item1, p => p.Item2);
- sw.Stop();
- timewatchesNP.Add(sw.ElapsedMilliseconds);
-
- sw.Restart();
- var result = world.Search(v, K);
- sw.Stop();
-
- timewatchesHNSW.Add(sw.ElapsedMilliseconds);
- var hits = 0;
- foreach (var r in result)
- {
- if (gt.ContainsKey(r.Item1))
- {
- hits++;
- }
- }
- totalHits.Add(hits);
- }
-
- Console.WriteLine($"MIN Accuracity: {totalHits.Min() * 100 / K}%");
- Console.WriteLine($"AVG Accuracity: {totalHits.Average() * 100 / K}%");
- Console.WriteLine($"MAX Accuracity: {totalHits.Max() * 100 / K}%");
-
- Console.WriteLine($"MIN HNSW TIME: {timewatchesHNSW.Min()} ms");
- Console.WriteLine($"AVG HNSW TIME: {timewatchesHNSW.Average()} ms");
- Console.WriteLine($"MAX HNSW TIME: {timewatchesHNSW.Max()} ms");
-
- Console.WriteLine($"MIN NP TIME: {timewatchesNP.Min()} ms");
- Console.WriteLine($"AVG NP TIME: {timewatchesNP.Average()} ms");
- Console.WriteLine($"MAX NP TIME: {timewatchesNP.Max()} ms");
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/AutoClusteringMNISTTest.cs b/Tests/HNSWDemo/Tests/AutoClusteringMNISTTest.cs
deleted file mode 100644
index f8b470a..0000000
--- a/Tests/HNSWDemo/Tests/AutoClusteringMNISTTest.cs
+++ /dev/null
@@ -1,160 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Drawing;
-using System.Drawing.Imaging;
-using System.IO;
-using System.Linq;
-using System.Runtime.InteropServices;
-using ZeroLevel.HNSW;
-using ZeroLevel.HNSW.Services;
-using ZeroLevel.Services.FileSystem;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- public class AutoClusteringMNISTTest
- : ITest
- {
- private static int Width = 3000;
- private static int Height = 3000;
-
- private static byte[] PadLines(byte[] bytes, int rows, int columns)
- {
- int currentStride = columns; // 3
- int newStride = columns; // 4
- byte[] newBytes = new byte[newStride * rows];
- for (int i = 0; i < rows; i++)
- Buffer.BlockCopy(bytes, currentStride * i, newBytes, newStride * i, currentStride);
- return newBytes;
- }
-
- public void Run()
- {
- var folder = @"D:\Mnist";
- int columns = 28;
- int rows = 28;
- int imageCount, rowCount, colCount;
- var buf = new byte[4];
- var image = new byte[rows * columns];
- var vectors = new List();
- using (var fs = new FileStream("t10k-images.idx3-ubyte", FileMode.Open, FileAccess.Read, FileShare.None))
- {
- // first 4 bytes is a magic number
- fs.Read(buf, 0, 4);
- // second 4 bytes is the number of images
- fs.Read(buf, 0, 4);
- imageCount = BitConverter.ToInt32(buf.Reverse().ToArray(), 0);
- // third 4 bytes is the row count
- fs.Read(buf, 0, 4);
- rowCount = BitConverter.ToInt32(buf.Reverse().ToArray(), 0);
- // fourth 4 bytes is the column count
- fs.Read(buf, 0, 4);
- colCount = BitConverter.ToInt32(buf.Reverse().ToArray(), 0);
-
- for (int i = 0; i < imageCount; i++)
- {
- fs.Read(image, 0, image.Length);
- var v = new byte[image.Length];
- Array.Copy(image, v, image.Length);
- vectors.Add(v);
- }
- }
- var options = NSWOptions.Create(8, 16, 200, 200, Metrics.L2EuclideanDistance);
- SmallWorld world;
- if (File.Exists("graph_mnist.bin"))
- {
- using (var fs = new FileStream("graph_mnist.bin", FileMode.Open, FileAccess.Read, FileShare.None))
- {
- world = SmallWorld.CreateWorldFrom(options, fs);
- }
- }
- else
- {
- world = SmallWorld.CreateWorld(options);
- world.AddItems(vectors);
- using (var fs = new FileStream("graph_mnist.bin", FileMode.Create, FileAccess.Write, FileShare.None))
- {
- world.Serialize(fs);
- }
- }
-
- var distance = new Func((id1, id2) => Metrics.L2EuclideanDistance(world.GetVector(id1), world.GetVector(id2)));
- var links = world.GetLinks().SelectMany(pair => pair.Value.Select(p=> distance(pair.Key, p))).ToList();
- var exists = links.Where(n => n > 0).ToArray();
-
- var histogram = new Histogram(HistogramMode.LOG, links);
- DrawHistogram(histogram, @"D:\histogram.jpg");
-
- var clusters = AutomaticGraphClusterer.DetectClusters(world);
- Console.WriteLine($"Found {clusters.Count} clusters");
-
- while (clusters.Count > 10)
- {
- var last = clusters[clusters.Count - 1];
- var testDistance = clusters[0].MinDistance(distance, last);
- var index = 0;
- for (int i = 1; i < clusters.Count - 1; i++)
- {
- var d = clusters[i].MinDistance(distance, last);
- if (d < testDistance)
- {
- testDistance = d;
- index = i;
- }
- }
- clusters[index].Merge(last);
- clusters.RemoveAt(clusters.Count - 1);
- }
-
- for (int i = 0; i < clusters.Count; i++)
- {
- var ouput = Path.Combine(folder, i.ToString("D3"));
- FSUtils.CleanAndTestFolder(ouput);
- foreach (var v in clusters[i])
- {
- int stride = columns;
- byte[] newbytes = PadLines(world.GetVector(v), rows, columns);
- using (var im = new Bitmap(columns, rows, stride, PixelFormat.Format8bppIndexed, Marshal.UnsafeAddrOfPinnedArrayElement(newbytes, 0)))
- {
- im.Save(Path.Combine(ouput, $"{v}.bmp"));
- }
- }
- Console.WriteLine($"Cluster {i + 1} countains {clusters[i].Count} items");
- }
- }
-
- static void DrawHistogram(Histogram histogram, string filename)
- {
- var wb = Width / histogram.Values.Length;
- var k = ((float)Height) / (float)histogram.Values.Max();
-
- var maxes = histogram.GetMaximums().ToDictionary(m => m.Index, m => m);
- int threshold = histogram.CuttOff();
-
- using (var bmp = new Bitmap(Width, Height))
- {
- using (var g = Graphics.FromImage(bmp))
- {
- for (int i = 0; i < histogram.Values.Length; i++)
- {
- var height = (int)(histogram.Values[i] * k);
- if (maxes.ContainsKey(i))
- {
- g.DrawRectangle(Pens.Red, i * wb, bmp.Height - height, wb, height);
- g.DrawRectangle(Pens.Red, i * wb + 1, bmp.Height - height, wb - 1, height);
- }
- else
- {
- g.DrawRectangle(Pens.Blue, i * wb, bmp.Height - height, wb, height);
- }
- if (i == threshold)
- {
- g.DrawLine(Pens.Green, i * wb + wb / 2, 0, i * wb + wb / 2, bmp.Height);
- }
- }
- }
- bmp.Save(filename);
- }
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/AutoClusteringTest.cs b/Tests/HNSWDemo/Tests/AutoClusteringTest.cs
deleted file mode 100644
index 2e77c21..0000000
--- a/Tests/HNSWDemo/Tests/AutoClusteringTest.cs
+++ /dev/null
@@ -1,27 +0,0 @@
-using System;
-using ZeroLevel.HNSW;
-using ZeroLevel.HNSW.Services;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- public class AutoClusteringTest
- : ITest
- {
- private static int Count = 3000;
- private static int Dimensionality = 128;
-
- public void Run()
- {
- var vectors = VectorUtils.RandomVectors(Dimensionality, Count);
- var world = SmallWorld.CreateWorld(NSWOptions.Create(8, 16, 200, 200, Metrics.L2EuclideanDistance));
- world.AddItems(vectors);
- var clusters = AutomaticGraphClusterer.DetectClusters(world);
- Console.WriteLine($"Found {clusters.Count} clusters");
- for (int i = 0; i < clusters.Count; i++)
- {
- Console.WriteLine($"Cluster {i + 1} countains {clusters[i].Count} items");
- }
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/FilterTest.cs b/Tests/HNSWDemo/Tests/FilterTest.cs
deleted file mode 100644
index d69cd8f..0000000
--- a/Tests/HNSWDemo/Tests/FilterTest.cs
+++ /dev/null
@@ -1,57 +0,0 @@
-using HNSWDemo.Model;
-using System;
-using System.Linq;
-using ZeroLevel.HNSW;
-
-namespace HNSWDemo.Tests
-{
- public class FilterTest
- : ITest
- {
- private const int count = 3000;
- private const int testCount = 100;
- private const int dimensionality = 128;
-
- public void Run()
- {
- var map = new HNSWMap();
- var samples = Person.GenerateRandom(dimensionality, count);
- var testDict = samples.ToDictionary(s => s.Item2.Number, s => s.Item2);
- var world = new SmallWorld(NSWOptions.Create(6, 15, 200, 200, CosineDistance.ForUnits));
- var ids = world.AddItems(samples.Select(i => i.Item1).ToArray());
- for (int bi = 0; bi < samples.Count; bi++)
- {
- map.Append(samples[bi].Item2.Number, ids[bi]);
- }
- Console.WriteLine("Start test");
- int K = 200;
- var vectors = VectorUtils.RandomVectors(dimensionality, testCount);
-
- var context = new SearchContext()
- .SetActiveNodes(map
- .ConvertFeaturesToIds(samples
- .Where(p => p.Item2.Age > 20 && p.Item2.Age < 50 && p.Item2.Gender == Gender.Feemale)
- .Select(p => p.Item2.Number)));
- var hits = 0;
- var miss = 0;
- foreach (var v in vectors)
- {
- var numbers = map.ConvertIdsToFeatures(world.Search(v, K, context).Select(r => r.Item1));
- foreach (var r in numbers)
- {
- var record = testDict[r];
- if (context.NodeCheckMode == Mode.None || (record.Gender == Gender.Feemale && record.Age > 20 && record.Age < 50))
- {
- hits++;
- }
- else
- {
- miss++;
- }
- }
- }
- Console.WriteLine($"SUCCESS: {hits}");
- Console.WriteLine($"ERROR: {miss}");
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/HistogramTest.cs b/Tests/HNSWDemo/Tests/HistogramTest.cs
deleted file mode 100644
index e13375d..0000000
--- a/Tests/HNSWDemo/Tests/HistogramTest.cs
+++ /dev/null
@@ -1,82 +0,0 @@
-using System;
-using System.Drawing;
-using System.IO;
-using System.Linq;
-using ZeroLevel.HNSW;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- public class HistogramTest
- : ITest
- {
- private static int Count = 3000;
- private static int Dimensionality = 128;
- private static int Width = 2440;
- private static int Height = 1920;
-
- public void Run()
- {
- Create(Dimensionality, @"D:\hist");
- // Process.Start("explorer", $"D:\\hist{Dimensionality.ToString("D3")}.jpg");
-
- /* for (int i = 12; i < 512; i++)
- {
- Create(i, @"D:\hist");
- }*/
- }
-
- private void Create(int dim, string output)
- {
- var vectors = VectorUtils.RandomVectors(dim, Count);
- var world = SmallWorld.CreateWorld(NSWOptions.Create(8, 16, 200, 200, Metrics.L2EuclideanDistance));
- world.AddItems(vectors);
-
- var distance = new Func((id1, id2) => Metrics.L2EuclideanDistance(world.GetVector(id1), world.GetVector(id2)));
- var weights = world.GetLinks().SelectMany(pair => pair.Value.Select(id => distance(pair.Key, id)));
- var histogram = new Histogram(HistogramMode.SQRT, weights);
- histogram.Smooth();
-
- int threshold = histogram.CuttOff();
- var min = histogram.Bounds[threshold - 1];
- var max = histogram.Bounds[threshold];
- var R = (max + min) / 2;
-
- DrawHistogram(histogram, Path.Combine(output, $"hist{dim.ToString("D3")}.jpg"));
- }
-
- static void DrawHistogram(Histogram histogram, string filename)
- {
- var wb = Width / histogram.Values.Length;
- var k = ((float)Height) / (float)histogram.Values.Max();
-
- var maxes = histogram.GetMaximums().ToDictionary(m => m.Index, m => m);
- int threshold = histogram.CuttOff();
-
- using (var bmp = new Bitmap(Width, Height))
- {
- using (var g = Graphics.FromImage(bmp))
- {
- for (int i = 0; i < histogram.Values.Length; i++)
- {
- var height = (int)(histogram.Values[i] * k);
- if (maxes.ContainsKey(i))
- {
- g.DrawRectangle(Pens.Red, i * wb, bmp.Height - height, wb, height);
- g.DrawRectangle(Pens.Red, i * wb + 1, bmp.Height - height, wb - 1, height);
- }
- else
- {
- g.DrawRectangle(Pens.Blue, i * wb, bmp.Height - height, wb, height);
- }
- if (i == threshold)
- {
- g.DrawLine(Pens.Green, i * wb + wb / 2, 0, i * wb + wb / 2, bmp.Height);
- }
- }
- }
- bmp.Save(filename);
- }
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/ITest.cs b/Tests/HNSWDemo/Tests/ITest.cs
deleted file mode 100644
index 6ee3d4c..0000000
--- a/Tests/HNSWDemo/Tests/ITest.cs
+++ /dev/null
@@ -1,7 +0,0 @@
-namespace HNSWDemo.Tests
-{
- public interface ITest
- {
- void Run();
- }
-}
diff --git a/Tests/HNSWDemo/Tests/InsertTimeExplosionTest.cs b/Tests/HNSWDemo/Tests/InsertTimeExplosionTest.cs
deleted file mode 100644
index d4c7513..0000000
--- a/Tests/HNSWDemo/Tests/InsertTimeExplosionTest.cs
+++ /dev/null
@@ -1,29 +0,0 @@
-using System;
-using System.Diagnostics;
-using ZeroLevel.HNSW;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- public class InsertTimeExplosionTest
- : ITest
- {
- private static int Count = 10000;
- private static int IterationCount = 100;
- private static int Dimensionality = 128;
-
- public void Run()
- {
- var sw = new Stopwatch();
- var world = new SmallWorld(NSWOptions.Create(6, 12, 100, 100, Metrics.CosineDistance));
- for (int i = 0; i < IterationCount; i++)
- {
- var samples = VectorUtils.RandomVectors(Dimensionality, Count);
- sw.Restart();
- var ids = world.AddItems(samples.ToArray());
- sw.Stop();
- Console.WriteLine($"ITERATION: [{i.ToString("D4")}] COUNT: [{ids.Length}] ELAPSED [{sw.ElapsedMilliseconds} ms]");
- }
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/LALTest.cs b/Tests/HNSWDemo/Tests/LALTest.cs
deleted file mode 100644
index dbeedcd..0000000
--- a/Tests/HNSWDemo/Tests/LALTest.cs
+++ /dev/null
@@ -1,121 +0,0 @@
-using HNSWDemo.Model;
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using ZeroLevel.HNSW;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- internal class LALTest
- : ITest
- {
- private const int count = 20000;
- private const int dimensionality = 128;
- private const string _graphFileCachee = @"lal_test_graph.bin";
- private const string _mapFileCachee = @"lal_test_map.bin";
-
- public void Run()
- {
- var moda = 3;
- var persons = Person.GenerateRandom(dimensionality, count);
- var samples = new Dictionary>();
- var options = NSWOptions.Create(6, 8, 100, 100, Metrics.CosineDistance);
-
- foreach (var p in persons)
- {
- var c = (int)Math.Abs(p.Item2.Number.GetHashCode() % moda);
- if (samples.ContainsKey(c) == false) samples.Add(c, new List<(float[], Person)>());
- samples[c].Add(p);
- }
-
- SplittedLALGraph worlds;
- HNSWMappers mappers;
-
- if (File.Exists(_graphFileCachee) && File.Exists(_mapFileCachee))
- {
- worlds = new SplittedLALGraph(_graphFileCachee);
- mappers = new HNSWMappers(_mapFileCachee, l => (int)Math.Abs(l.GetHashCode() % moda));
- }
- else
- {
-
- worlds = new SplittedLALGraph();
- mappers = new HNSWMappers(l => (int)Math.Abs(l.GetHashCode() % moda));
-
- var worlds_dict = new Dictionary>();
- var maps_dict = new Dictionary>();
-
- foreach (var p in samples)
- {
- var c = p.Key;
- if (worlds_dict.ContainsKey(c) == false)
- {
- worlds_dict.Add(c, new SmallWorld(options));
- }
- if (maps_dict.ContainsKey(c) == false)
- {
- maps_dict.Add(c, new HNSWMap());
- }
- var w = worlds_dict[c];
- var m = maps_dict[c];
- var ids = w.AddItems(p.Value.Select(i => i.Item1));
-
- for (int i = 0; i < ids.Length; i++)
- {
- m.Append(p.Value[i].Item2.Number, ids[i]);
- }
- }
-
- var name = Guid.NewGuid().ToString();
- foreach (var p in samples)
- {
- var c = p.Key;
- var w = worlds_dict[c];
- var m = maps_dict[c];
-
- using (var s = File.Create(name))
- {
- w.Serialize(s);
- }
- using (var s = File.OpenRead(name))
- {
- var l = LALGraph.FromHNSWGraph(s);
- worlds.Append(l, c);
- }
- File.Delete(name);
- mappers.Append(m, c);
- }
-
- worlds.Save(_graphFileCachee);
- mappers.Save(_mapFileCachee);
- }
-
- var entries = new long[10];
- for (int i = 0; i < entries.Length; i++)
- {
- entries[i] = persons[DefaultRandomGenerator.Instance.Next(0, persons.Count - 1)].Item2.Number;
- }
- var contexts = mappers.CreateContext(null, entries);
- var result = worlds.KNearest(5000, contexts);
-
- Console.WriteLine($"Found: {result.Sum(r=>r.Value.Count)}");
-
- /*Console.WriteLine("Entries:");
- foreach (var n in entries)
- {
- Console.WriteLine($"\t{n}");
- }
-
- Console.WriteLine("Extensions:");
- foreach (var r in result)
- {
- foreach (var n in mappers.ConvertIdsToFeatures(r.Key, r.Value))
- {
- Console.WriteLine($"\t[{n}]");
- }
- }*/
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/QuantizatorTest.cs b/Tests/HNSWDemo/Tests/QuantizatorTest.cs
deleted file mode 100644
index ffa01e8..0000000
--- a/Tests/HNSWDemo/Tests/QuantizatorTest.cs
+++ /dev/null
@@ -1,44 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using ZeroLevel.HNSW;
-using ZeroLevel.HNSW.Services;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- public class QuantizatorTest
- : ITest
- {
- private static int Count = 500000;
- private static int Dimensionality = 221;
-
- public void Run()
- {
- var samples = VectorUtils.RandomVectors(Dimensionality, Count);
- var min = samples.SelectMany(s => s).Min();
- var max = samples.SelectMany(s => s).Max();
- var q = new Quantizator(min, max);
- var q_samples = samples.Select(s => q.QuantizeToInt(s)).ToArray();
-
- // comparing
- var list = new List();
- for (int i = 0; i < samples.Count - 1; i++)
- {
- var v1 = samples[i];
- var v2 = samples[i + 1];
- var dist = Metrics.CosineDistance(v1, v2);
-
- var qv1 = q_samples[i];
- var qv2 = q_samples[i + 1];
- var qdist = Metrics.CosineDistance(qv1, qv2);
-
- list.Add(Math.Abs(dist - qdist));
- }
-
- Console.WriteLine($"Min diff: {list.Min()}");
- Console.WriteLine($"Avg diff: {list.Average()}");
- Console.WriteLine($"Max diff: {list.Max()}");
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/QuantizeAccuracityTest.cs b/Tests/HNSWDemo/Tests/QuantizeAccuracityTest.cs
deleted file mode 100644
index 7518257..0000000
--- a/Tests/HNSWDemo/Tests/QuantizeAccuracityTest.cs
+++ /dev/null
@@ -1,80 +0,0 @@
-using HNSWDemo.Utils;
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Linq;
-using ZeroLevel.HNSW;
-using ZeroLevel.HNSW.Services;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- public class QuantizeAccuracityTest
- : ITest
- {
- private static int Count = 5000;
- private static int Dimensionality = 128;
- private static int K = 200;
- private static int TestCount =500;
-
- public void Run()
- {
- var totalHits = new List();
- var timewatchesNP = new List();
- var timewatchesHNSW = new List();
- var q = new Quantizator(-1f, 1f);
-
- var s = VectorUtils.RandomVectors(Dimensionality, Count);
- var samples = s.Select(v => q.QuantizeToLong(v)).ToList();
-
- var sw = new Stopwatch();
-
- var test = new VectorsDirectCompare(s, Metrics.CosineDistance);
- var world = new SmallWorld(NSWOptions.Create(6, 8, 100, 100, Metrics.CosineDistance));
-
- sw.Start();
- var ids = world.AddItems(samples.ToArray());
- sw.Stop();
-
- Console.WriteLine($"Insert {ids.Length} items: {sw.ElapsedMilliseconds} ms");
- Console.WriteLine("Start test");
-
- var tv = VectorUtils.RandomVectors(Dimensionality, TestCount);
- var test_vectors = tv.Select(v => q.QuantizeToLong(v)).ToList();
- for (int i = 0; i < tv.Count; i++)
- {
- sw.Restart();
- var gt = test.KNearest(tv[i], K).ToDictionary(p => p.Item1, p => p.Item2);
- sw.Stop();
- timewatchesNP.Add(sw.ElapsedMilliseconds);
-
- sw.Restart();
- var result = world.Search(test_vectors[i], K);
- sw.Stop();
-
- timewatchesHNSW.Add(sw.ElapsedMilliseconds);
- var hits = 0;
- foreach (var r in result)
- {
- if (gt.ContainsKey(r.Item1))
- {
- hits++;
- }
- }
- totalHits.Add(hits);
- }
-
- Console.WriteLine($"MIN Accuracity: {totalHits.Min() * 100 / K}%");
- Console.WriteLine($"AVG Accuracity: {totalHits.Average() * 100 / K}%");
- Console.WriteLine($"MAX Accuracity: {totalHits.Max() * 100 / K}%");
-
- Console.WriteLine($"MIN HNSW TIME: {timewatchesHNSW.Min()} ms");
- Console.WriteLine($"AVG HNSW TIME: {timewatchesHNSW.Average()} ms");
- Console.WriteLine($"MAX HNSW TIME: {timewatchesHNSW.Max()} ms");
-
- Console.WriteLine($"MIN NP TIME: {timewatchesNP.Min()} ms");
- Console.WriteLine($"AVG NP TIME: {timewatchesNP.Average()} ms");
- Console.WriteLine($"MAX NP TIME: {timewatchesNP.Max()} ms");
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/QuantizeHistogramTest.cs b/Tests/HNSWDemo/Tests/QuantizeHistogramTest.cs
deleted file mode 100644
index 53fa40e..0000000
--- a/Tests/HNSWDemo/Tests/QuantizeHistogramTest.cs
+++ /dev/null
@@ -1,72 +0,0 @@
-using System;
-using System.Drawing;
-using System.Linq;
-using ZeroLevel.HNSW;
-using ZeroLevel.HNSW.Services;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- public class QuantizeHistogramTest
- : ITest
- {
- private static int Count = 3000;
- private static int Dimensionality = 128;
- private static int Width = 3000;
- private static int Height = 3000;
-
- public void Run()
- {
- var vectors = VectorUtils.RandomVectors(Dimensionality, Count);
- var q = new Quantizator(-1f, 1f);
- var world = SmallWorld.CreateWorld(NSWOptions.Create(8, 16, 200, 200, Metrics.CosineDistance));
- world.AddItems(vectors.Select(v => q.QuantizeToLong(v)).ToList());
-
- var distance = new Func((id1, id2) => Metrics.CosineDistance(world.GetVector(id1), world.GetVector(id2)));
- var weights = world.GetLinks().SelectMany(pair => pair.Value.Select(id => distance(pair.Key, id)));
- var histogram = new Histogram(HistogramMode.SQRT, weights);
- histogram.Smooth();
-
- int threshold = histogram.CuttOff();
- var min = histogram.Bounds[threshold - 1];
- var max = histogram.Bounds[threshold];
- var R = (max + min) / 2;
-
- DrawHistogram(histogram, @"D:\hist.jpg");
- }
-
- static void DrawHistogram(Histogram histogram, string filename)
- {
- var wb = Width / histogram.Values.Length;
- var k = ((float)Height) / (float)histogram.Values.Max();
-
- var maxes = histogram.GetMaximums().ToDictionary(m => m.Index, m => m);
- int threshold = histogram.CuttOff();
-
- using (var bmp = new Bitmap(Width, Height))
- {
- using (var g = Graphics.FromImage(bmp))
- {
- for (int i = 0; i < histogram.Values.Length; i++)
- {
- var height = (int)(histogram.Values[i] * k);
- if (maxes.ContainsKey(i))
- {
- g.DrawRectangle(Pens.Red, i * wb, bmp.Height - height, wb, height);
- g.DrawRectangle(Pens.Red, i * wb + 1, bmp.Height - height, wb - 1, height);
- }
- else
- {
- g.DrawRectangle(Pens.Blue, i * wb, bmp.Height - height, wb, height);
- }
- if (i == threshold)
- {
- g.DrawLine(Pens.Green, i * wb + wb / 2, 0, i * wb + wb / 2, bmp.Height);
- }
- }
- }
- bmp.Save(filename);
- }
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/QuantizeInsertTimeExplosionTest.cs b/Tests/HNSWDemo/Tests/QuantizeInsertTimeExplosionTest.cs
deleted file mode 100644
index a0c6679..0000000
--- a/Tests/HNSWDemo/Tests/QuantizeInsertTimeExplosionTest.cs
+++ /dev/null
@@ -1,32 +0,0 @@
-using System;
-using System.Diagnostics;
-using System.Linq;
-using ZeroLevel.HNSW;
-using ZeroLevel.HNSW.Services;
-using ZeroLevel.Services.Mathemathics;
-
-namespace HNSWDemo.Tests
-{
- public class QuantizeInsertTimeExplosionTest
- : ITest
- {
- private static int Count = 10000;
- private static int IterationCount = 100;
- private static int Dimensionality = 128;
-
- public void Run()
- {
- var sw = new Stopwatch();
- var world = new SmallWorld(NSWOptions.Create(6, 12, 100, 100, Metrics.CosineDistance));
- var q = new Quantizator(-1f, 1f);
- for (int i = 0; i < IterationCount; i++)
- {
- var samples = VectorUtils.RandomVectors(Dimensionality, Count);
- sw.Restart();
- var ids = world.AddItems(samples.Select(v => q.QuantizeToLong(v)).ToArray());
- sw.Stop();
- Console.WriteLine($"ITERATION: [{i.ToString("D4")}] COUNT: [{ids.Length}] ELAPSED [{sw.ElapsedMilliseconds} ms]");
- }
- }
- }
-}
diff --git a/Tests/HNSWDemo/Tests/SaveRestoreTest.cs b/Tests/HNSWDemo/Tests/SaveRestoreTest.cs
deleted file mode 100644
index f135ea6..0000000
--- a/Tests/HNSWDemo/Tests/SaveRestoreTest.cs
+++ /dev/null
@@ -1,52 +0,0 @@
-using System;
-using System.Diagnostics;
-using System.IO;
-using ZeroLevel.HNSW;
-
-namespace HNSWDemo.Tests
-{
- public class SaveRestoreTest
- : ITest
- {
- private static int Count = 1000;
- private static int Dimensionality = 128;
-
- public void Run()
- {
- var samples = VectorUtils.RandomVectors(Dimensionality, Count);
- var world = new SmallWorld(NSWOptions.Create(6, 15, 200, 200, CosineDistance.ForUnits));
- var sw = new Stopwatch();
- sw.Start();
- var ids = world.AddItems(samples.ToArray());
- sw.Stop();
- Console.WriteLine($"Insert {ids.Length} items on {sw.ElapsedMilliseconds} ms");
- Console.WriteLine("Start test");
-
- byte[] dump;
- using (var ms = new MemoryStream())
- {
- world.Serialize(ms);
- dump = ms.ToArray();
- }
- Console.WriteLine($"Full dump size: {dump.Length} bytes");
-
- byte[] testDump;
- var restoredWorld = new SmallWorld(NSWOptions.Create(6, 15, 200, 200, CosineDistance.ForUnits));
- using (var ms = new MemoryStream(dump))
- {
- restoredWorld.Deserialize(ms);
- }
-
- using (var ms = new MemoryStream())
- {
- restoredWorld.Serialize(ms);
- testDump = ms.ToArray();
- }
- if (testDump.Length != dump.Length)
- {
- Console.WriteLine($"Incorrect restored size. Got {testDump.Length}. Expected: {dump.Length}");
- return;
- }
- }
- }
-}
diff --git a/Tests/HNSWDemo/Utils/QLVectorsDirectCompare.cs b/Tests/HNSWDemo/Utils/QLVectorsDirectCompare.cs
deleted file mode 100644
index 10182b5..0000000
--- a/Tests/HNSWDemo/Utils/QLVectorsDirectCompare.cs
+++ /dev/null
@@ -1,95 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using ZeroLevel.HNSW;
-
-namespace HNSWDemo.Utils
-{
- public class QLVectorsDirectCompare
- {
- private const int HALF_LONG_BITS = 32;
- private readonly IList _vectors;
- private readonly Func _distance;
-
- public QLVectorsDirectCompare(List vectors, Func distance)
- {
- _vectors = vectors;
- _distance = distance;
- }
-
- public IEnumerable<(int, float)> KNearest(long[] v, int k)
- {
- var weights = new Dictionary();
- for (int i = 0; i < _vectors.Count; i++)
- {
- var d = _distance(v, _vectors[i]);
- weights[i] = d;
- }
- return weights.OrderBy(p => p.Value).Take(k).Select(p => (p.Key, p.Value));
- }
-
- public List> DetectClusters()
- {
- var links = new SortedList();
- for (int i = 0; i < _vectors.Count; i++)
- {
- for (int j = i + 1; j < _vectors.Count; j++)
- {
- long k = (((long)(i)) << HALF_LONG_BITS) + j;
- links.Add(k, _distance(_vectors[i], _vectors[j]));
- }
- }
-
- // 1. Find R - bound between intra-cluster distances and out-of-cluster distances
- var histogram = new Histogram(HistogramMode.SQRT, links.Values);
- int threshold = histogram.CuttOff();
- var min = histogram.Bounds[threshold - 1];
- var max = histogram.Bounds[threshold];
- var R = (max + min) / 2;
-
- // 2. Get links with distances less than R
- var resultLinks = new SortedList();
- foreach (var pair in links)
- {
- if (pair.Value < R)
- {
- resultLinks.Add(pair.Key, pair.Value);
- }
- }
-
- // 3. Extract clusters
- List> clusters = new List>();
- foreach (var pair in resultLinks)
- {
- var k = pair.Key;
- var id1 = (int)(k >> HALF_LONG_BITS);
- var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS));
-
- bool found = false;
- foreach (var c in clusters)
- {
- if (c.Contains(id1))
- {
- c.Add(id2);
- found = true;
- break;
- }
- else if (c.Contains(id2))
- {
- c.Add(id1);
- found = true;
- break;
- }
- }
- if (found == false)
- {
- var c = new HashSet();
- c.Add(id1);
- c.Add(id2);
- clusters.Add(c);
- }
- }
- return clusters;
- }
- }
-}
diff --git a/Tests/HNSWDemo/Utils/QVectorsDirectCompare.cs b/Tests/HNSWDemo/Utils/QVectorsDirectCompare.cs
deleted file mode 100644
index 1649ea9..0000000
--- a/Tests/HNSWDemo/Utils/QVectorsDirectCompare.cs
+++ /dev/null
@@ -1,95 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using ZeroLevel.HNSW;
-
-namespace HNSWDemo.Utils
-{
- public class QVectorsDirectCompare
- {
- private const int HALF_LONG_BITS = 32;
- private readonly IList _vectors;
- private readonly Func _distance;
-
- public QVectorsDirectCompare(List vectors, Func distance)
- {
- _vectors = vectors;
- _distance = distance;
- }
-
- public IEnumerable<(int, float)> KNearest(byte[] v, int k)
- {
- var weights = new Dictionary();
- for (int i = 0; i < _vectors.Count; i++)
- {
- var d = _distance(v, _vectors[i]);
- weights[i] = d;
- }
- return weights.OrderBy(p => p.Value).Take(k).Select(p => (p.Key, p.Value));
- }
-
- public List> DetectClusters()
- {
- var links = new SortedList();
- for (int i = 0; i < _vectors.Count; i++)
- {
- for (int j = i + 1; j < _vectors.Count; j++)
- {
- long k = (((long)i) << HALF_LONG_BITS) + j;
- links.Add(k, _distance(_vectors[i], _vectors[j]));
- }
- }
-
- // 1. Find R - bound between intra-cluster distances and out-of-cluster distances
- var histogram = new Histogram(HistogramMode.SQRT, links.Values);
- int threshold = histogram.CuttOff();
- var min = histogram.Bounds[threshold - 1];
- var max = histogram.Bounds[threshold];
- var R = (max + min) / 2;
-
- // 2. Get links with distances less than R
- var resultLinks = new SortedList();
- foreach (var pair in links)
- {
- if (pair.Value < R)
- {
- resultLinks.Add(pair.Key, pair.Value);
- }
- }
-
- // 3. Extract clusters
- List> clusters = new List>();
- foreach (var pair in resultLinks)
- {
- var k = pair.Key;
- var id1 = (int)(k >> HALF_LONG_BITS);
- var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS));
-
- bool found = false;
- foreach (var c in clusters)
- {
- if (c.Contains(id1))
- {
- c.Add(id2);
- found = true;
- break;
- }
- else if (c.Contains(id2))
- {
- c.Add(id1);
- found = true;
- break;
- }
- }
- if (found == false)
- {
- var c = new HashSet();
- c.Add(id1);
- c.Add(id2);
- clusters.Add(c);
- }
- }
- return clusters;
- }
- }
-}
diff --git a/Tests/HNSWDemo/Utils/VectorsDirectCompare.cs b/Tests/HNSWDemo/Utils/VectorsDirectCompare.cs
deleted file mode 100644
index 4d50c3f..0000000
--- a/Tests/HNSWDemo/Utils/VectorsDirectCompare.cs
+++ /dev/null
@@ -1,95 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using ZeroLevel.HNSW;
-
-namespace HNSWDemo.Utils
-{
- public class VectorsDirectCompare
- {
- private const int HALF_LONG_BITS = 32;
- private readonly IList _vectors;
- private readonly Func _distance;
-
- public VectorsDirectCompare(List vectors, Func distance)
- {
- _vectors = vectors;
- _distance = distance;
- }
-
- public IEnumerable<(int, float)> KNearest(float[] v, int k)
- {
- var weights = new Dictionary();
- for (int i = 0; i < _vectors.Count; i++)
- {
- var d = _distance(v, _vectors[i]);
- weights[i] = d;
- }
- return weights.OrderBy(p => p.Value).Take(k).Select(p => (p.Key, p.Value));
- }
-
- public List> DetectClusters()
- {
- var links = new SortedList();
- for (int i = 0; i < _vectors.Count; i++)
- {
- for (int j = i + 1; j < _vectors.Count; j++)
- {
- long k = (((long)(i)) << HALF_LONG_BITS) + j;
- links.Add(k, _distance(_vectors[i], _vectors[j]));
- }
- }
-
- // 1. Find R - bound between intra-cluster distances and out-of-cluster distances
- var histogram = new Histogram(HistogramMode.SQRT, links.Values);
- int threshold = histogram.CuttOff();
- var min = histogram.Bounds[threshold - 1];
- var max = histogram.Bounds[threshold];
- var R = (max + min) / 2;
-
- // 2. Get links with distances less than R
- var resultLinks = new SortedList();
- foreach (var pair in links)
- {
- if (pair.Value < R)
- {
- resultLinks.Add(pair.Key, pair.Value);
- }
- }
-
- // 3. Extract clusters
- List> clusters = new List>();
- foreach (var pair in resultLinks)
- {
- var k = pair.Key;
- var id1 = (int)(k >> HALF_LONG_BITS);
- var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS));
-
- bool found = false;
- foreach (var c in clusters)
- {
- if (c.Contains(id1))
- {
- c.Add(id2);
- found = true;
- break;
- }
- else if (c.Contains(id2))
- {
- c.Add(id1);
- found = true;
- break;
- }
- }
- if (found == false)
- {
- var c = new HashSet();
- c.Add(id1);
- c.Add(id2);
- clusters.Add(c);
- }
- }
- return clusters;
- }
- }
-}
diff --git a/Tests/HNSWDemo/t10k-images.idx3-ubyte b/Tests/HNSWDemo/t10k-images.idx3-ubyte
deleted file mode 100644
index 1170b2c..0000000
Binary files a/Tests/HNSWDemo/t10k-images.idx3-ubyte and /dev/null differ
diff --git a/Tests/PartitionFileStorageTest/PartitionFileStorageTest.csproj b/Tests/PartitionFileStorageTest/PartitionFileStorageTest.csproj
index 3d7cb0b..2a0493b 100644
--- a/Tests/PartitionFileStorageTest/PartitionFileStorageTest.csproj
+++ b/Tests/PartitionFileStorageTest/PartitionFileStorageTest.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
enable
enable
diff --git a/Tests/PartitionFileStorageTest/Program.cs b/Tests/PartitionFileStorageTest/Program.cs
index 152483a..4d07f42 100644
--- a/Tests/PartitionFileStorageTest/Program.cs
+++ b/Tests/PartitionFileStorageTest/Program.cs
@@ -61,7 +61,7 @@ namespace PartitionFileStorageTest
await storePart.Store(c3, Generate(r));
await storePart.Store(c3, Generate(r));
storePart.CompleteAdding();
- await storePart.Compress();
+ storePart.Compress();
}
using (var readPart = store.CreateAccessor(new Metadata { Date = new DateTime(2022, 11, 08) }))
@@ -124,7 +124,7 @@ namespace PartitionFileStorageTest
Log.Info($"Fill journal: {sw.ElapsedMilliseconds}ms. Records writed: {storePart.TotalRecords}");
sw.Restart();
storePart.CompleteAdding();
- await storePart.Compress();
+ storePart.Compress();
sw.Stop();
Log.Info($"Compress: {sw.ElapsedMilliseconds}ms");
sw.Restart();
@@ -269,11 +269,11 @@ namespace PartitionFileStorageTest
using (var storePart = store.CreateBuilder(meta))
{
- Parallel.ForEach(MassGenerator((long)(0.7 * PAIRS_COUNT)), parallelOptions, pair =>
+ await Parallel.ForEachAsync(MassGenerator((long)(0.7 * PAIRS_COUNT)), CancellationToken.None, async (pair, _) =>
{
var key = pair.Item1;
var val = pair.Item2;
- storePart.Store(key, val);
+ await storePart.Store(key, val);
if (key % 717 == 0)
{
testKeys1.Add(key);
@@ -292,7 +292,7 @@ namespace PartitionFileStorageTest
Log.Info($"Fill journal: {sw.ElapsedMilliseconds}ms");
sw.Restart();
storePart.CompleteAdding();
- await storePart.Compress();
+ storePart.Compress();
sw.Stop();
Log.Info($"Compress: {sw.ElapsedMilliseconds}ms");
sw.Restart();
@@ -305,11 +305,11 @@ namespace PartitionFileStorageTest
sw.Restart();
using (var merger = store.CreateMergeAccessor(meta, data => Compressor.DecodeBytesContent(data)))
{
- Parallel.ForEach(MassGenerator((long)(0.3 * PAIRS_COUNT)), parallelOptions, pair =>
+ await Parallel.ForEachAsync(MassGenerator((long)(0.3 * PAIRS_COUNT)), CancellationToken.None, async (pair, _) =>
{
var key = pair.Item1;
var val = pair.Item2;
- merger.Store(key, val);
+ await merger.Store(key, val);
Keys.Add(key);
});
@@ -610,9 +610,10 @@ namespace PartitionFileStorageTest
FSUtils.CleanAndTestFolder(root);
await FastTest(options);
+
FSUtils.CleanAndTestFolder(root);
await FullStoreMultithreadTest(optionsMultiThread);
-
+
/*FSUtils.CleanAndTestFolder(root);
diff --git a/Tests/Qdrant.Test/Program.cs b/Tests/Qdrant.Test/Program.cs
deleted file mode 100644
index 9b8878a..0000000
--- a/Tests/Qdrant.Test/Program.cs
+++ /dev/null
@@ -1,41 +0,0 @@
-using Grpc.Net.Client;
-using static Qdrant.Collections;
-
-namespace Qdrant.Test
-{
- // QDRANT VERSION 1.15.1
- internal class Program
- {
- const string COLLECTION_NAME = "my_test_collection";
- static void Main(string[] args)
- {
- var address = @"http://localhost:6334";
- var channel = GrpcChannel.ForAddress(address);
- var collections = new CollectionsClient(channel);
- var response = collections.Create(new CreateCollection
- {
- CollectionName = COLLECTION_NAME,
- VectorsConfig = new VectorsConfig
- {
- Params = new VectorParams
- {
- Distance = Distance.Dot,
- Size = 32,
- HnswConfig = new HnswConfigDiff
- {
- OnDisk = false
- }
- }
- }
- });
-
- Console.WriteLine($"CREATED: {response.Result}");
-
- var d_response = collections.Delete(new DeleteCollection
- {
- CollectionName = COLLECTION_NAME
- });
- Console.WriteLine($"DELETED: {d_response.Result}");
- }
- }
-}
\ No newline at end of file
diff --git a/Tests/Qdrant.Test/Qdrant.Test.csproj b/Tests/Qdrant.Test/Qdrant.Test.csproj
deleted file mode 100644
index d459a5e..0000000
--- a/Tests/Qdrant.Test/Qdrant.Test.csproj
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
- Exe
- net6.0
- enable
- enable
-
-
-
-
-
-
-
-
-
- ..\..\ZeroLevel.Qdrant.GrpcClient\bin\Release\net6.0\ZeroLevel.Qdrant.GrpcClient.dll
-
-
-
-
diff --git a/Tests/TestApp/TestApp.csproj b/Tests/TestApp/TestApp.csproj
index 375d4ee..6e86b70 100644
--- a/Tests/TestApp/TestApp.csproj
+++ b/Tests/TestApp/TestApp.csproj
@@ -2,13 +2,16 @@
Exe
- net6.0
+ net8.0
AnyCPU;x64;x86
-
+
+
+
+
diff --git a/Tests/TestPipeLine/Consumer/Consumer.csproj b/Tests/TestPipeLine/Consumer/Consumer.csproj
index bfff132..d187edf 100644
--- a/Tests/TestPipeLine/Consumer/Consumer.csproj
+++ b/Tests/TestPipeLine/Consumer/Consumer.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
enable
enable
AnyCPU;x64
diff --git a/Tests/TestPipeLine/Processor/Processor.csproj b/Tests/TestPipeLine/Processor/Processor.csproj
index bfff132..d187edf 100644
--- a/Tests/TestPipeLine/Processor/Processor.csproj
+++ b/Tests/TestPipeLine/Processor/Processor.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
enable
enable
AnyCPU;x64
diff --git a/Tests/TestPipeLine/Source/Source.csproj b/Tests/TestPipeLine/Source/Source.csproj
index bfff132..d187edf 100644
--- a/Tests/TestPipeLine/Source/Source.csproj
+++ b/Tests/TestPipeLine/Source/Source.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
enable
enable
AnyCPU;x64
diff --git a/Tests/TestPipeLine/Watcher/Watcher.csproj b/Tests/TestPipeLine/Watcher/Watcher.csproj
index bfff132..d187edf 100644
--- a/Tests/TestPipeLine/Watcher/Watcher.csproj
+++ b/Tests/TestPipeLine/Watcher/Watcher.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
enable
enable
AnyCPU;x64
diff --git a/Tests/ZeroLevel.UnitTests/PartitionStorageTests.cs b/Tests/ZeroLevel.UnitTests/PartitionStorageTests.cs
index 6e8bdc5..e5a9e75 100644
--- a/Tests/ZeroLevel.UnitTests/PartitionStorageTests.cs
+++ b/Tests/ZeroLevel.UnitTests/PartitionStorageTests.cs
@@ -152,7 +152,7 @@ namespace ZeroLevel.UnitTests
await storePart.Store(numbers[2], texts[7]); // 2 - 21
storePart.CompleteAdding();
- await storePart.Compress();
+ storePart.Compress();
}
// Assert
diff --git a/ZeroLevel.Discovery/ZeroLevel.Discovery.csproj b/ZeroLevel.Discovery/ZeroLevel.Discovery.csproj
index d0cfea3..e6651e9 100644
--- a/ZeroLevel.Discovery/ZeroLevel.Discovery.csproj
+++ b/ZeroLevel.Discovery/ZeroLevel.Discovery.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
AnyCPU;x64;x86
diff --git a/ZeroLevel.EventsServer/EventRepository.cs b/ZeroLevel.EventsServer/EventRepository.cs
deleted file mode 100644
index afd30f8..0000000
--- a/ZeroLevel.EventsServer/EventRepository.cs
+++ /dev/null
@@ -1,24 +0,0 @@
-using System.Threading;
-
-namespace ZeroLevel.EventServer
-{
- public class EventRepository
- :BaseSqLiteDB
- {
- private readonly SQLiteConnection _db;
- private readonly ReaderWriterLockSlim _rwLock = new ReaderWriterLockSlim();
- private readonly string _tableName;
-
- public EventRepository()
- {
- _tableName = "events";
-
- var path = PrepareDb($"{_tableName}.db");
- _db = new SQLiteConnection($"Data Source={path};Version=3;");
- _db.Open();
-
- Execute($"CREATE TABLE IF NOT EXISTS {_tableName} (id INTEGER PRIMARY KEY AUTOINCREMENT, key TEXT, body BLOB)", _db);
- Execute($"CREATE INDEX IF NOT EXISTS key_index ON {_tableName} (key)", _db);
- }
- }
-}
diff --git a/ZeroLevel.EventsServer/EventService.cs b/ZeroLevel.EventsServer/EventService.cs
deleted file mode 100644
index 8741acf..0000000
--- a/ZeroLevel.EventsServer/EventService.cs
+++ /dev/null
@@ -1,62 +0,0 @@
-using ZeroLevel.EventServer.Model;
-using ZeroLevel.Network;
-using ZeroLevel.Services.Applications;
-
-namespace ZeroLevel.EventServer
-{
- public class EventService
- : BaseZeroService
- {
- public EventService()
- {
- }
-
- protected override void StartAction()
- {
- var host = UseHost();
- this.AutoregisterInboxes(host);
- host.OnConnect += Host_OnConnect;
- host.OnDisconnect += Host_OnDisconnect;
- }
-
- private void Host_OnDisconnect(ISocketClient obj)
- {
- Log.Info($"Client '{obj.Endpoint.Address}:{obj.Endpoint.Port}' disconnected");
- }
-
- private void Host_OnConnect(IClient obj)
- {
- Log.Info($"Client '{obj.Socket.Endpoint.Address}:{obj.Socket.Endpoint.Port}' connected");
- }
-
- protected override void StopAction()
- {
- }
-
- #region Inboxes
- [ExchangeReplier("onetime")]
- public long OneTimeHandler(ISocketClient client, OneTimeEvent e)
- {
- return 0;
- }
-
- [ExchangeReplier("periodic")]
- public long PeriodicHandler(ISocketClient client, PeriodicTimeEvent e)
- {
- return 0;
- }
-
- [ExchangeReplier("eventtrigger")]
- public long AfterEventHandler(ISocketClient client, EventAfterEvent e)
- {
- return 0;
- }
-
- [ExchangeReplier("eventstrigger")]
- public long AfterEventsHandler(ISocketClient client, EventAfterEvents e)
- {
- return 0;
- }
- #endregion
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/BaseEvent.cs b/ZeroLevel.EventsServer/Model/BaseEvent.cs
deleted file mode 100644
index 1ba11bd..0000000
--- a/ZeroLevel.EventsServer/Model/BaseEvent.cs
+++ /dev/null
@@ -1,8 +0,0 @@
-namespace ZeroLevel.EventServer.Model
-{
- public abstract class BaseEvent
- {
- public string ServiceKey { get; set; }
- public string Inbox { get; set; }
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/Condition.cs b/ZeroLevel.EventsServer/Model/Condition.cs
deleted file mode 100644
index 0a7f365..0000000
--- a/ZeroLevel.EventsServer/Model/Condition.cs
+++ /dev/null
@@ -1,23 +0,0 @@
-namespace ZeroLevel.EventServer.Model
-{
- public enum Condition
- : int
- {
- ///
- /// В любом случае
- ///
- None = 0,
- ///
- /// Если хотя бы одно событие успешно обработано
- ///
- OneSuccessfull = 1,
- ///
- /// Если обработаны все события
- ///
- AllSuccessfull = 2,
- ///
- /// Если хотя бы одно событие не обработано
- ///
- AnyFault = 3
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/EventAfterEvent.cs b/ZeroLevel.EventsServer/Model/EventAfterEvent.cs
deleted file mode 100644
index 3df207f..0000000
--- a/ZeroLevel.EventsServer/Model/EventAfterEvent.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-namespace ZeroLevel.EventServer.Model
-{
- public class EventAfterEvent
- : BaseEvent
- {
- public long EventId { get; set; }
-
- public Condition Confition { get; set; }
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/EventAfterEvents.cs b/ZeroLevel.EventsServer/Model/EventAfterEvents.cs
deleted file mode 100644
index 9229d1a..0000000
--- a/ZeroLevel.EventsServer/Model/EventAfterEvents.cs
+++ /dev/null
@@ -1,12 +0,0 @@
-using System.Collections.Generic;
-
-namespace ZeroLevel.EventServer.Model
-{
- public class EventAfterEvents
- : BaseEvent
- {
- public IEnumerable EventIds { get; set; }
-
- public Condition Confition { get; set; }
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/EventInfoRecord.cs b/ZeroLevel.EventsServer/Model/EventInfoRecord.cs
deleted file mode 100644
index 207e339..0000000
--- a/ZeroLevel.EventsServer/Model/EventInfoRecord.cs
+++ /dev/null
@@ -1,13 +0,0 @@
-namespace ZeroLevel.EventServer
-{
- public class EventInfoRecord
- {
- public long EventId { get; set; }
-
- public string ServiceKey { get; set; }
- // OR
- public string ServiceEndpoint { get; set; }
-
- public string Inbox { get; set; }
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/EventResult.cs b/ZeroLevel.EventsServer/Model/EventResult.cs
deleted file mode 100644
index 406a0c0..0000000
--- a/ZeroLevel.EventsServer/Model/EventResult.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-namespace ZeroLevel.EventServer.Model
-{
- public class EventResult
- {
- public long EventId;
- public EventResultState State;
- public long StartTimestamp;
- public long EndTimestamp;
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/EventResultState.cs b/ZeroLevel.EventsServer/Model/EventResultState.cs
deleted file mode 100644
index 7c418b5..0000000
--- a/ZeroLevel.EventsServer/Model/EventResultState.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-namespace ZeroLevel.EventServer.Model
-{
- public enum EventResultState
- {
- InProgress,
- Success,
- Unsuccess
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/EventType.cs b/ZeroLevel.EventsServer/Model/EventType.cs
deleted file mode 100644
index 85f2688..0000000
--- a/ZeroLevel.EventsServer/Model/EventType.cs
+++ /dev/null
@@ -1,11 +0,0 @@
-namespace ZeroLevel.EventServer.Model
-{
- public enum EventType
- : int
- {
- OneType = 0,
- Periodic = 1,
- EventTrigger = 2,
- EventsTrigger = 3
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/OneTimeEvent.cs b/ZeroLevel.EventsServer/Model/OneTimeEvent.cs
deleted file mode 100644
index 590c6b5..0000000
--- a/ZeroLevel.EventsServer/Model/OneTimeEvent.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-using System;
-
-namespace ZeroLevel.EventServer.Model
-{
- public class OneTimeEvent
- : BaseEvent
- {
- public TimeSpan Period { get; set; }
- }
-}
diff --git a/ZeroLevel.EventsServer/Model/PeriodicTimeEvent.cs b/ZeroLevel.EventsServer/Model/PeriodicTimeEvent.cs
deleted file mode 100644
index c165591..0000000
--- a/ZeroLevel.EventsServer/Model/PeriodicTimeEvent.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-using System;
-
-namespace ZeroLevel.EventServer.Model
-{
- public class PeriodicTimeEvent
- : BaseEvent
- {
- public TimeSpan Period { get; set; }
- }
-}
diff --git a/ZeroLevel.EventsServer/Program.cs b/ZeroLevel.EventsServer/Program.cs
deleted file mode 100644
index 62d2c7c..0000000
--- a/ZeroLevel.EventsServer/Program.cs
+++ /dev/null
@@ -1,15 +0,0 @@
-namespace ZeroLevel.EventServer
-{
- class Program
- {
- static void Main(string[] args)
- {
- Bootstrap.Startup(args, configuration: () => Configuration.ReadOrEmptySetFromIniFile("config.ini"))
- .EnableConsoleLog()
- .UseDiscovery()
- .Run()
- .WaitWhileStatus(ZeroServiceStatus.Running);
- Bootstrap.Shutdown();
- }
- }
-}
diff --git a/ZeroLevel.EventsServer/ZeroLevel.EventsServer.csproj b/ZeroLevel.EventsServer/ZeroLevel.EventsServer.csproj
deleted file mode 100644
index c1cb915..0000000
--- a/ZeroLevel.EventsServer/ZeroLevel.EventsServer.csproj
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-
- net6.0
- AnyCPU;x64;x86
-
-
-
-
-
-
-
diff --git a/ZeroLevel.HNSW/Model/Histogram.cs b/ZeroLevel.HNSW/Model/Histogram.cs
deleted file mode 100644
index f89ec1a..0000000
--- a/ZeroLevel.HNSW/Model/Histogram.cs
+++ /dev/null
@@ -1,287 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-
-namespace ZeroLevel.HNSW
-{
- public class HistogramValue
- {
- public int Index { get; internal set; }
- public int Value { get; internal set; }
- public float MinBound { get; internal set; }
- public float MaxBound { get; internal set; }
- }
-
- public class Histogram
- {
- public HistogramMode Mode { get; }
- public float Min { get; }
- public float Max { get; }
- public float BoundsPeriod { get; }
- public float[] Bounds { get; }
- public int[] Values { get; }
-
- public Histogram(HistogramMode mode, IEnumerable data)
- {
- Mode = mode;
- Min = data.Min();
- Max = data.Max();
- int count = data.Count();
- int M = mode == HistogramMode.LOG ? (int)(1f + 3.2f * Math.Log(count)) : (int)(Math.Sqrt(count));
- BoundsPeriod = (Max - Min) / M;
- Bounds = new float[M - 1];
-
- float bound = Min + BoundsPeriod;
- for (int i = 0; i < Bounds.Length; i++)
- {
- Bounds[i] = bound;
- bound += BoundsPeriod;
- }
- Values = new int[M];
- for (int i = 0; i < Values.Length; i++)
- {
- Values[i] = 0;
- }
- foreach (var v in data)
- {
- if (v < float.Epsilon) continue;
- for (int i = 0; i < Bounds.Length; i++)
- {
- if (v < Bounds[i])
- {
- Values[i]++;
- break;
- }
- }
- }
- }
-
- public int Count => Values?.Length ?? 0;
-
- public int CountSignChanges()
- {
- if ((Values?.Length ?? 0) <= 2) return 0;
- int i = 0;
- while (Values[i] <= float.Epsilon) { i++; continue; }
- if ((Values.Length - i) <= 2) return 0;
-
- var delta = Values[i + 1] - Values[i];
- int changes = 0;
- i++;
- for (; i < Values.Length - 1; i++)
- {
- var d = Values[i + 1] - Values[i];
- if (Math.Abs(d) <= float.Epsilon)
- {
- continue;
- }
- if (NumbersHasSameSign(d, delta) == false)
- {
- delta = d;
- changes++;
- }
- }
- return changes;
- }
-
- public void Smooth()
- {
- var buffer = new int[Values.Length];
- Array.Copy(Values, buffer, buffer.Length);
- for (int i = 2; i < Values.Length - 3; i++)
- {
- Values[i] = (buffer[i - 2] + buffer[i - 1] + buffer[i] + buffer[i + 1] + buffer[i + 2]) / 5;
- }
- }
-
- public IEnumerable GetMaximums()
- {
- var list = new List();
-
- if ((Values?.Length ?? 0) <= 2) return list;
- int i = 0;
- while (Values[i] <= float.Epsilon) { i++; continue; }
- if ((Values.Length - i) <= 2) return list;
-
- var delta = Values[i + 1] - Values[i];
- i++;
- for (; i < Values.Length - 1; i++)
- {
- var d = Values[i + 1] - Values[i];
- if (Math.Abs(d) <= float.Epsilon)
- {
- continue;
- }
- if (NumbersHasSameSign(d, delta) == false)
- {
- if (delta > 0)
- {
- list.Add(new HistogramValue
- {
- Index = i,
- Value = Values[i],
- MinBound = Bounds[i - 1],
- MaxBound = Bounds[i]
- });
- }
- delta = d;
- }
- }
- return list;
- }
-
- #region OTSU "https://en.wikipedia.org/wiki/Otsu's_method"
- // function is used to compute the q values in the equation
- private float Px(int init, int end)
- {
- int sum = 0;
- int i;
- for (i = init; i < end; i++)
- sum += Values[i];
- return (float)sum;
- }
- // function is used to compute the mean values in the equation (mu)
- private float Mx(int init, int end)
- {
- int sum = 0;
- int i;
- for (i = init; i < end; i++)
- sum += i * Values[i];
-
- return (float)sum;
- }
- /*
- public int OTSU()
- {
- float p1, p2, p12;
- int k;
- int threshold = 0;
- float bcv = 0;
- for (k = 0; k < Values.Length; k++)
- {
- p1 = Px(0, k);
- p2 = Px(k + 1, Values.Length);
- p12 = p1 * p2;
- if (p12 == 0)
- p12 = 1;
- float diff = (Mx(0, k) * p2) - (Mx(k + 1, Values.Length) * p1);
- var test = (float)diff * diff / p12;
- if (test > bcv)
- {
- bcv = test;
- threshold = k;
- }
- }
- return threshold;
- }
- */
- /*
-1. Градиент V[I] - V[i-1]
-2. Походы окнами от 1 и выше, пока не сойдется к бимодальности
-3. Найти cutoff как минимум между пиками
-
-Modes = 0
-W = 1
-D = [V.count1]
-Maxes = []
-For I in [1..V.count]
- D= V[I] - V[i-1]
-do
-
-Modes = 0
-S = +1
-do
- for wnd in D
- if wnd.sum > 0 & S < 0
- S = +1
- Elif wnd.sum < 0 & S > 0
- Maxes.push(wnd.maxindex)
- Modes ++
- S = -1
-W++
-while Modes > 2
-If Modes == 2
-Cutoff = Maxes[0]
-Min = V[I]
-For I=Maxes[0] to Maxes[1]
- if V[I] < Min
- Min = V[I]
- Cutoff = i
- */
-
- public int CuttOff()
- {
- if (Values.Length > 1)
- {
- var grad = new int[Values.Length];
- grad[0] = 0;
- grad[1] = 0;
- for (int k = 2; k < Values.Length; k++)
- {
- grad[k - 1] = Values[k] - Values[k - 1];
- }
- var modes = 0;
- var window = 0;
- var sign = 1;
- var sum = 0;
- var max = 0;
- var maxInd = 0;
- var maxes = new List();
- do
- {
- maxes.Clear();
- window++;
- modes = 0;
- sum = 0;
- for (int i = 0; i < grad.Length; i += window)
- {
- sum = grad[i];
- max = Values[i];
- maxInd = i;
- for (var w = 1; w < window && (i + w) < grad.Length; w++)
- {
- sum += grad[i + w];
- if (Values[i + w] > max)
- {
- max = Values[i + w];
- maxInd = i + w;
- }
- }
- if (sum > 0 && sign < 0)
- {
- sign = 1;
- }
- else if (sum < 0 && sign > 0)
- {
- modes++;
- maxes.Add(maxInd);
- sign = -1;
- }
- }
- } while (modes > 2);
- if (modes == 2)
- {
- var cutoff = maxes[0];
- var min = Values[cutoff];
- for (int i = maxes[0] + 1; i < maxes[1]; i++)
- {
- if (Values[i] < min)
- {
- cutoff = i;
- min = Values[i];
- }
- }
- return cutoff;
- }
- }
- return -1;
- }
-
- #endregion
-
- static bool NumbersHasSameSign(int left, int right)
- {
- return left >= 0 && right >= 0 || left < 0 && right < 0;
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Model/HistogramMode.cs b/ZeroLevel.HNSW/Model/HistogramMode.cs
deleted file mode 100644
index b897d0d..0000000
--- a/ZeroLevel.HNSW/Model/HistogramMode.cs
+++ /dev/null
@@ -1,14 +0,0 @@
-namespace ZeroLevel.HNSW
-{
- public enum HistogramMode
- {
- ///
- /// 1 + 3.2 * Ln(LinksCount)
- ///
- SQRT,
- ///
- /// Sqrt(LinksCount)
- ///
- LOG
- }
-}
diff --git a/ZeroLevel.HNSW/Model/NSWOptions.cs b/ZeroLevel.HNSW/Model/NSWOptions.cs
deleted file mode 100644
index caf47a0..0000000
--- a/ZeroLevel.HNSW/Model/NSWOptions.cs
+++ /dev/null
@@ -1,53 +0,0 @@
-using System;
-
-namespace ZeroLevel.HNSW
-{
- public sealed class NSWOptions
- {
- ///
- /// Max node connections on Layer
- ///
- public readonly int M;
- ///
- /// Max search buffer
- ///
- public readonly int EF;
- ///
- /// Max search buffer for inserting
- ///
- public readonly int EFConstruction;
-
- public static NSWOptions Create(int v1, int v2, int v3, int v4, Func l2Euclidean, object selectionHeuristic)
- {
- throw new NotImplementedException();
- }
-
- ///
- /// Distance function beetween vectors
- ///
- public readonly Func Distance;
-
- public readonly int LayersCount;
-
-
- private NSWOptions(int layersCount,
- int m,
- int ef,
- int ef_construction,
- Func distance)
- {
- LayersCount = layersCount;
- M = m;
- EF = ef;
- EFConstruction = ef_construction;
- Distance = distance;
- }
-
- public static NSWOptions Create(int layersCount,
- int M,
- int EF,
- int EF_construction,
- Func distance) =>
- new NSWOptions(layersCount, M, EF, EF_construction, distance);
- }
-}
diff --git a/ZeroLevel.HNSW/Model/SearchContext.cs b/ZeroLevel.HNSW/Model/SearchContext.cs
deleted file mode 100644
index a80597c..0000000
--- a/ZeroLevel.HNSW/Model/SearchContext.cs
+++ /dev/null
@@ -1,126 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Runtime.CompilerServices;
-
-namespace ZeroLevel.HNSW
-{
- public enum Mode
- {
- None,
- ActiveCheck,
- InactiveCheck,
- ActiveInactiveCheck
- }
-
- public sealed class SearchContext
- {
- ///
- /// Список номеров которые разрешены к добавлению итогового результата, если поиск ведется в ограниченном наборе точек (например, после предварительной фильтрации)
- ///
- private HashSet _activeNodes;
- ///
- /// Список точек с которых начинается поиск в графе для расширения
- ///
- private HashSet _entryNodes;
- ///
- /// Режим работы алгоритма расширения, зависящий от того заданы ли ограничения в точках, и заданы ли точки начала поиска
- ///
- private Mode _mode;
-
- public Mode NodeCheckMode => _mode;
- public double PercentInTotal { get; private set; } = 0;
- public long AvaliableNodesCount => _activeNodes?.Count ?? 0;
-
- public SearchContext()
- {
- _mode = Mode.None;
- }
-
- ///
- /// Расчет процентного содержания точек доступных для использования в данном контексте, по отношению к общему количеству точек
- ///
- public SearchContext CaclulatePercentage(long total)
- {
- if ((_mode == Mode.ActiveCheck || _mode == Mode.ActiveInactiveCheck) && total > 0)
- {
- PercentInTotal = ((_activeNodes?.Count ?? 0 * 100d) / (double)total) / 100.0d;
- }
- return this;
- }
-
- public SearchContext SetPercentage(double percent)
- {
- PercentInTotal = percent;
- return this;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private bool _isActiveNode(int nodeId) => _activeNodes?.Contains(nodeId) ?? false;
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private bool _isEntryNode(int nodeId) => _entryNodes?.Contains(nodeId) ?? false;
-
-
- ///
- /// Проверка, подходит ли указанная точка для включения в набор расширения
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal bool IsActiveNode(int nodeId)
- {
- switch (_mode)
- {
- // Если задан набор разрешенных к использованию точек, проверяется вхождение в него
- case Mode.ActiveCheck: return _isActiveNode(nodeId);
- // Если задан набор точек начала поиска, проверка невхождения точки в него
- case Mode.InactiveCheck: return _isEntryNode(nodeId) == false;
- // Если задан и ограничивающий и начальный наборы точек, проверка и на ограничение и на невхождение в начальный набор
- case Mode.ActiveInactiveCheck: return false == _isEntryNode(nodeId) && _isActiveNode(nodeId);
- }
- return nodeId >= 0;
- }
-
- public IEnumerable EntryPoints => _entryNodes;
-
- public SearchContext SetActiveNodes(IEnumerable activeNodes)
- {
- if (activeNodes != null && activeNodes.Any())
- {
- if (_mode == Mode.ActiveCheck || _mode == Mode.ActiveInactiveCheck)
- {
- throw new InvalidOperationException("Active nodes are already defined");
- }
- _activeNodes = new HashSet(activeNodes);
- if (_mode == Mode.None)
- {
- _mode = Mode.ActiveCheck;
- }
- else if (_mode == Mode.InactiveCheck)
- {
- _mode = Mode.ActiveInactiveCheck;
- }
- }
- return this;
- }
-
- public SearchContext SetEntryPointsNodes(IEnumerable entryNodes)
- {
- if (entryNodes != null && entryNodes.Any())
- {
- if (_mode == Mode.InactiveCheck || _mode == Mode.ActiveInactiveCheck)
- {
- throw new InvalidOperationException("Inctive nodes are already defined");
- }
- _entryNodes = new HashSet(entryNodes);
- if (_mode == Mode.None)
- {
- _mode = Mode.InactiveCheck;
- }
- else if (_mode == Mode.ActiveCheck)
- {
- _mode = Mode.ActiveInactiveCheck;
- }
- }
- return this;
- }
- }
-}
diff --git a/ZeroLevel.HNSW/PHNSW/HLevel.cs b/ZeroLevel.HNSW/PHNSW/HLevel.cs
deleted file mode 100644
index ad2fc7a..0000000
--- a/ZeroLevel.HNSW/PHNSW/HLevel.cs
+++ /dev/null
@@ -1,61 +0,0 @@
-using System;
-
-namespace ZeroLevel.HNSW.PHNSW
-{
- internal class HLevel
- : IPHNSWLevel
- {
- private readonly float _distance;
- public HLevel(float distance)
- {
- _distance = distance;
- }
-
- public Node Node { get; set; } = null;
- public IPHNSWLevel NextLevelA { get; set; }
- public IPHNSWLevel NextLevelB { get; set; }
-
- private float _abDistance = float.MinValue;
-
- public void Add(Node node)
- {
- if (NextLevelA.Node == null) { NextLevelA.Node = node; }
- else if (NextLevelB.Node == null)
- {
- NextLevelB.Node = node;
- _abDistance = PHNSWMetric.CosineDistance(NextLevelA.Node.Vector, NextLevelB.Node.Vector);
- }
- else
- {
- var an = PHNSWMetric.CosineDistance(NextLevelA.Node.Vector, node.Vector);
- var bn = PHNSWMetric.CosineDistance(NextLevelB.Node.Vector, node.Vector);
-
- var abDiff = Math.Abs(_distance - _abDistance);
- var anDiff = Math.Abs(_distance - an);
- var bnDiff = Math.Abs(_distance - bn);
-
- if (abDiff < anDiff && abDiff < bnDiff)
- {
- if (an < bn)
- {
- NextLevelA.Add(node);
- }
- else
- {
- NextLevelB.Add(node);
- }
- }
- else if (anDiff < bnDiff && anDiff < abDiff)
- {
- NextLevelA.Node = node;
- NextLevelA.Add(node);
- }
- else
- {
- NextLevelB.Node = node;
- NextLevelB.Add(node);
- }
- }
- }
- }
-}
diff --git a/ZeroLevel.HNSW/PHNSW/IPHNSWLevel.cs b/ZeroLevel.HNSW/PHNSW/IPHNSWLevel.cs
deleted file mode 100644
index 64dfba9..0000000
--- a/ZeroLevel.HNSW/PHNSW/IPHNSWLevel.cs
+++ /dev/null
@@ -1,8 +0,0 @@
-namespace ZeroLevel.HNSW.PHNSW
-{
- public interface IPHNSWLevel
- {
- void Add(IPHNSWLevel prevLayer, Node node);
- Node Node { get; internal set; }
- }
-}
diff --git a/ZeroLevel.HNSW/PHNSW/Node.cs b/ZeroLevel.HNSW/PHNSW/Node.cs
deleted file mode 100644
index 1ef51bd..0000000
--- a/ZeroLevel.HNSW/PHNSW/Node.cs
+++ /dev/null
@@ -1,12 +0,0 @@
-using ZeroLevel.DocumentObjectModel.Flow;
-
-namespace ZeroLevel.HNSW.PHNSW
-{
- public class Node
- {
- public float[] Vector { get; set; }
- public TPayload Payload { get; set; }
-
- public List> Neighbors { get; }
- }
-}
diff --git a/ZeroLevel.HNSW/PHNSW/PHNSWBuilder.cs b/ZeroLevel.HNSW/PHNSW/PHNSWBuilder.cs
deleted file mode 100644
index 6351ba5..0000000
--- a/ZeroLevel.HNSW/PHNSW/PHNSWBuilder.cs
+++ /dev/null
@@ -1,35 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace ZeroLevel.HNSW.PHNSW
-{
- public static class PHNSWBuilder
- {
- public static IPHNSWLevel Build(int levels)
- {
- var distance = 0.33f;
- var root = new HLevel(distance);
- var horizontalLayers = new List>(new[] { root });
- for (var i = 0; i < levels; i++)
- {
- distance /= 2.0f;
- var nextList = new List>();
- foreach (var layer in horizontalLayers)
- {
- var a = new HLevel(distance);
- var b = new HLevel(distance);
- layer.NextLevelA = a;
- layer.NextLevelB = b;
- nextList.Add(a);
- nextList.Add(b);
- }
- horizontalLayers = nextList;
- }
- var uwLevel = new UWLevel();
-
- }
- }
-}
diff --git a/ZeroLevel.HNSW/PHNSW/PHNSWMetric.cs b/ZeroLevel.HNSW/PHNSW/PHNSWMetric.cs
deleted file mode 100644
index f6b7dd6..0000000
--- a/ZeroLevel.HNSW/PHNSW/PHNSWMetric.cs
+++ /dev/null
@@ -1,28 +0,0 @@
-using System;
-
-namespace ZeroLevel.HNSW.PHNSW
-{
- internal static class PHNSWMetric
- {
- internal static float CosineDistance(float[] u, float[] v)
- {
- if (u.Length != v.Length)
- {
- throw new ArgumentException("Vectors have non-matching dimensions");
- }
-
- float dot = 0.0f;
- float nru = 0.0f;
- float nrv = 0.0f;
- for (int i = 0; i < u.Length; ++i)
- {
- dot += u[i] * v[i];
- nru += u[i] * u[i];
- nrv += v[i] * v[i];
- }
-
- var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv));
- return 1 - similarity;
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs b/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs
deleted file mode 100644
index e8e0d9c..0000000
--- a/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs
+++ /dev/null
@@ -1,150 +0,0 @@
-using System;
-using System.Collections;
-using System.Collections.Generic;
-using System.Linq;
-
-namespace ZeroLevel.HNSW.Services
-{
- public class Cluster
- : IEnumerable
- {
- private HashSet _elements = new HashSet();
-
- public int Count => _elements.Count;
-
- public bool Contains(int id) => _elements.Contains(id);
-
- public bool Add(int id) => _elements.Add(id);
-
- public IEnumerator GetEnumerator()
- {
- return _elements.GetEnumerator();
- }
-
- IEnumerator IEnumerable.GetEnumerator()
- {
- return _elements.GetEnumerator();
- }
-
- public void Merge(Cluster cluster)
- {
- foreach (var e in cluster)
- {
- this._elements.Add(e);
- }
- }
-
- public float MaxDistance(Func distance, Cluster other)
- {
- var max = float.MinValue;
- foreach (var e in this._elements)
- {
- foreach (var o in other)
- {
- var d = distance(e, o);
- if (d > max)
- {
- max = d;
- }
- }
- }
- return max;
- }
-
- public float MinDistance(Func distance, Cluster other)
- {
- var min = float.MaxValue;
- foreach (var e in this._elements)
- {
- foreach (var o in other)
- {
- var d = distance(e, o);
- if (d < min)
- {
- min = d;
- }
- }
- }
- return min;
- }
-
- public float AvgDistance(Func distance, Cluster other)
- {
- var dist = new List();
- foreach (var e in this._elements)
- {
- foreach (var o in other)
- {
- dist.Add(distance(e, o));
- }
- }
- return dist.Average();
- }
- }
-
- public static class AutomaticGraphClusterer
- {
- private class Link
- {
- public int Id1;
- public int Id2;
- public float Distance;
- }
-
- public static List DetectClusters(SmallWorld world)
- {
- var distance = world.DistanceFunction;
- var links = world.GetLinks().SelectMany(pair => pair.Value.Select(id => new Link { Id1 = pair.Key, Id2 = id, Distance = distance(pair.Key, id) })).ToList();
-
- // 1. Find R - bound between intra-cluster distances and out-of-cluster distances
- var histogram = new Histogram(HistogramMode.LOG, links.Select(l => l.Distance));
- int threshold = histogram.CuttOff();
- var min = histogram.Bounds[threshold - 1];
- var max = histogram.Bounds[threshold];
- var R = (max + min) / 2;
-
-
- // 2. Get links with distances less than R
- var resultLinks = new List();
- foreach (var l in links)
- {
- if (l.Distance < R)
- {
- resultLinks.Add(l);
- }
- }
-
- // 3. Extract clusters
- List clusters = new List();
- foreach (var l in resultLinks)
- {
- var id1 = l.Id1;
- var id2 = l.Id2;
- bool found = false;
- foreach (var c in clusters)
- {
- if (c.Contains(id1))
- {
- c.Add(id2);
- found = true;
- break;
- }
- else if (c.Contains(id2))
- {
- c.Add(id1);
- found = true;
- break;
- }
- }
- if (found == false)
- {
- var c = new Cluster();
- c.Add(id1);
- c.Add(id2);
- clusters.Add(c);
- }
- }
- return clusters;
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs b/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs
deleted file mode 100644
index 5e4a99e..0000000
--- a/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs
+++ /dev/null
@@ -1,244 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Threading;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- internal sealed class CompactBiDirectionalLinksSet
- : IBinarySerializable, IDisposable
- {
- private readonly ReaderWriterLockSlim _rwLock = new ReaderWriterLockSlim();
-
- private const int HALF_LONG_BITS = 32;
-
- private SortedList _set = new SortedList();
-
- internal SortedList Links => _set;
-
- internal (int, int) this[int index]
- {
- get
- {
- var k = _set.Keys[index];
- var id1 = (int)(k >> HALF_LONG_BITS);
- var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS));
- return (id1, id2);
- }
- }
-
- internal int Count => _set.Count;
-
- internal IEnumerable<(int, int, float)> FindLinksForId(int id)
- {
- _rwLock.EnterReadLock();
- try
- {
- if (_set.Count == 1)
- {
- var k = _set.Keys[0];
- var v = _set[k];
- var id1 = (int)(k >> HALF_LONG_BITS);
- var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS));
- if (id1 == id) yield return (id, id2, v);
- else if (id2 == id) yield return (id1, id, v);
- }
- else if (_set.Count > 1)
- {
- foreach (var (k, v) in Search(_set, id))
- {
- var id1 = (int)(k >> HALF_LONG_BITS);
- var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS));
- yield return (id1, id2, v);
- }
- }
- }
- finally
- {
- _rwLock.ExitReadLock();
- }
- }
-
- internal IEnumerable<(int, int, float)> Items()
- {
- _rwLock.EnterReadLock();
- try
- {
- foreach (var pair in _set)
- {
- var id1 = (int)(pair.Key >> HALF_LONG_BITS);
- var id2 = (int)(pair.Key - (((long)id1) << HALF_LONG_BITS));
- yield return (id1, id2, pair.Value);
- }
- }
- finally
- {
- _rwLock.ExitReadLock();
- }
- }
-
- internal void RemoveIndex(int id1, int id2)
- {
- long k1 = (((long)(id1)) << HALF_LONG_BITS) + id2;
- long k2 = (((long)(id2)) << HALF_LONG_BITS) + id1;
- _rwLock.EnterWriteLock();
- try
- {
- if (_set.ContainsKey(k1))
- {
- _set.Remove(k1);
- }
- if (_set.ContainsKey(k2))
- {
- _set.Remove(k2);
- }
- }
- finally
- {
- _rwLock.ExitWriteLock();
- }
- }
-
- internal bool Add(int id1, int id2, float distance)
- {
- _rwLock.EnterWriteLock();
- try
- {
- long k1 = (((long)(id1)) << HALF_LONG_BITS) + id2;
- long k2 = (((long)(id2)) << HALF_LONG_BITS) + id1;
- if (_set.ContainsKey(k1) == false)
- {
- _set.Add(k1, distance);
- if (k1 != k2)
- {
- _set.Add(k2, distance);
- }
- return true;
- }
- }
- finally
- {
- _rwLock.ExitWriteLock();
- }
- return false;
- }
-
- /*
-
-function binary_search(A, n, T) is
- L := 0
- R := n − 1
- while L ≤ R do
- m := floor((L + R) / 2)
- if A[m] < T then
- L := m + 1
- else if A[m] > T then
- R := m − 1
- else:
- return m
- return unsuccessful
-
- */
-
- private static IEnumerable<(long, float)> Search(SortedList set, int index)
- {
- long k = ((long)index) << HALF_LONG_BITS; // T
- int left = 0;
- int right = set.Count - 1;
- int mid;
- long test;
- while (left <= right)
- {
- mid = (int)Math.Floor((right + left) / 2d);
- test = (set.Keys[mid] >> HALF_LONG_BITS) << HALF_LONG_BITS; // A[m]
-
- if (test < k)
- {
- left = mid + 1;
- }
- else if (test > k)
- {
- right = mid - 1;
- }
- else
- {
- return SearchByPosition(set, k, mid);
- }
- }
- return Enumerable.Empty<(long, float)>();
- }
-
- private static IEnumerable<(long, float)> SearchByPosition(SortedList set, long k, int position)
- {
- var start = position;
- var end = position;
- do
- {
- position--;
- } while (position >= 0 && ((set.Keys[position] >> HALF_LONG_BITS) << HALF_LONG_BITS) == k);
- start = position + 1;
- position = end + 1;
- while (position < set.Count && ((set.Keys[position] >> HALF_LONG_BITS) << HALF_LONG_BITS) == k)
- {
- position++;
- }
- end = position - 1;
- for (int i = start; i <= end; i++)
- {
- yield return (set.Keys[i], set.Values[i]);
- }
- }
-
- public Histogram CalculateHistogram(HistogramMode mode)
- {
- return new Histogram(mode, _set.Values);
- }
-
- internal float Distance(int id1, int id2)
- {
- long k = (((long)(id1)) << HALF_LONG_BITS) + id2;
- if (_set.ContainsKey(k))
- {
- return _set[k];
- }
- return float.MaxValue;
- }
-
- public void Dispose()
- {
- _rwLock.Dispose();
- _set.Clear();
- _set = null;
- }
-
- public void Serialize(IBinaryWriter writer)
- {
- writer.WriteBoolean(true); // true - set with weights
- writer.WriteInt32(_set.Count);
- foreach (var record in _set)
- {
- writer.WriteLong(record.Key);
- writer.WriteFloat(record.Value);
- }
- }
-
- public void Deserialize(IBinaryReader reader)
- {
- if (reader.ReadBoolean() == false)
- {
- throw new InvalidOperationException("Incompatible data format. The set does not contain weights.");
- }
- _set.Clear();
- _set = null;
- var count = reader.ReadInt32();
- _set = new SortedList(count + 1);
- for (int i = 0; i < count; i++)
- {
- var key = reader.ReadLong();
- var value = reader.ReadFloat();
- _set.Add(key, value);
- }
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/HNSWMap.cs b/ZeroLevel.HNSW/Services/HNSWMap.cs
deleted file mode 100644
index 0f21705..0000000
--- a/ZeroLevel.HNSW/Services/HNSWMap.cs
+++ /dev/null
@@ -1,84 +0,0 @@
-using System.Collections.Generic;
-using System.IO;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- // object -> vector -> vectorId
- // HNSW vectorId + vector
- // Map object feature - vectorId
- public class HNSWMap
- : IBinarySerializable
- {
- private Dictionary _map;
- private Dictionary _reverse_map;
-
- public int this[TFeature feature] => _map.GetValueOrDefault(feature);
-
- public HNSWMap() { }
- public HNSWMap(int capacity = -1)
- {
- if (capacity > 0)
- {
- _map = new Dictionary(capacity);
- _reverse_map = new Dictionary(capacity);
- }
- else
- {
- _map = new Dictionary();
- _reverse_map = new Dictionary();
-
- }
- }
-
- public HNSWMap(Stream stream)
- {
- using (var reader = new MemoryStreamReader(stream))
- {
- Deserialize(reader);
- }
- }
-
- public void Append(TFeature feature, int vectorId)
- {
- _map[feature] = vectorId;
- _reverse_map[vectorId] = feature;
- }
-
- public IEnumerable ConvertFeaturesToIds(IEnumerable features)
- {
- int id;
- foreach (var feature in features)
- {
- if (_map.TryGetValue(feature, out id))
- {
- yield return id;
- }
- }
- }
-
- public IEnumerable ConvertIdsToFeatures(IEnumerable ids)
- {
- TFeature feature;
- foreach (var id in ids)
- {
- if (_reverse_map.TryGetValue(id, out feature))
- {
- yield return feature;
- }
- }
- }
-
- public void Deserialize(IBinaryReader reader)
- {
- this._map = reader.ReadDictionary();
- this._reverse_map = reader.ReadDictionary();
- }
-
- public void Serialize(IBinaryWriter writer)
- {
- writer.WriteDictionary(this._map);
- writer.WriteDictionary(this._reverse_map);
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/HNSWMappers.cs b/ZeroLevel.HNSW/Services/HNSWMappers.cs
deleted file mode 100644
index 8ac018e..0000000
--- a/ZeroLevel.HNSW/Services/HNSWMappers.cs
+++ /dev/null
@@ -1,142 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- public class HNSWMappers
- : IBinarySerializable
- {
- private IDictionary> _mappers;
- private readonly Func _bucketFunction;
-
- public HNSWMappers(string filePath, Func bucketFunction)
- {
- _bucketFunction = bucketFunction;
- using (var fs = File.OpenRead(filePath))
- {
- using (var bs = new BufferedStream(fs, 1024 * 1024 * 32))
- {
- using (var reader = new MemoryStreamReader(bs))
- {
- Deserialize(reader);
- }
- }
- }
- }
-
- public void Save(string filePath)
- {
- using (var fs = File.OpenWrite(filePath))
- {
- using (var bs = new BufferedStream(fs, 1024 * 1024 * 32))
- {
- using (var writer = new MemoryStreamWriter(bs))
- {
- Serialize(writer);
- }
- }
- }
- }
-
- public HNSWMappers(Func bucketFunction)
- {
- _mappers = new Dictionary>();
- _bucketFunction = bucketFunction;
- }
-
- public void Append(HNSWMap map, int c)
- {
- _mappers.Add(c, map);
- }
-
- public IEnumerable ConvertIdsToFeatures(int c, IEnumerable ids)
- {
- foreach (var feature in _mappers[c].ConvertIdsToFeatures(ids))
- {
- yield return feature;
- }
- }
-
- public IDictionary CreateContext(IEnumerable activeNodes, IEnumerable entryPoints)
- {
- var actives = new Dictionary>();
- var entries = new Dictionary>();
- if (activeNodes != null)
- {
- foreach (var node in activeNodes)
- {
- var c = _bucketFunction(node);
- if (_mappers.ContainsKey(c))
- {
- if (actives.ContainsKey(c) == false)
- {
- actives.Add(c, new List());
- }
- actives[c].Add(_mappers[c][node]);
- }
- else
- {
- Log.Warning($"Active node {node} is not included in graphs!");
- }
- }
- }
- if (entryPoints != null)
- {
- foreach (var entryPoint in entryPoints)
- {
- var c = _bucketFunction(entryPoint);
- if (_mappers.ContainsKey(c))
- {
- if (entries.ContainsKey(c) == false)
- {
- entries.Add(c, new List());
- }
- entries[c].Add(_mappers[c][entryPoint]);
- }
- else
- {
- Log.Warning($"Entry point {entryPoint} is not included in graphs!");
- }
- }
- }
- var result = new Dictionary();
- foreach (var pair in _mappers)
- {
- var active = actives.GetValueOrDefault(pair.Key);
- var entry = entries.GetValueOrDefault(pair.Key);
- result.Add(pair.Key, new SearchContext().SetActiveNodes(active).SetEntryPointsNodes(entry));
- }
- var total = result.Values.Sum(v => v.AvaliableNodesCount);
- if (total > 0)
- {
- foreach (var pair in result)
- {
- pair.Value.CaclulatePercentage(total);
- }
- }
- else
- {
- //total = result.Values.Sum(v => v.EntryPoints.Count());
- foreach (var pair in result)
- {
- //var p = (double)pair.Value.EntryPoints.Count() / (double)total;
- pair.Value.SetPercentage(0.2d);
- }
- }
- return result;
- }
-
- public void Deserialize(IBinaryReader reader)
- {
- this._mappers = reader.ReadDictionary>();
- }
-
- public void Serialize(IBinaryWriter writer)
- {
- writer.WriteDictionary>(this._mappers);
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/LAL/LALGraph.cs b/ZeroLevel.HNSW/Services/LAL/LALGraph.cs
deleted file mode 100644
index 622f185..0000000
--- a/ZeroLevel.HNSW/Services/LAL/LALGraph.cs
+++ /dev/null
@@ -1,120 +0,0 @@
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- public class LALGraph
- : IBinarySerializable
- {
- private readonly LALLinks _links = new LALLinks();
-
- public LALGraph() { }
- public static LALGraph FromLALGraph(Stream stream)
- {
- var l = new LALGraph();
- l.Deserialize(stream);
- return l;
- }
-
- public static LALGraph FromHNSWGraph(Stream stream)
- {
- var l = new LALGraph();
- l.DeserializeFromHNSW(stream);
- return l;
- }
-
- public IEnumerable KNearest(int k, SearchContext context)
- {
- var v = new VisitedBitSet(_links.Count, 1);
- var C = new Queue();
- var W = new HashSet();
- var entryPoints = context.EntryPoints;
- var nextEntry = new HashSet();
- do
- {
- foreach (var ep in entryPoints)
- {
- var neighboursIds = _links.FindNeighbors(ep);
- for (int i = 0; i < neighboursIds.Length; ++i)
- {
- if (v.Contains(neighboursIds[i]) == false)
- {
- C.Enqueue(neighboursIds[i]);
- nextEntry.Add(neighboursIds[i]);
- }
- }
- v.Add(ep);
- }
- // run bfs
- while (C.Count > 0)
- {
- // get next candidate to check and expand
- var toExpand = C.Dequeue();
- if (context.IsActiveNode(toExpand))
- {
- if (W.Count < k)
- {
- W.Add(toExpand);
- if (W.Count > k)
- {
- W.Remove(W.First());
- }
- }
- }
- }
- entryPoints = nextEntry.Select(id => id).ToList();
- nextEntry.Clear();
- }
- while (W.Count < k && entryPoints.Any());
- C.Clear();
- v.Clear();
- return W;
- }
-
- public void Deserialize(Stream stream)
- {
- using (var reader = new MemoryStreamReader(stream))
- {
- _links.Deserialize(reader);
- }
- }
-
- public void DeserializeFromHNSW(Stream stream)
- {
- using (var reader = new MemoryStreamReader(stream))
- {
- reader.ReadInt32(); // EntryPoint
- reader.ReadInt32(); // MaxLayer
-
- int count = reader.ReadInt32(); // Vectors count
- for (int i = 0; i < count; i++)
- {
- var v = reader.ReadCompatible(); // Vector
- }
-
- var lc = reader.ReadInt32(); // countLayers
- _links.Deserialize(reader); // deserialize only base layer and skip another
- }
- }
-
- public void Serialize(Stream stream)
- {
- using (var writer = new MemoryStreamWriter(stream))
- {
- _links.Serialize(writer);
- }
- }
-
- public void Serialize(IBinaryWriter writer)
- {
- _links.Serialize(writer);
- }
-
- public void Deserialize(IBinaryReader reader)
- {
- _links.Deserialize(reader);
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/LAL/LALLinks.cs b/ZeroLevel.HNSW/Services/LAL/LALLinks.cs
deleted file mode 100644
index 3a3ef5d..0000000
--- a/ZeroLevel.HNSW/Services/LAL/LALLinks.cs
+++ /dev/null
@@ -1,81 +0,0 @@
-using System.Collections.Concurrent;
-using System.Collections.Generic;
-using System.Linq;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- internal class LALLinks
- : IBinarySerializable
- {
- private ConcurrentDictionary _set = new ConcurrentDictionary();
- internal IDictionary Links => _set;
-
- private readonly int[] _empty = new int[0];
- internal int Count => _set.Count;
-
- public LALLinks()
- {
- }
-
- internal IEnumerable<(int, int)> FindLinksForId(int id)
- {
- if (_set.ContainsKey(id))
- {
- return _set[id].Select(v => (id, v));
- }
- return Enumerable.Empty<(int, int)>();
- }
-
- internal int[] FindNeighbors(int id)
- {
- if (_set.ContainsKey(id))
- {
- return _set[id];
- }
- return _empty;
- }
-
- internal IEnumerable<(int, int)> Items()
- {
- return _set
- .SelectMany(pair => _set[pair.Key]
- .Select(v => (pair.Key, v)));
- }
-
- public void Dispose()
- {
- _set.Clear();
- _set = null;
- }
-
- public void Serialize(IBinaryWriter writer)
- {
- writer.WriteInt32(_set.Count);
- foreach (var record in _set)
- {
- writer.WriteInt32(record.Key);
- writer.WriteCollection(record.Value);
- }
- }
-
- public void Deserialize(IBinaryReader reader)
- {
- _set.Clear();
- _set = null;
- var count = reader.ReadInt32();
- _set = new ConcurrentDictionary(1, count);
-
- for (int i = 0; i < count; i++)
- {
- var id = reader.ReadInt32();
- var links_count = reader.ReadInt32();
- _set[id] = new int[links_count];
- for (int l = 0; l < links_count; l++)
- {
- _set[id][l] = reader.ReadInt32();
- }
- }
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/LAL/SplittedLALGraph.cs b/ZeroLevel.HNSW/Services/LAL/SplittedLALGraph.cs
deleted file mode 100644
index 3c2ce70..0000000
--- a/ZeroLevel.HNSW/Services/LAL/SplittedLALGraph.cs
+++ /dev/null
@@ -1,79 +0,0 @@
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- public class SplittedLALGraph
- : IBinarySerializable
- {
- private IDictionary _graphs;
-
- public SplittedLALGraph()
- {
- _graphs = new Dictionary();
- }
-
- public SplittedLALGraph(string filePath)
- {
- using (var fs = File.OpenRead(filePath))
- {
- using (var bs = new BufferedStream(fs, 1024 * 1024 * 32))
- {
- using (var reader = new MemoryStreamReader(bs))
- {
- Deserialize(reader);
- }
- }
- }
- }
-
- public void Save(string filePath)
- {
- using (var fs = File.OpenWrite(filePath))
- {
- using (var bs = new BufferedStream(fs, 1024 * 1024 * 32))
- {
- using (var writer = new MemoryStreamWriter(bs))
- {
- Serialize(writer);
- }
- }
- }
- }
-
- public void Append(LALGraph graph, int c)
- {
- _graphs.Add(c, graph);
- }
-
- public IDictionary> KNearest(int k, IDictionary contexts)
- {
- var result = new Dictionary>();
- int step = 1;
- foreach (var graph in _graphs)
- {
- result.Add(graph.Key, new List());
- var context = contexts[graph.Key];
- if (context.EntryPoints != null)
- {
- var partial_k = 1 + (int)(context.PercentInTotal * k);
- var r = graph.Value.KNearest(partial_k, context) as HashSet;
- result[graph.Key].AddRange(r);
- }
- step++;
- }
- return result;
- }
-
- public void Serialize(IBinaryWriter writer)
- {
- writer.WriteDictionary(this._graphs);
- }
- public void Deserialize(IBinaryReader reader)
- {
- this._graphs = reader.ReadDictionary();
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/Layer.cs b/ZeroLevel.HNSW/Services/Layer.cs
deleted file mode 100644
index f4489a1..0000000
--- a/ZeroLevel.HNSW/Services/Layer.cs
+++ /dev/null
@@ -1,470 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using ZeroLevel.HNSW.Services;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- ///
- /// NSW graph
- ///
- internal sealed class Layer
- : IBinarySerializable
- {
- private readonly NSWOptions _options;
- private readonly VectorSet _vectors;
- private readonly LinksSet _links;
- public readonly int M;
- private readonly Dictionary connections;
- internal IDictionary> Links => _links.Links;
-
- ///
- /// There are links е the layer
- ///
- internal bool HasLinks => (_links.Count > 0);
-
- internal IEnumerable this[int vector_index] => _links.FindNeighbors(vector_index);
-
- ///
- /// HNSW layer
- ///
- /// Article: Section 4.1:
- /// "Selection of the Mmax0 (the maximum number of connections that an element can have in the zero layer) also
- /// has a strong influence on the search performance, especially in case of high quality(high recall) search.
- /// Simulations show that setting Mmax0 to M(this corresponds to kNN graphs on each layer if the neighbors
- /// selection heuristic is not used) leads to a very strong performance penalty at high recall.
- /// Simulations also suggest that 2∙M is a good choice for Mmax0;
- /// setting the parameter higher leads to performance degradation and excessive memory usage."
- ///
- ///
- /// HNSW graph options
- /// General vector set
- internal Layer(NSWOptions options, VectorSet vectors, bool nswLayer)
- {
- _options = options;
- _vectors = vectors;
- M = nswLayer ? 2 * _options.M : _options.M;
- _links = new LinksSet(M);
- connections = new Dictionary(M + 1);
- }
-
- internal int FindEntryPointAtLayer(Func targetCosts)
- {
- if (_links.Count == 0) return EntryPoint;
- var set = new HashSet(_links.Items().Select(p => p.Item1));
- int minId = -1;
- float minDist = float.MaxValue;
- foreach (var id in set)
- {
- var d = targetCosts(id);
- if (d < minDist && Math.Abs(d) > float.Epsilon)
- {
- minDist = d;
- minId = id;
- }
- }
- return minId;
- }
-
- internal void Push(int q, int ep, MinHeap W, Func distance)
- {
- if (HasLinks == false)
- {
- AddBidirectionallConnections(q, q);
- }
- else
- {
- // W ← SEARCH - LAYER(q, ep, efConstruction, lc)
- foreach (var i in KNearestAtLayer(ep, distance, _options.EFConstruction))
- {
- W.Push(i);
- }
-
- int count = 0;
- connections.Clear();
- while (count < M && W.Count > 0)
- {
- var nearest = W.Pop();
- var nearest_nearest = GetNeighbors(nearest.Item1).ToArray();
- if (nearest_nearest.Length < M)
- {
- if (AddBidirectionallConnections(q, nearest.Item1))
- {
- connections.Add(nearest.Item1, nearest.Item2);
- count++;
- }
- }
- else
- {
- if ((M - count) < 2)
- {
- // remove link q - max_q
- var max = connections.OrderBy(pair => pair.Value).First();
- RemoveBidirectionallConnections(q, max.Key);
- connections.Remove(max.Key);
- }
- // get nearest_nearest candidate
- var mn_id = -1;
- var mn_d = float.MinValue;
- for (int i = 0; i < nearest_nearest.Length; i++)
- {
- var d = _options.Distance(_vectors[nearest.Item1], _vectors[nearest_nearest[i]]);
- if (q != nearest_nearest[i] && connections.ContainsKey(nearest_nearest[i]) == false)
- {
- if (mn_id == -1 || d > mn_d)
- {
- mn_d = d;
- mn_id = nearest_nearest[i];
- }
- }
- }
- // remove link neareset - nearest_nearest
- RemoveBidirectionallConnections(nearest.Item1, mn_id);
- // add link q - neareset
- if (AddBidirectionallConnections(q, nearest.Item1))
- {
- connections.Add(nearest.Item1, nearest.Item2);
- count++;
- }
- // add link q - max_nearest_nearest
- if (AddBidirectionallConnections(q, mn_id))
- {
- connections.Add(mn_id, mn_d);
- count++;
- }
- }
- }
- }
- }
-
- internal void RemoveBidirectionallConnections(int q, int p)
- {
- _links.RemoveIndex(q, p);
- }
-
- internal bool AddBidirectionallConnections(int q, int p)
- {
- if (q == p)
- {
- if (EntryPoint >= 0)
- {
- return _links.Add(q, EntryPoint);
- }
- else
- {
- EntryPoint = q;
- }
- }
- else
- {
- return _links.Add(q, p);
- }
- return false;
- }
-
- private int EntryPoint = -1;
-
- #region Implementation of https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
- ///
- /// Algorithm 2
- ///
- /// query element
- /// enter points ep
- /// Output: ef closest neighbors to q
- internal IEnumerable<(int, float)> KNearestAtLayer(int entryPointId, Func targetCosts, int ef)
- {
- /*
- * v ← ep // set of visited elements
- * C ← ep // set of candidates
- * W ← ep // dynamic list of found nearest neighbors
- * while │C│ > 0
- * c ← extract nearest element from C to q
- * f ← get furthest element from W to q
- * if distance(c, q) > distance(f, q)
- * break // all elements in W are evaluated
- * for each e ∈ neighbourhood(c) at layer lc // update C and W
- * if e ∉ v
- * v ← v ⋃ e
- * f ← get furthest element from W to q
- * if distance(e, q) < distance(f, q) or │W│ < ef
- * C ← C ⋃ e
- * W ← W ⋃ e
- * if │W│ > ef
- * remove furthest element from W to q
- * return W
- */
-
- int farthestId;
- float farthestDistance;
- var d = targetCosts(entryPointId);
-
- var v = new VisitedBitSet(_vectors.Count, _options.M);
- // * v ← ep // set of visited elements
- v.Add(entryPointId);
- // * C ← ep // set of candidates
- var C = new MinHeap(ef);
- C.Push((entryPointId, d));
- // * W ← ep // dynamic list of found nearest neighbors
- var W = new MaxHeap(ef + 1);
- W.Push((entryPointId, d));
-
- // * while │C│ > 0
- while (C.Count > 0)
- {
- // * c ← extract nearest element from C to q
- var c = C.Pop();
- // * f ← get furthest element from W to q
- // * if distance(c, q) > distance(f, q)
- if (W.TryPeek(out _, out farthestDistance) && c.Item2 > farthestDistance)
- {
- // * break // all elements in W are evaluated
- break;
- }
-
- // * for each e ∈ neighbourhood(c) at layer lc // update C and W
- foreach (var e in GetNeighbors(c.Item1))
- {
- // * if e ∉ v
- if (!v.Contains(e))
- {
- // * v ← v ⋃ e
- v.Add(e);
- // * f ← get furthest element from W to q
- W.TryPeek(out farthestId, out farthestDistance);
-
- var eDistance = targetCosts(e);
- // * if distance(e, q) < distance(f, q) or │W│ < ef
- if (W.Count < ef || (farthestId >= 0 && eDistance < farthestDistance))
- {
- // * C ← C ⋃ e
- C.Push((e, eDistance));
- // * W ← W ⋃ e
- W.Push((e, eDistance));
- // * if │W│ > ef
- if (W.Count > ef)
- {
- // * remove furthest element from W to q
- W.Pop();
- }
- }
- }
- }
- }
- C.Clear();
- v.Clear();
- return W;
- }
-
- internal IEnumerable<(int, float)> KNearestAtLayer(int entryPointId, Func targetCosts, int ef, SearchContext context)
- {
- int farthestId;
- float farthestDistance;
- var d = targetCosts(entryPointId);
-
- var v = new VisitedBitSet(_vectors.Count, _options.M);
- // * v ← ep // set of visited elements
- v.Add(entryPointId);
- // * C ← ep // set of candidates
- var C = new MinHeap(ef);
- C.Push((entryPointId, d));
- // * W ← ep // dynamic list of found nearest neighbors
- var W = new MaxHeap(ef + 1);
- if (context.IsActiveNode(entryPointId))
- {
- W.Push((entryPointId, d));
- }
-
- // * while │C│ > 0
- while (C.Count > 0)
- {
- // * c ← extract nearest element from C to q
- var c = C.Pop();
- // * f ← get furthest element from W to q
- // * if distance(c, q) > distance(f, q)
- if (W.TryPeek(out _, out farthestDistance) && c.Item2 > farthestDistance)
- {
- // * break // all elements in W are evaluated
- break;
- }
-
- // * for each e ∈ neighbourhood(c) at layer lc // update C and W
- foreach (var e in GetNeighbors(c.Item1))
- {
- // * if e ∉ v
- if (!v.Contains(e))
- {
- // * v ← v ⋃ e
- v.Add(e);
- // * f ← get furthest element from W to q
- W.TryPeek(out farthestId, out farthestDistance);
-
- var eDistance = targetCosts(e);
- // * if distance(e, q) < distance(f, q) or │W│ < ef
- if (W.Count < ef || (farthestId >= 0 && eDistance < farthestDistance))
- {
- // * C ← C ⋃ e
- C.Push((e, eDistance));
- // * W ← W ⋃ e
- if (context.IsActiveNode(e))
- {
- W.Push((e, eDistance));
- if (W.Count > ef)
- {
- W.Pop();
- }
- }
- }
- }
- }
- }
- C.Clear();
- v.Clear();
- return W;
- }
-
- ///
- /// Algorithm 2
- ///
- /// query element
- /// enter points ep
- /// Output: ef closest neighbors to q
- internal IEnumerable<(int, float)> KNearestAвtLayer(int entryPointId, Func targetCosts, int ef, SearchContext context)
- {
- int farthestId;
- float farthestDistance;
- var d = targetCosts(entryPointId);
-
- var v = new VisitedBitSet(_vectors.Count, _options.M);
- // v ← ep // set of visited elements
- v.Add(entryPointId);
- // C ← ep // set of candidates
- var C = new MinHeap(ef);
- C.Push((entryPointId, d));
- // W ← ep // dynamic list of found nearest neighbors
- var W = new MaxHeap(ef + 1);
- // W ← ep // dynamic list of found nearest neighbors
- if (context.IsActiveNode(entryPointId))
- {
- W.Push((entryPointId, d));
- }
- // run bfs
- while (C.Count > 0)
- {
- // get next candidate to check and expand
- var toExpand = C.Pop();
- if (W.TryPeek(out _, out farthestDistance) && toExpand.Item2 > farthestDistance)
- {
- // the closest candidate is farther than farthest result
- break;
- }
-
- // expand candidate
- var neighboursIds = GetNeighbors(toExpand.Item1).ToArray();
- for (int i = 0; i < neighboursIds.Length; ++i)
- {
- int neighbourId = neighboursIds[i];
- if (!v.Contains(neighbourId))
- {
- W.TryPeek(out farthestId, out farthestDistance);
- // enqueue perspective neighbours to expansion list
- var neighbourDistance = targetCosts(neighbourId);
- if (context.IsActiveNode(neighbourId))
- {
- if (W.Count < ef || (farthestId >= 0 && neighbourDistance < farthestDistance))
- {
- W.Push((neighbourId, neighbourDistance));
- if (W.Count > ef)
- {
- W.Pop();
- }
- }
- }
- if (W.TryPeek(out _, out farthestDistance) && neighbourDistance < farthestDistance)
- {
- C.Push((neighbourId, neighbourDistance));
- }
- v.Add(neighbourId);
- }
- }
- }
- C.Clear();
- v.Clear();
- return W;
- }
-
- ///
- /// Algorithm 2, modified for LookAlike
- ///
- /// query element
- /// enter points ep
- /// Output: ef closest neighbors to q
- internal IEnumerable<(int, float)> KNearestAtLayer(int ef, SearchContext context)
- {
- var distance = new Func((id1, id2) => _options.Distance(_vectors[id1], _vectors[id2]));
- // v ← ep // set of visited elements
- var v = new VisitedBitSet(_vectors.Count, _options.M);
- // C ← ep // set of candidates
- var C = new MinHeap(ef);
- float dist;
- var W = new MaxHeap(ef + 1);
- var entryPoints = context.EntryPoints;
-
- do
- {
- foreach (var ep in entryPoints)
- {
- var neighboursIds = GetNeighbors(ep).ToArray();
- for (int i = 0; i < neighboursIds.Length; ++i)
- {
- C.Push((ep, distance(ep, neighboursIds[i])));
- }
- v.Add(ep);
- }
- // run bfs
- while (C.Count > 0)
- {
- // get next candidate to check and expand
- var toExpand = C.Pop();
- if (W.TryPeek(out _, out dist) && toExpand.Item2 > dist)
- {
- // the closest candidate is farther than farthest result
- break;
- }
- if (context.IsActiveNode(toExpand.Item1))
- {
- if (W.Count < ef || W.Count == 0 || (W.TryPeek(out _, out dist) && toExpand.Item2 < dist))
- {
- W.Push((toExpand.Item1, toExpand.Item2));
- if (W.Count > ef)
- {
- W.Pop();
- }
- }
- }
- }
-
- entryPoints = W.Select(p => p.Item1);
- }
- while (W.Count < ef);
- C.Clear();
- v.Clear();
- return W;
- }
- #endregion
-
- internal IEnumerable GetNeighbors(int id) => _links.FindNeighbors(id);
-
- public void Serialize(IBinaryWriter writer)
- {
- _links.Serialize(writer);
- }
-
- public void Deserialize(IBinaryReader reader)
- {
- _links.Deserialize(reader);
- }
-
- // internal Histogram GetHistogram(HistogramMode mode) => _links.CalculateHistogram(mode);
- }
-}
diff --git a/ZeroLevel.HNSW/Services/LinksSet.cs b/ZeroLevel.HNSW/Services/LinksSet.cs
deleted file mode 100644
index 2ded5f4..0000000
--- a/ZeroLevel.HNSW/Services/LinksSet.cs
+++ /dev/null
@@ -1,103 +0,0 @@
-using System.Collections.Concurrent;
-using System.Collections.Generic;
-using System.Linq;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- public class LinksSet
- {
- private ConcurrentDictionary> _set = new ConcurrentDictionary>();
- internal IDictionary> Links => _set;
- internal int Count => _set.Count;
- private readonly int _M;
-
- public LinksSet(int M)
- {
- _M = M;
- }
-
- internal IEnumerable<(int, int)> FindLinksForId(int id)
- {
- if (_set.ContainsKey(id))
- {
- return _set[id].Select(v => (id, v));
- }
- return Enumerable.Empty<(int, int)>();
- }
-
- internal IEnumerable FindNeighbors(int id)
- {
- if (_set.ContainsKey(id))
- {
- return _set[id];
- }
- return Enumerable.Empty();
- }
-
- internal IEnumerable<(int, int)> Items()
- {
- return _set
- .SelectMany(pair => _set[pair.Key]
- .Select(v => (pair.Key, v)));
- }
-
- internal void RemoveIndex(int id1, int id2)
- {
- _set[id1].Remove(id2);
- _set[id2].Remove(id1);
- }
-
- internal bool Add(int id1, int id2)
- {
- if (!_set.ContainsKey(id1))
- {
- _set[id1] = new HashSet(_M + 1);
- }
- if (!_set.ContainsKey(id2))
- {
- _set[id2] = new HashSet(_M + 1);
- }
- var r1 = _set[id1].Add(id2);
- var r2 = _set[id2].Add(id1);
- return r1 || r2;
- }
-
-
- public void Dispose()
- {
- _set.Clear();
- _set = null;
- }
- public void Serialize(IBinaryWriter writer)
- {
- writer.WriteInt32(_set.Count);
- foreach (var record in _set)
- {
- writer.WriteInt32(record.Key);
- writer.WriteCollection(record.Value);
- }
- }
- public void Deserialize(IBinaryReader reader)
- {
- /*if (reader.ReadBoolean() != false)
- {
- throw new InvalidOperationException("Incompatible format");
- }*/
- _set.Clear();
- _set = null;
- var count = reader.ReadInt32();
- _set = new ConcurrentDictionary>();
- for (int i = 0; i < count; i++)
- {
- var id = reader.ReadInt32();
- var links_count = reader.ReadInt32();
- _set[id] = new HashSet(links_count);
- for (var l = 0; l < links_count; l++)
- {
- _set[id].Add(reader.ReadInt32());
- }
- }
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/MaxHeap.cs b/ZeroLevel.HNSW/Services/MaxHeap.cs
deleted file mode 100644
index cc66b37..0000000
--- a/ZeroLevel.HNSW/Services/MaxHeap.cs
+++ /dev/null
@@ -1,130 +0,0 @@
-using System;
-using System.Collections;
-using System.Collections.Generic;
-
-namespace ZeroLevel.HNSW.Services
-{
- ///
- /// Max element always on top
- ///
- public class MaxHeap :
- IEnumerable<(int, float)>
- {
- private readonly List<(int, float)> _elements;
-
- public MaxHeap(int size = -1)
- {
- if (size > 0)
- _elements = new List<(int, float)>(size);
- else
- _elements = new List<(int, float)>();
- }
-
- private int GetLeftChildIndex(int elementIndex) => 2 * elementIndex + 1;
- private int GetRightChildIndex(int elementIndex) => 2 * elementIndex + 2;
- private int GetParentIndex(int elementIndex) => (elementIndex - 1) / 2;
-
- private bool HasLeftChild(int elementIndex) => GetLeftChildIndex(elementIndex) < _elements.Count;
- private bool HasRightChild(int elementIndex) => GetRightChildIndex(elementIndex) < _elements.Count;
- private bool IsRoot(int elementIndex) => elementIndex == 0;
-
- private (int, float) GetLeftChild(int elementIndex) => _elements[GetLeftChildIndex(elementIndex)];
- private (int, float) GetRightChild(int elementIndex) => _elements[GetRightChildIndex(elementIndex)];
- private (int, float) GetParent(int elementIndex) => _elements[GetParentIndex(elementIndex)];
-
- public int Count => _elements.Count;
-
- public void Clear()
- {
- _elements.Clear();
- }
-
- private void Swap(int firstIndex, int secondIndex)
- {
- var temp = _elements[firstIndex];
- _elements[firstIndex] = _elements[secondIndex];
- _elements[secondIndex] = temp;
- }
-
- public bool IsEmpty()
- {
- return _elements.Count == 0;
- }
-
- public bool TryPeek(out int id, out float value)
- {
- if (_elements.Count == 0)
- {
- id = -1;
- value = 0;
- return false;
- }
- id = _elements[0].Item1;
- value = _elements[0].Item2;
- return true;
- }
-
- public (int, float) Pop()
- {
- if (_elements.Count == 0)
- throw new IndexOutOfRangeException();
-
- var result = _elements[0];
- _elements[0] = _elements[_elements.Count - 1];
- _elements.RemoveAt(_elements.Count - 1);
-
- ReCalculateDown();
-
- return result;
- }
-
- public void Push((int, float) element)
- {
- _elements.Add(element);
-
- ReCalculateUp();
- }
-
- private void ReCalculateDown()
- {
- int index = 0;
- while (HasLeftChild(index))
- {
- var biggerIndex = GetLeftChildIndex(index);
- if (HasRightChild(index) && GetRightChild(index).Item2 > GetLeftChild(index).Item2)
- {
- biggerIndex = GetRightChildIndex(index);
- }
-
- if (_elements[biggerIndex].Item2 < _elements[index].Item2)
- {
- break;
- }
-
- Swap(biggerIndex, index);
- index = biggerIndex;
- }
- }
-
- private void ReCalculateUp()
- {
- var index = _elements.Count - 1;
- while (!IsRoot(index) && _elements[index].Item2 > GetParent(index).Item2)
- {
- var parentIndex = GetParentIndex(index);
- Swap(parentIndex, index);
- index = parentIndex;
- }
- }
-
- public IEnumerator<(int, float)> GetEnumerator()
- {
- return _elements.GetEnumerator();
- }
-
- IEnumerator IEnumerable.GetEnumerator()
- {
- return _elements.GetEnumerator();
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/MinHeap.cs b/ZeroLevel.HNSW/Services/MinHeap.cs
deleted file mode 100644
index c860da2..0000000
--- a/ZeroLevel.HNSW/Services/MinHeap.cs
+++ /dev/null
@@ -1,130 +0,0 @@
-using System;
-using System.Collections;
-using System.Collections.Generic;
-
-namespace ZeroLevel.HNSW.Services
-{
- ///
- /// Min element always on top
- ///
- public class MinHeap :
- IEnumerable<(int, float)>
- {
- private readonly List<(int, float)> _elements;
-
- public MinHeap(int size = -1)
- {
- if (size > 0)
- _elements = new List<(int, float)>(size);
- else
- _elements = new List<(int, float)>();
- }
-
- private int GetLeftChildIndex(int elementIndex) => 2 * elementIndex + 1;
- private int GetRightChildIndex(int elementIndex) => 2 * elementIndex + 2;
- private int GetParentIndex(int elementIndex) => (elementIndex - 1) / 2;
-
- private bool HasLeftChild(int elementIndex) => GetLeftChildIndex(elementIndex) < _elements.Count;
- private bool HasRightChild(int elementIndex) => GetRightChildIndex(elementIndex) < _elements.Count;
- private bool IsRoot(int elementIndex) => elementIndex == 0;
-
- private (int, float) GetLeftChild(int elementIndex) => _elements[GetLeftChildIndex(elementIndex)];
- private (int, float) GetRightChild(int elementIndex) => _elements[GetRightChildIndex(elementIndex)];
- private (int, float) GetParent(int elementIndex) => _elements[GetParentIndex(elementIndex)];
-
- public int Count => _elements.Count;
-
- public void Clear()
- {
- _elements.Clear();
- }
-
- private void Swap(int firstIndex, int secondIndex)
- {
- var temp = _elements[firstIndex];
- _elements[firstIndex] = _elements[secondIndex];
- _elements[secondIndex] = temp;
- }
-
- public bool IsEmpty()
- {
- return _elements.Count == 0;
- }
-
- public bool TryPeek(out int id, out float value)
- {
- if (_elements.Count == 0)
- {
- id = -1;
- value = 0;
- return false;
- }
- id = _elements[0].Item1;
- value = _elements[0].Item2;
- return true;
- }
-
- public (int, float) Pop()
- {
- if (_elements.Count == 0)
- throw new IndexOutOfRangeException();
-
- var result = _elements[0];
- _elements[0] = _elements[_elements.Count - 1];
- _elements.RemoveAt(_elements.Count - 1);
-
- ReCalculateDown();
-
- return result;
- }
-
- public void Push((int, float) element)
- {
- _elements.Add(element);
-
- ReCalculateUp();
- }
-
- private void ReCalculateDown()
- {
- int index = 0;
- while (HasLeftChild(index))
- {
- var smallerIndex = GetLeftChildIndex(index);
- if (HasRightChild(index) && GetRightChild(index).Item2 < GetLeftChild(index).Item2)
- {
- smallerIndex = GetRightChildIndex(index);
- }
-
- if (_elements[smallerIndex].Item2 >= _elements[index].Item2)
- {
- break;
- }
-
- Swap(smallerIndex, index);
- index = smallerIndex;
- }
- }
-
- private void ReCalculateUp()
- {
- var index = _elements.Count - 1;
- while (!IsRoot(index) && _elements[index].Item2 < GetParent(index).Item2)
- {
- var parentIndex = GetParentIndex(index);
- Swap(parentIndex, index);
- index = parentIndex;
- }
- }
-
- public IEnumerator<(int, float)> GetEnumerator()
- {
- return _elements.GetEnumerator();
- }
-
- IEnumerator IEnumerable.GetEnumerator()
- {
- return _elements.GetEnumerator();
- }
- }
-}
\ No newline at end of file
diff --git a/ZeroLevel.HNSW/Services/Quantizator.cs b/ZeroLevel.HNSW/Services/Quantizator.cs
deleted file mode 100644
index 8f031fb..0000000
--- a/ZeroLevel.HNSW/Services/Quantizator.cs
+++ /dev/null
@@ -1,108 +0,0 @@
-using System;
-
-namespace ZeroLevel.HNSW.Services
-{
- public class Quantizator
- {
- private readonly float _min;
- private readonly float _max;
- private readonly float _diff;
-
- public Quantizator(float min, float max)
- {
- _min = min;
- _max = max;
- _diff = _max - _min;
- }
-
- public byte[] Quantize(float[] v)
- {
- var result = new byte[v.Length];
- for (int i = 0; i < v.Length; i++)
- {
- result[i] = _quantizeInRange(v[i]);
- }
- return result;
- }
-
- public int[] QuantizeToInt(float[] v)
- {
- var diff = v.Length % 4;
- int count = (v.Length - diff) / 4;
- var result = new int[((diff == 0) ? 0 : 1) + (v.Length / 4)];
- byte[] buf = new byte[4];
- int i = 0;
- for (; i < count * 4; i += 4)
- {
- buf[0] = _quantizeInRange(v[i]);
- buf[1] = _quantizeInRange(v[i + 1]);
- buf[2] = _quantizeInRange(v[i + 2]);
- buf[3] = _quantizeInRange(v[i + 3]);
- result[(i >> 2)] = BitConverter.ToInt32(buf);
- }
- if (diff != 0)
- {
- for (var j = 0; j < diff; j++)
- {
- buf[j] = _quantizeInRange(v[i + j]);
- }
- for (var j = diff; j < 4; j++)
- {
- buf[j] = 0;
- }
- result[(i >> 2)] = BitConverter.ToInt32(buf);
- }
- return result;
- }
-
- public long[] QuantizeToLong(float[] v)
- {
- var diff = v.Length % 8;
- int count = (v.Length - diff) / 8;
- var result = new long[((diff == 0) ? 0 : 1) + (v.Length / 8)];
- byte[] buf = new byte[8];
- int i = 0;
- for (; i < count * 8; i += 8)
- {
- buf[0] = _quantizeInRange(v[i + 0]);
- buf[1] = _quantizeInRange(v[i + 1]);
- buf[2] = _quantizeInRange(v[i + 2]);
- buf[3] = _quantizeInRange(v[i + 3]);
- buf[4] = _quantizeInRange(v[i + 4]);
- buf[5] = _quantizeInRange(v[i + 5]);
- buf[6] = _quantizeInRange(v[i + 6]);
- buf[7] = _quantizeInRange(v[i + 7]);
-
- result[(i >> 3)] = BitConverter.ToInt64(buf);
- }
- if (diff != 0)
- {
- for (var j = 0; j < diff; j++)
- {
- buf[j] = _quantizeInRange(v[i + j]);
- }
- for (var j = diff; j < 8; j++)
- {
- buf[j] = 0;
- }
- result[(i >> 3)] = BitConverter.ToInt64(buf);
- }
- return result;
- }
-
- //Map x in [0,1] to {0, 1, ..., 255}
- private byte _quantize(float x)
- {
- x = (int)Math.Floor(256 * x);
- if (x < 0) return 0;
- else if (x > 255) return 255;
- else return (byte)x;
- }
-
- //Map x in [min,max] to {0, 1, ..., 255}
- private byte _quantizeInRange(float x)
- {
- return _quantize((x - _min) / (_diff));
- }
- }
-}
diff --git a/ZeroLevel.HNSW/Services/VectorSet.cs b/ZeroLevel.HNSW/Services/VectorSet.cs
deleted file mode 100644
index 07d1550..0000000
--- a/ZeroLevel.HNSW/Services/VectorSet.cs
+++ /dev/null
@@ -1,88 +0,0 @@
-using System.Collections;
-using System.Collections.Generic;
-using System.Threading;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- internal sealed class VectorSet
- : IEnumerable, IBinarySerializable
- {
- private List _set = new List();
- private SpinLock _lock = new SpinLock();
-
- internal T this[int index] => _set[index];
- internal int Count => _set.Count;
-
- internal int Append(T vector)
- {
- bool gotLock = false;
- gotLock = false;
- try
- {
- _lock.Enter(ref gotLock);
- _set.Add(vector);
- return _set.Count - 1;
- }
- finally
- {
- // Only give up the lock if you actually acquired it
- if (gotLock) _lock.Exit();
- }
- }
-
- internal int[] Append(IEnumerable vectors)
- {
- bool gotLock = false;
- int startIndex, endIndex;
- gotLock = false;
- try
- {
- _lock.Enter(ref gotLock);
- startIndex = _set.Count;
- _set.AddRange(vectors);
- endIndex = _set.Count;
- }
- finally
- {
- // Only give up the lock if you actually acquired it
- if (gotLock) _lock.Exit();
- }
- var ids = new int[endIndex - startIndex];
- for (int i = startIndex, j = 0; i < endIndex; i++, j++)
- {
- ids[j] = i;
- }
- return ids;
- }
-
- public void Deserialize(IBinaryReader reader)
- {
- int count = reader.ReadInt32();
- _set = new List(count + 1);
- for (int i = 0; i < count; i++)
- {
- _set.Add(reader.ReadCompatible());
- }
- }
-
- public void Serialize(IBinaryWriter writer)
- {
- writer.WriteInt32(_set.Count);
- foreach (var r in _set)
- {
- writer.WriteCompatible(r);
- }
- }
-
- public IEnumerator GetEnumerator()
- {
- return _set.GetEnumerator();
- }
-
- IEnumerator IEnumerable.GetEnumerator()
- {
- return _set.GetEnumerator();
- }
- }
-}
diff --git a/ZeroLevel.HNSW/SmallWorld.cs b/ZeroLevel.HNSW/SmallWorld.cs
deleted file mode 100644
index c7295cc..0000000
--- a/ZeroLevel.HNSW/SmallWorld.cs
+++ /dev/null
@@ -1,375 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Threading;
-using ZeroLevel.HNSW.Services;
-using ZeroLevel.Services.Serialization;
-
-namespace ZeroLevel.HNSW
-{
- public class SmallWorld
- {
- private readonly NSWOptions _options;
- private VectorSet _vectors;
- private Layer[] _layers;
- private int EntryPoint = 0;
- private int MaxLayer = 0;
- private readonly ProbabilityLayerNumberGenerator _layerLevelGenerator;
- private ReaderWriterLockSlim _lockGraph = new ReaderWriterLockSlim();
-
- public readonly Func DistanceFunction;
- public TItem GetVector(int id) => _vectors[id];
- public IDictionary> GetLinks() => _layers[0].Links;
-
- public SmallWorld(NSWOptions options)
- {
- _options = options;
- _vectors = new VectorSet();
- _layers = new Layer[_options.LayersCount];
- _layerLevelGenerator = new ProbabilityLayerNumberGenerator(_options.LayersCount, _options.M);
-
- DistanceFunction = new Func((id1, id2) => _options.Distance(_vectors[id1], _vectors[id2]));
-
- for (int i = 0; i < _options.LayersCount; i++)
- {
- _layers[i] = new Layer(_options, _vectors, i == 0);
- }
- }
-
- public SmallWorld(NSWOptions options, Stream stream)
- {
- _options = options;
- _layerLevelGenerator = new ProbabilityLayerNumberGenerator(_options.LayersCount, _options.M);
- DistanceFunction = new Func((id1, id2) => _options.Distance(_vectors[id1], _vectors[id2]));
- Deserialize(stream);
- }
-
- ///
- /// Search in the graph K for vectors closest to a given vector
- ///
- /// Given vector
- /// Count of elements for search
- ///
- ///
- public IEnumerable<(int, TItem, float)> Search(TItem vector, int k)
- {
- foreach (var pair in KNearest(vector, k))
- {
- yield return (pair.Item1, _vectors[pair.Item1], pair.Item2);
- }
- }
-
- public IEnumerable<(int, TItem, float)> Search(TItem vector, int k, SearchContext context)
- {
- if (context == null)
- {
- foreach (var pair in KNearest(vector, k))
- {
- yield return (pair.Item1, _vectors[pair.Item1], pair.Item2);
- }
- }
- else
- {
- foreach (var pair in KNearest(vector, k, context))
- {
- yield return (pair.Item1, _vectors[pair.Item1], pair.Item2);
- }
- }
- }
-
- public IEnumerable<(int, TItem, float)> Search(int k, SearchContext context)
- {
- if (context == null)
- {
- throw new ArgumentNullException(nameof(context));
- }
- else
- {
- foreach (var pair in KNearest(k, context))
- {
- yield return (pair.Item1, _vectors[pair.Item1], pair.Item2);
- }
- }
- }
-
- ///
- /// Adding vectors batch
- ///
- /// Vectors
- /// Vector identifiers in a graph
- public int[] AddItems(IEnumerable vectors)
- {
- _lockGraph.EnterWriteLock();
- try
- {
- var ids = _vectors.Append(vectors);
- for (int i = 0; i < ids.Length; i++)
- {
- INSERT(ids[i]);
- }
- return ids;
- }
- finally
- {
- _lockGraph.ExitWriteLock();
- }
- }
-
- #region https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
- ///
- /// Algorithm 1
- ///
- private void INSERT(int q)
- {
- var distance = new Func(candidate => _options.Distance(_vectors[q], _vectors[candidate]));
- // W ← ∅ // list for the currently found nearest elements
- var W = new MinHeap(_options.EFConstruction + 1);
- // ep ← get enter point for hnsw
- var ep = _layers[MaxLayer].FindEntryPointAtLayer(distance);
- if (ep == -1)
- ep = EntryPoint;
-
- var epDist = distance(ep);
- // L ← level of ep // top layer for hnsw
- var L = MaxLayer;
- // l ← ⌊-ln(unif(0..1))∙mL⌋ // new element’s level
- int l = _layerLevelGenerator.GetRandomLayer();
-
- // Проход с верхнего уровня до уровня где появляется элемент, для нахождения точки входа
- int id;
- float value;
- // for lc ← L … l+1
- for (int lc = L; lc > l; --lc)
- {
- // W ← SEARCH-LAYER(q, ep, ef = 1, lc)
- foreach (var i in _layers[lc].KNearestAtLayer(ep, distance, 1))
- {
- W.Push(i);
- }
- // ep ← get the nearest element from W to q
- if (W.TryPeek(out id, out value))
- {
- ep = id;
- epDist = value;
- }
- W.Clear();
- }
- //for lc ← min(L, l) … 0
- // connecting new node to the small world
- for (int lc = Math.Min(L, l); lc >= 0; --lc)
- {
- _layers[lc].Push(q, ep, W, distance);
- // ep ← W
- if (W.TryPeek(out id, out value))
- {
- ep = id;
- epDist = value;
- }
- W.Clear();
- }
- // if l > L
- if (l > L)
- {
- // set enter point for hnsw to q
- L = l;
- MaxLayer = l;
- EntryPoint = ep;
- }
- }
-
- public void TestWorld()
- {
- for (var v = 0; v < _vectors.Count; v++)
- {
- var nearest = _layers[0][v].ToArray();
- if (nearest.Length > _layers[0].M)
- {
- Console.WriteLine($"V{v}. Count of links ({nearest.Length}) more than max ({_layers[0].M})");
- }
- }
- // coverage test
- var ep = 0;
- var visited = new HashSet();
- var next = new Stack