diff --git a/TestApp/Program.cs b/TestApp/Program.cs
index 06a9951..593761c 100644
--- a/TestApp/Program.cs
+++ b/TestApp/Program.cs
@@ -6,6 +6,7 @@ using ZeroLevel;
using ZeroLevel.Logging;
using ZeroLevel.Network;
using ZeroLevel.Services.Serialization;
+using ZeroLevel.Services.Trees;
namespace TestApp
{
@@ -68,5 +69,16 @@ namespace TestApp
Thread.Sleep(2000);
}
}
+
+ public static double[] Generate(int vector_size)
+ {
+ var rnd = new Random((int)Environment.TickCount);
+ var vector = new double[vector_size];
+ for (int i = 0; i < vector_size; i++)
+ {
+ vector[i] = 50.0d - rnd.NextDouble() * 100.0d;
+ }
+ return vector;
+ }
}
}
\ No newline at end of file
diff --git a/TestHNSW/HNSWDemo/HNSWDemo.csproj b/TestHNSW/HNSWDemo/HNSWDemo.csproj
new file mode 100644
index 0000000..9b31502
--- /dev/null
+++ b/TestHNSW/HNSWDemo/HNSWDemo.csproj
@@ -0,0 +1,12 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net5.0</TargetFramework>
+  </PropertyGroup>
+
+</Project>
diff --git a/TestHNSW/HNSWDemo/Program.cs b/TestHNSW/HNSWDemo/Program.cs
new file mode 100644
index 0000000..07df108
--- /dev/null
+++ b/TestHNSW/HNSWDemo/Program.cs
@@ -0,0 +1,121 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Threading;
+using ZeroLevel.HNSW;
+
+namespace HNSWDemo
+{
+ class Program
+ {
+ public enum Gender
+ {
+ Unknown, Male, Female
+ }
+
+ public class Person
+ {
+ public Gender Gender { get; set; }
+ public int Age { get; set; }
+ public long Number { get; set; }
+
+ private static (float[], Person) Generate(int vector_size)
+ {
+ var rnd = new Random((int)Environment.TickCount);
+ var vector = new float[vector_size];
+ DefaultRandomGenerator.Instance.NextFloats(vector);
+ VectorUtils.NormalizeSIMD(vector);
+ var p = new Person();
+ p.Age = rnd.Next(15, 80);
+ var gr = rnd.Next(0, 3);
+ p.Gender = (gr == 0) ? Gender.Male : (gr == 1) ? Gender.Female : Gender.Unknown;
+ p.Number = CreateNumber(rnd);
+ return (vector, p);
+ }
+
+ public static List<(float[], Person)> GenerateRandom(int vectorSize, int vectorsCount)
+ {
+ var vectors = new List<(float[], Person)>();
+ for (int i = 0; i < vectorsCount; i++)
+ {
+ vectors.Add(Generate(vectorSize));
+ }
+ return vectors;
+ }
+
+ static HashSet<long> _exists = new HashSet<long>();
+ private static long CreateNumber(Random rnd)
+ {
+ long start_number;
+ do
+ {
+ start_number = 79600000000L;
+ start_number = start_number + rnd.Next(4, 8) * 10000000;
+ start_number += rnd.Next(0, 1000000);
+ }
+ while (_exists.Add(start_number) == false);
+ return start_number;
+ }
+ }
+
+ private static List<float[]> RandomVectors(int vectorSize, int vectorsCount)
+ {
+ var vectors = new List<float[]>();
+ for (int i = 0; i < vectorsCount; i++)
+ {
+ var vector = new float[vectorSize];
+ DefaultRandomGenerator.Instance.NextFloats(vector);
+ VectorUtils.NormalizeSIMD(vector);
+ vectors.Add(vector);
+ }
+ return vectors;
+ }
+
+ private static Dictionary<int, Person> _database = new Dictionary<int, Person>();
+
+ static void Main(string[] args)
+ {
+ var dimensionality = 128;
+ var testCount = 1000;
+ var count = 100000;
+ var batchSize = 5000;
+ var samples = Person.GenerateRandom(dimensionality, count);
+
+ var sw = new Stopwatch();
+ var world = new SmallWorld(NSWOptions.Create(6, 4, 120, 120, CosineDistance.ForUnits));
+ for (int i = 0; i < (count / batchSize); i++)
+ {
+ var batch = samples.Skip(i * batchSize).Take(batchSize).ToArray();
+ sw.Restart();
+ var ids = world.AddItems(batch.Select(b => b.Item1).ToArray());
+ sw.Stop();
+ Console.WriteLine($"Batch [{i}]. Insert {ids.Length} items on {sw.ElapsedMilliseconds} ms");
+ for (int bi = 0; bi < batch.Length; bi++)
+ {
+ _database.Add(ids[bi], batch[bi].Item2);
+ }
+ }
+
+ var vectors = RandomVectors(dimensionality, testCount);
+
+ //HNSWFilter filter = new HNSWFilter(ids => ids.Where(id => { var p = _database[id]; return p.Age > 45 && p.Gender == Gender.Feemale; }));
+
+/*var fackupCount = 0;
+ foreach (var v in vectors)
+ {
+ var result = world.Search(v, 10, filter);
+ foreach (var r in result)
+ {
+ if (_database[r.Item1].Age <= 45 || _database[r.Item1].Gender != Gender.Feemale)
+ {
+ Interlocked.Increment(ref fackupCount);
+ }
+ }
+ }*/
+
+ //Console.WriteLine($"Completed. Fackup count: {fackupCount}");
+ Console.ReadKey();
+ }
+ }
+}
diff --git a/ZeroLevel.HNSW/Layer.cs b/ZeroLevel.HNSW/Layer.cs
new file mode 100644
index 0000000..7fb8f7c
--- /dev/null
+++ b/ZeroLevel.HNSW/Layer.cs
@@ -0,0 +1,284 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace ZeroLevel.HNSW
+{
+ ///
+ /// NSW graph
+ ///
+ internal sealed class Layer
+ {
+ private readonly NSWOptions _options;
+ private readonly VectorSet _vectors;
+ private CompactBiDirectionalLinksSet _links = new CompactBiDirectionalLinksSet();
+
+ public Layer(NSWOptions options, VectorSet vectors)
+ {
+ _options = options;
+ _vectors = vectors;
+ }
+
+ public void AddBidirectionallConnectionts(int q, int p, float qpDistance)
+ {
+ // поиск в ширину ближайших узлов к найденному
+ var nearest = _links.FindLinksForId(p).ToArray();
+ // если у найденного узла максимальное количество связей
+ // if │eConn│ > Mmax // shrink connections of e
+ if (nearest.Length >= _options.M)
+ {
+ // ищем связь с самой большой дистанцией
+ float distance = nearest[0].Item3;
+ int index = 0;
+ for (int ni = 1; ni < nearest.Length; ni++)
+ {
+ if (nearest[ni].Item3 > distance)
+ {
+ index = ni;
+ distance = nearest[ni].Item3;
+ }
+ }
+ // делаем перелинковку вставляя новый узел между найденными
+ var id1 = nearest[index].Item1;
+ var id2 = nearest[index].Item2;
+ _links.Relink(id1, id2, q, qpDistance, _options.Distance(_vectors[id2], _vectors[q]));
+ }
+ else
+ {
+ // добавляем связь нового узла к найденному
+ _links.Add(q, p, qpDistance);
+ }
+ }
+
+ public int GetEntryPointFor(int q)
+ {
+ var randomLinkId = DefaultRandomGenerator.Instance.Next(0, _links.Count);
+ var entryId = _links[randomLinkId].Item1;
+ var v = new VisitedBitSet(_vectors._set.Count, _options.M);
+ // v ← ep // set of visited elements
+ var (ep, ed) = DFS_SearchMinFrom(entryId, q, v);
+ return ep;
+ }
+
+ private (int, float) DFS_SearchMinFrom(int entryId, int id, VisitedBitSet visited)
+ {
+ visited.Add(entryId);
+ int candidate = entryId;
+ var candidateDistance = _options.Distance(_vectors[entryId], _vectors[id]);
+ int counter = 0;
+ do
+ {
+ var (mid, dist) = GetMinNearest(visited, entryId, candidate, candidateDistance);
+ if (dist > candidateDistance)
+ {
+ break;
+ }
+ candidate = mid;
+ candidateDistance = dist;
+
+ counter++;
+ } while (counter < _options.EFConstruction);
+ return (candidate, candidateDistance);
+ }
+
+ private (int, float) GetMinNearest(VisitedBitSet visited, int entryId, int id, float entryDistance)
+ {
+ var minId = entryId;
+ var minDist = entryDistance;
+ foreach (var candidate in _links.FindLinksForId(entryId).Select(l => l.Item2))
+ {
+ if (visited.Contains(candidate) == false)
+ {
+ var dist = _options.Distance(_vectors[candidate], _vectors[id]);
+ if (dist < minDist)
+ {
+ minDist = dist;
+ minId = candidate;
+ }
+ visited.Add(candidate);
+ }
+ }
+ return (minId, minDist);
+ }
+
+ #region Implementation of https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
+
+ ///
+ /// Algorithm 2
+ ///
+ /// query element
+ /// enter points ep
+ /// Output: ef closest neighbors to q
+ public IDictionary<int, float> SEARCH_LAYER(int q, int ep, int ef)
+ {
+ var v = new VisitedBitSet(_vectors._set.Count, _options.M);
+ // v ← ep // set of visited elements
+ v.Add(ep);
+ // C ← ep // set of candidates
+ var C = new Dictionary<int, float>();
+ C.Add(ep, _options.Distance(_vectors[ep], _vectors[q]));
+ // W ← ep // dynamic list of found nearest neighbors
+ var W = new Dictionary<int, float>();
+ W.Add(ep, C[ep]);
+ // while │C│ > 0
+ while (C.Count > 0)
+ {
+ // c ← extract nearest element from C to q
+ var nearest = C.OrderBy(p => p.Value).First();
+ var c = nearest.Key;
+ var md = nearest.Value;
+ // var (c, md) = GetMinimalDistanceIndex(C, q);
+ C.Remove(c);
+ // f ← get furthest element from W to q
+ var f = W.OrderByDescending(p => p.Value).First().Key;
+ //var f = GetMaximalDistanceIndex(W, q);
+ // if distance(c, q) > distance(f, q)
+ if (_options.Distance(_vectors[c], _vectors[q]) > _options.Distance(_vectors[f], _vectors[q]))
+ {
+ // break // all elements in W are evaluated
+ break;
+ }
+ // for each e ∈ neighbourhood(c) at layer lc // update C and W
+ foreach (var l in _links.FindLinksForId(c))
+ {
+ var e = l.Item2;
+ // if e ∉ v
+ if (v.Contains(e) == false)
+ {
+ // v ← v ⋃ e
+ v.Add(e);
+ // f ← get furthest element from W to q
+ f = W.OrderByDescending(p => p.Value).First().Key;
+ //f = GetMaximalDistanceIndex(W, q);
+ // if distance(e, q) < distance(f, q) or │W│ < ef
+ var ed = _options.Distance(_vectors[e], _vectors[q]);
+ if (ed < _options.Distance(_vectors[f], _vectors[q])
+ || W.Count < ef)
+ {
+ // C ← C ⋃ e
+ C.Add(e, ed);
+ // W ← W ⋃ e
+ W.Add(e, ed);
+ // if │W│ > ef
+ if (W.Count > ef)
+ {
+ // remove furthest element from W to q
+ f = W.OrderByDescending(p => p.Value).First().Key;
+ //f = GetMaximalDistanceIndex(W, q);
+ W.Remove(f);
+ }
+ }
+ }
+ }
+ }
+ // return W
+ return W;
+ }
+
+ ///
+ /// Algorithm 3
+ ///
+ /// base element
+ /// candidate elements
+ /// Output: M nearest elements to q
+ public IDictionary<int, float> SELECT_NEIGHBORS_SIMPLE(int q, IDictionary<int, float> C)
+ {
+ if (C.Count <= _options.M)
+ {
+ return new Dictionary<int, float>(C);
+ }
+ // return M nearest elements from C to q
+ return new Dictionary<int, float>(C.OrderBy(p => p.Value).Take(_options.M));
+ }
+
+ ///
+ /// Algorithm 4
+ ///
+ /// base element
+ /// candidate elements
+ /// flag indicating whether or not to extend candidate list
+ /// flag indicating whether or not to add discarded elements
+ /// Output: M elements selected by the heuristic
+ public IDictionary<int, float> SELECT_NEIGHBORS_HEURISTIC(int q, IDictionary<int, float> C, bool extendCandidates, bool keepPrunedConnections)
+ {
+ // R ← ∅
+ var R = new Dictionary<int, float>();
+ // W ← C // working queue for the candidates
+ var W = new List<int>(C.Select(p => p.Key));
+ // if extendCandidates // extend candidates by their neighbors
+ if (extendCandidates)
+ {
+ // for each e ∈ C
+ foreach (var e in C)
+ {
+ // for each e_adj ∈ neighbourhood(e) at layer lc
+ foreach (var l in _links.FindLinksForId(e.Key))
+ {
+ var e_adj = l.Item2;
+ // if eadj ∉ W
+ if (W.Contains(e_adj) == false)
+ {
+ // W ← W ⋃ eadj
+ W.Add(e_adj);
+ }
+ }
+ }
+ }
+ // Wd ← ∅ // queue for the discarded candidates
+ var Wd = new Dictionary<int, float>();
+ // while │W│ > 0 and │R│< M
+ while (W.Count > 0 && R.Count < _options.M)
+ {
+ // e ← extract nearest element from W to q
+ var (e, ed) = GetMinimalDistanceIndex(W, q);
+ W.Remove(e);
+ // if e is closer to q compared to any element from R
+ if (R.Count == 0 || ed < R.Min(pair => pair.Value))
+ {
+ // R ← R ⋃ e
+ R.Add(e, ed);
+ }
+ else
+ {
+ // Wd ← Wd ⋃ e
+ Wd.Add(e, ed);
+ }
+ // if keepPrunedConnections // add some of the discarded // connections from Wd
+ if (keepPrunedConnections)
+ {
+ // while │Wd│> 0 and │R│< M
+ while (Wd.Count > 0 && R.Count < _options.M)
+ {
+ // R ← R ⋃ extract nearest element from Wd to q
+ var nearest = Wd.Aggregate((l, r) => l.Value < r.Value ? l : r);
+ Wd.Remove(nearest.Key);
+ R.Add(nearest.Key, nearest.Value);
+ }
+ }
+ }
+ // return R
+ return R;
+ }
+
+
+ #endregion
+
+
+ private (int, float) GetMinimalDistanceIndex(IList<int> self, int q)
+ {
+ float min = _options.Distance(_vectors[self[0]], _vectors[q]);
+ int minIndex = 0;
+ for (int i = 1; i < self.Count; ++i)
+ {
+ var dist = _options.Distance(_vectors[self[i]], _vectors[q]);
+ if (dist < min)
+ {
+ min = dist;
+ minIndex = i;
+ }
+ }
+ return (self[minIndex], min);
+ }
+ }
+}
diff --git a/ZeroLevel.HNSW/Model/NSWOptions.cs b/ZeroLevel.HNSW/Model/NSWOptions.cs
new file mode 100644
index 0000000..c888eaa
--- /dev/null
+++ b/ZeroLevel.HNSW/Model/NSWOptions.cs
@@ -0,0 +1,42 @@
+using System;
+
+namespace ZeroLevel.HNSW
+{
+ public sealed class NSWOptions
+ {
+ public const int FARTHEST_DIVIDER = 3;
+
+ ///
+ /// Max node connections on Layer
+ ///
+ public readonly int M;
+
+ ///
+ /// Max search buffer
+ ///
+ public readonly int EF;
+ ///
+ /// Max search buffer for inserting
+ ///
+ public readonly int EFConstruction;
+ ///
+ /// Distance function beetween vectors
+ ///
+ public readonly Func<float[], float[], float> Distance;
+
+ public readonly int LayersCount;
+
+
+ private NSWOptions(int layersCount, int m, int ef, int ef_construction, Func<float[], float[], float> distance)
+ {
+ LayersCount = layersCount;
+ M = m;
+ EF = ef;
+ EFConstruction = ef_construction;
+ Distance = distance;
+ }
+
+ public static NSWOptions Create(int layersCount, int M, int EF, int EF_construction, Func<float[], float[], float> distance) =>
+ new NSWOptions(layersCount, M, EF, EF_construction, distance);
+ }
+}
diff --git a/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs b/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs
new file mode 100644
index 0000000..779acdd
--- /dev/null
+++ b/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs
@@ -0,0 +1,250 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+
+namespace ZeroLevel.HNSW
+{
+ internal sealed class CompactBiDirectionalLinksSet
+ : IDisposable
+ {
+ private readonly ReaderWriterLockSlim _rwLock = new ReaderWriterLockSlim();
+
+ private const int HALF_LONG_BITS = 32;
+
+ private SortedList<long, float> _set = new SortedList<long, float>();
+
+ public (int, int, float) this[int index]
+ {
+ get
+ {
+ var k = _set.Keys[index];
+ var d = _set.Values[index];
+ var id1 = (int)(k >> HALF_LONG_BITS);
+ var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS));
+ return (id1, id2, d);
+ }
+ }
+
+ public int Count => _set.Count;
+
+ ///
+ /// Разрывает связи id1 - id2 и id2 - id1, и строит новые id1 - id, id - id1
+ ///
+ public void Relink(int id1, int id2, int id, float distance)
+ {
+ long k1old = (((long)(id1)) << HALF_LONG_BITS) + id2;
+ long k2old = (((long)(id2)) << HALF_LONG_BITS) + id1;
+
+ long k1new = (((long)(id1)) << HALF_LONG_BITS) + id;
+ long k2new = (((long)(id)) << HALF_LONG_BITS) + id1;
+
+ _rwLock.EnterWriteLock();
+ try
+ {
+ _set.Remove(k1old);
+ _set.Remove(k2old);
+ _set.Add(k1new, distance);
+ _set.Add(k2new, distance);
+ }
+ finally
+ {
+ _rwLock.ExitWriteLock();
+ }
+ }
+
+ ///
+ /// Разрывает связи id1 - id2 и id2 - id1, и строит новые id1 - id, id - id1, id2 - id, id - id2
+ ///
+ public void Relink(int id1, int id2, int id, float distanceToId1, float distanceToId2)
+ {
+ long k_id1_id2 = (((long)(id1)) << HALF_LONG_BITS) + id2;
+ long k_id2_id1 = (((long)(id2)) << HALF_LONG_BITS) + id1;
+
+ long k_id_id1 = (((long)(id)) << HALF_LONG_BITS) + id1;
+ long k_id1_id = (((long)(id1)) << HALF_LONG_BITS) + id;
+
+ long k_id_id2 = (((long)(id)) << HALF_LONG_BITS) + id2;
+ long k_id2_id = (((long)(id2)) << HALF_LONG_BITS) + id;
+
+ _rwLock.EnterWriteLock();
+ try
+ {
+ _set.Remove(k_id1_id2);
+ _set.Remove(k_id2_id1);
+ _set.Add(k_id_id1, distanceToId1);
+ _set.Add(k_id1_id, distanceToId1);
+ _set.Add(k_id_id2, distanceToId2);
+ _set.Add(k_id2_id, distanceToId2);
+ }
+ finally
+ {
+ _rwLock.ExitWriteLock();
+ }
+ }
+
+ public IEnumerable<(int, int, float)> FindLinksForId(int id)
+ {
+ _rwLock.EnterReadLock();
+ try
+ {
+ foreach (var (k, v) in Search(_set, id))
+ {
+ var id1 = (int)(k >> HALF_LONG_BITS);
+ var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS));
+ yield return (id1, id2, v);
+ }
+ }
+ finally
+ {
+ _rwLock.ExitReadLock();
+ }
+ }
+
+ public IEnumerable<(int, int, float)> Items()
+ {
+ _rwLock.EnterReadLock();
+ try
+ {
+ foreach (var pair in _set)
+ {
+ var id1 = (int)(pair.Key >> HALF_LONG_BITS);
+ var id2 = (int)(pair.Key - (((long)id1) << HALF_LONG_BITS));
+ yield return (id1, id2, pair.Value);
+ }
+ }
+ finally
+ {
+ _rwLock.ExitReadLock();
+ }
+ }
+
+ public void RemoveIndex(int id)
+ {
+ long[] forward;
+ long[] backward;
+ _rwLock.EnterReadLock();
+ try
+ {
+ forward = Search(_set, id).Select(pair => pair.Item1).ToArray();
+ backward = forward.Select(k =>
+ {
+ var id1 = k >> HALF_LONG_BITS;
+ var id2 = k - (id1 << HALF_LONG_BITS);
+ return (id2 << HALF_LONG_BITS) + id1;
+ }).ToArray();
+ }
+ finally
+ {
+ _rwLock.ExitReadLock();
+ }
+ _rwLock.EnterWriteLock();
+ try
+ {
+ foreach (var k in forward)
+ {
+ _set.Remove(k);
+ }
+ foreach (var k in backward)
+ {
+ _set.Remove(k);
+ }
+ }
+ finally
+ {
+ _rwLock.ExitWriteLock();
+ }
+ }
+
+ public bool Add(int id1, int id2, float distance)
+ {
+ _rwLock.EnterWriteLock();
+ try
+ {
+ long k1 = (((long)(id1)) << HALF_LONG_BITS) + id2;
+ long k2 = (((long)(id2)) << HALF_LONG_BITS) + id1;
+ if (_set.ContainsKey(k1) == false)
+ {
+ _set.Add(k1, distance);
+ if (k1 != k2)
+ {
+ _set.Add(k2, distance);
+ }
+ return true;
+ }
+ }
+ finally
+ {
+ _rwLock.ExitWriteLock();
+ }
+ return false;
+ }
+
+ static IEnumerable<(long, float)> Search(SortedList<long, float> set, int index)
+ {
+ long k = ((long)index) << HALF_LONG_BITS;
+ int left = 0;
+ int right = set.Count - 1;
+ int mid;
+ long test;
+ while (left < right)
+ {
+ mid = (right + left) / 2;
+ test = (set.Keys[mid] >> HALF_LONG_BITS) << HALF_LONG_BITS;
+
+ if (left == mid || right == mid)
+ {
+ if (test == k)
+ {
+ return SearchByPosition(set, k, mid);
+ }
+ break;
+ }
+ if (test < k)
+ {
+ left = mid;
+ }
+ else
+ {
+ if (test == k)
+ {
+ return SearchByPosition(set, k, mid);
+ }
+ else
+ {
+ right = mid;
+ }
+ }
+ }
+ return Enumerable.Empty<(long, float)>();
+ }
+
+ static IEnumerable<(long, float)> SearchByPosition(SortedList<long, float> set, long k, int position)
+ {
+ var start = position;
+ var end = position;
+ do
+ {
+ position--;
+ } while (position >= 0 && ((set.Keys[position] >> HALF_LONG_BITS) << HALF_LONG_BITS) == k);
+ start = position + 1;
+ position = end + 1;
+ while (position < set.Count && ((set.Keys[position] >> HALF_LONG_BITS) << HALF_LONG_BITS) == k)
+ {
+ position++;
+ }
+ end = position - 1;
+ for (int i = start; i <= end; i++)
+ {
+ yield return (set.Keys[i], set.Values[i]);
+ }
+ }
+
+ public void Dispose()
+ {
+ _rwLock.Dispose();
+ _set.Clear();
+ _set = null;
+ }
+ }
+}
diff --git a/ZeroLevel.HNSW/Services/CosineDistance.cs b/ZeroLevel.HNSW/Services/CosineDistance.cs
new file mode 100644
index 0000000..5531294
--- /dev/null
+++ b/ZeroLevel.HNSW/Services/CosineDistance.cs
@@ -0,0 +1,184 @@
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+namespace ZeroLevel.HNSW
+{
+ ///
+ /// Calculates cosine similarity.
+ ///
+ ///
+ /// Intuition behind selecting float as a carrier.
+ ///
+ /// 1. In practice we work with vectors of dimensionality 100 and each component has value in range [-1; 1]
+ /// There certainly is a possibility of underflow.
+ /// But we assume that such cases are rare and we can rely on such underflow losses.
+ ///
+ /// 2. According to the article http://www.ti3.tuhh.de/paper/rump/JeaRu13.pdf
+ /// the floating point rounding error is less then 100 * 2^-24 * sqrt(100) * sqrt(100) < 0.0005960
+ /// We deem such precision is satisfactory for out needs.
+ ///
+ public static class CosineDistance
+ {
+ ///
+ /// Calculates cosine distance without making any optimizations.
+ ///
+ /// Left vector.
+ /// Right vector.
+ /// Cosine distance between u and v.
+ public static float NonOptimized(float[] u, float[] v)
+ {
+ if (u.Length != v.Length)
+ {
+ throw new ArgumentException("Vectors have non-matching dimensions");
+ }
+
+ float dot = 0.0f;
+ float nru = 0.0f;
+ float nrv = 0.0f;
+ for (int i = 0; i < u.Length; ++i)
+ {
+ dot += u[i] * v[i];
+ nru += u[i] * u[i];
+ nrv += v[i] * v[i];
+ }
+
+ var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv));
+ return 1 - similarity;
+ }
+
+ ///
+ /// Calculates cosine distance with assumption that u and v are unit vectors.
+ ///
+ /// Left vector.
+ /// Right vector.
+ /// Cosine distance between u and v.
+ public static float ForUnits(float[] u, float[] v)
+ {
+ if (u.Length != v.Length)
+ {
+ throw new ArgumentException("Vectors have non-matching dimensions");
+ }
+
+ float dot = 0;
+ for (int i = 0; i < u.Length; ++i)
+ {
+ dot += u[i] * v[i];
+ }
+
+ return 1 - dot;
+ }
+
+ ///
+ /// Calculates cosine distance optimized using SIMD instructions.
+ ///
+ /// Left vector.
+ /// Right vector.
+ /// Cosine distance between u and v.
+ public static float SIMD(float[] u, float[] v)
+ {
+ if (!Vector.IsHardwareAccelerated)
+ {
+ throw new NotSupportedException($"SIMD version of {nameof(CosineDistance)} is not supported");
+ }
+
+ if (u.Length != v.Length)
+ {
+ throw new ArgumentException("Vectors have non-matching dimensions");
+ }
+
+ float dot = 0;
+ var norm = default(Vector2);
+ int step = Vector.Count;
+
+ int i, to = u.Length - step;
+ for (i = 0; i <= to; i += step)
+ {
+ var ui = new Vector(u, i);
+ var vi = new Vector(v, i);
+ dot += Vector.Dot(ui, vi);
+ norm.X += Vector.Dot(ui, ui);
+ norm.Y += Vector.Dot(vi, vi);
+ }
+
+ for (; i < u.Length; ++i)
+ {
+ dot += u[i] * v[i];
+ norm.X += u[i] * u[i];
+ norm.Y += v[i] * v[i];
+ }
+
+ norm = Vector2.SquareRoot(norm);
+ float n = (norm.X * norm.Y);
+
+ if (n == 0)
+ {
+ return 1f;
+ }
+
+ var similarity = dot / n;
+ return 1f - similarity;
+ }
+
+ ///
+ /// Calculates cosine distance with assumption that u and v are unit vectors using SIMD instructions.
+ ///
+ /// Left vector.
+ /// Right vector.
+ /// Cosine distance between u and v.
+ public static float SIMDForUnits(float[] u, float[] v)
+ {
+ return 1f - DotProduct(ref u, ref v);
+ }
+
+ private static readonly int _vs1 = Vector.Count;
+ private static readonly int _vs2 = 2 * Vector.Count;
+ private static readonly int _vs3 = 3 * Vector.Count;
+ private static readonly int _vs4 = 4 * Vector.Count;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static float DotProduct(ref float[] lhs, ref float[] rhs)
+ {
+ float result = 0f;
+
+ var count = lhs.Length;
+ var offset = 0;
+
+ while (count >= _vs4)
+ {
+ result += Vector.Dot(new Vector(lhs, offset), new Vector(rhs, offset));
+ result += Vector.Dot(new Vector(lhs, offset + _vs1), new Vector(rhs, offset + _vs1));
+ result += Vector.Dot(new Vector(lhs, offset + _vs2), new Vector(rhs, offset + _vs2));
+ result += Vector.Dot(new Vector(lhs, offset + _vs3), new Vector(rhs, offset + _vs3));
+ if (count == _vs4) return result;
+ count -= _vs4;
+ offset += _vs4;
+ }
+
+ if (count >= _vs2)
+ {
+ result += Vector.Dot(new Vector(lhs, offset), new Vector(rhs, offset));
+ result += Vector.Dot(new Vector(lhs, offset + _vs1), new Vector(rhs, offset + _vs1));
+ if (count == _vs2) return result;
+ count -= _vs2;
+ offset += _vs2;
+ }
+ if (count >= _vs1)
+ {
+ result += Vector.Dot(new Vector(lhs, offset), new Vector(rhs, offset));
+ if (count == _vs1) return result;
+ count -= _vs1;
+ offset += _vs1;
+ }
+ if (count > 0)
+ {
+ while (count > 0)
+ {
+ result += lhs[offset] * rhs[offset];
+ offset++; count--;
+ }
+ }
+ return result;
+ }
+ }
+}
diff --git a/ZeroLevel.HNSW/Services/FastRandom.cs b/ZeroLevel.HNSW/Services/FastRandom.cs
new file mode 100644
index 0000000..74ab8d5
--- /dev/null
+++ b/ZeroLevel.HNSW/Services/FastRandom.cs
@@ -0,0 +1,507 @@
+using System;
+using System.Runtime.CompilerServices;
+
+namespace ZeroLevel.HNSW
+{
+ public sealed class DefaultRandomGenerator
+ {
+ ///
+ /// This is the default configuration (it supports the optimization process to be executed on multiple threads)
+ ///
+ public static DefaultRandomGenerator Instance { get; } = new DefaultRandomGenerator(allowParallel: true);
+
+ ///
+ /// This uses the same random number generator but forces the optimization process to run on a single thread (which may be desirable if multiple requests may be processed concurrently
+ /// or if it is otherwise not desirable to let a single request access all of the CPUs)
+ ///
+ public static DefaultRandomGenerator DisableThreading { get; } = new DefaultRandomGenerator(allowParallel: false);
+
+ private DefaultRandomGenerator(bool allowParallel) => IsThreadSafe = allowParallel;
+
+ public bool IsThreadSafe { get; }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public int Next(int minValue, int maxValue) => ThreadSafeFastRandom.Next(minValue, maxValue);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public float NextFloat() => ThreadSafeFastRandom.NextFloat();
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void NextFloats(Span<float> buffer) => ThreadSafeFastRandom.NextFloats(buffer);
+ }
+
+ internal static class ThreadSafeFastRandom
+ {
+ private static readonly Random _global = new Random();
+
+ [ThreadStatic]
+ private static FastRandom _local;
+
+ private static int GetGlobalSeed()
+ {
+ int seed;
+ lock (_global)
+ {
+ seed = _global.Next();
+ }
+ return seed;
+ }
+
+ ///
+ /// Returns a non-negative random integer.
+ ///
+ /// A 32-bit signed integer that is greater than or equal to 0 and less than System.Int32.MaxValue.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int Next()
+ {
+ var inst = _local;
+ if (inst == null)
+ {
+ int seed;
+ seed = GetGlobalSeed();
+ _local = inst = new FastRandom(seed);
+ }
+ return inst.Next();
+ }
+
+ ///
+ /// Returns a non-negative random integer that is less than the specified maximum.
+ ///
+ /// The exclusive upper bound of the random number to be generated. maxValue must be greater than or equal to 0.
+ /// A 32-bit signed integer that is greater than or equal to 0, and less than maxValue; that is, the range of return values ordinarily includes 0 but not maxValue. However,
+ // if maxValue equals 0, maxValue is returned.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int Next(int maxValue)
+ {
+ var inst = _local;
+ if (inst == null)
+ {
+ int seed;
+ seed = GetGlobalSeed();
+ _local = inst = new FastRandom(seed);
+ }
+ int ans;
+ do
+ {
+ ans = inst.Next(maxValue);
+ } while (ans == maxValue);
+
+ return ans;
+ }
+
+ ///
+ /// Returns a random integer that is within a specified range.
+ ///
+ /// The inclusive lower bound of the random number returned.
+ /// The exclusive upper bound of the random number returned. maxValue must be greater than or equal to minValue.
+ /// A 32-bit signed integer greater than or equal to minValue and less than maxValue; that is, the range of return values includes minValue but not maxValue. If minValue
+ // equals maxValue, minValue is returned.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int Next(int minValue, int maxValue)
+ {
+ var inst = _local;
+ if (inst == null)
+ {
+ int seed;
+ seed = GetGlobalSeed();
+ _local = inst = new FastRandom(seed);
+ }
+ return inst.Next(minValue, maxValue);
+ }
+
+ ///
+ /// Generates a random float. Values returned are from 0.0 up to but not including 1.0.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float NextFloat()
+ {
+ var inst = _local;
+ if (inst == null)
+ {
+ int seed;
+ seed = GetGlobalSeed();
+ _local = inst = new FastRandom(seed);
+ }
+ return inst.NextFloat();
+ }
+
+ ///
+ /// Fills the elements of a specified array of bytes with random numbers.
+ ///
+ /// An array of bytes to contain random numbers.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void NextFloats(Span<float> buffer)
+ {
+ var inst = _local;
+ if (inst == null)
+ {
+ int seed;
+ seed = GetGlobalSeed();
+ _local = inst = new FastRandom(seed);
+ }
+ inst.NextFloats(buffer);
+ }
+ }
+
+ ///
+ /// A fast random number generator for .NET, from https://www.codeproject.com/Articles/9187/A-fast-equivalent-for-System-Random
+ /// Colin Green, January 2005
+ ///
+ /// September 4th 2005
+ /// Added NextBytesUnsafe() - commented out by default.
+ /// Fixed bug in Reinitialise() - y,z and w variables were not being reset.
+ ///
+ /// Key points:
+ /// 1) Based on a simple and fast xor-shift pseudo random number generator (RNG) specified in:
+ /// Marsaglia, George. (2003). Xorshift RNGs.
+ /// http://www.jstatsoft.org/v08/i14/xorshift.pdf
+ ///
+ /// This particular implementation of xorshift has a period of 2^128-1. See the above paper to see
+ /// how this can be easily extened if you need a longer period. At the time of writing I could find no
+ /// information on the period of System.Random for comparison.
+ ///
+ /// 2) Faster than System.Random. Up to 8x faster, depending on which methods are called.
+ ///
+ /// 3) Direct replacement for System.Random. This class implements all of the methods that System.Random
+ /// does plus some additional methods. The like named methods are functionally equivalent.
+ ///
+ /// 4) Allows fast re-initialisation with a seed, unlike System.Random which accepts a seed at construction
+ /// time which then executes a relatively expensive initialisation routine. This provides a vast speed improvement
+ /// if you need to reset the pseudo-random number sequence many times, e.g. if you want to re-generate the same
+ /// sequence many times. An alternative might be to cache random numbers in an array, but that approach is limited
+ /// by memory capacity and the fact that you may also want a large number of different sequences cached. Each sequence
+ /// can each be represented by a single seed value (int) when using FastRandom.
+ ///
+ /// Notes.
+ /// A further performance improvement can be obtained by declaring local variables as static, thus avoiding
+ /// re-allocation of variables on each call. However care should be taken if multiple instances of
+ /// FastRandom are in use or if being used in a multi-threaded environment.
+ ///
+ ///
+ internal class FastRandom
+ {
+ // The +1 ensures NextDouble doesn't generate 1.0
+ const float FLOAT_UNIT_INT = 1.0f / ((float)int.MaxValue + 1.0f);
+
+ const double REAL_UNIT_INT = 1.0 / ((double)int.MaxValue + 1.0);
+ const double REAL_UNIT_UINT = 1.0 / ((double)uint.MaxValue + 1.0);
+ const uint Y = 842502087, Z = 3579807591, W = 273326509;
+
+ uint x, y, z, w;
+
+ ///
+ /// Initialises a new instance using time dependent seed.
+ ///
+ public FastRandom()
+ {
+ // Initialise using the system tick count.
+ Reinitialise(Environment.TickCount);
+ }
+
+ ///
+ /// Initialises a new instance using an int value as seed.
+ /// This constructor signature is provided to maintain compatibility with
+ /// System.Random
+ ///
+ public FastRandom(int seed)
+ {
+ Reinitialise(seed);
+ }
+
+ ///
+ /// Reinitialises using an int value as a seed.
+ ///
+ public void Reinitialise(int seed)
+ {
+ // The only stipulation stated for the xorshift RNG is that at least one of
+ // the seeds x,y,z,w is non-zero. We fulfill that requirement by only allowing
+ // resetting of the x seed
+ x = (uint)seed;
+ y = Y;
+ z = Z;
+ w = W;
+ }
+
+ ///
+ /// Generates a random int over the range 0 to int.MaxValue-1.
+ /// MaxValue is not generated in order to remain functionally equivalent to System.Random.Next().
+ /// This does slightly eat into some of the performance gain over System.Random, but not much.
+ /// For better performance see:
+ ///
+ /// Call NextInt() for an int over the range 0 to int.MaxValue.
+ ///
+ /// Call NextUInt() and cast the result to an int to generate an int over the full Int32 value range
+ /// including negative values.
+ ///
+ public int Next()
+ {
+ uint t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
+
+ // Handle the special case where the value int.MaxValue is generated. This is outside of
+ // the range of permitted values, so we therefore call Next() to try again.
+ uint rtn = w & 0x7FFFFFFF;
+ if (rtn == 0x7FFFFFFF)
+ return Next();
+ return (int)rtn;
+ }
+
+ ///
+ /// Generates a random int over the range 0 to upperBound-1, and not including upperBound.
+ ///
+ public int Next(int upperBound)
+ {
+ if (upperBound < 0)
+ throw new ArgumentOutOfRangeException("upperBound", upperBound, "upperBound must be >=0");
+
+ uint t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+
+ // The explicit int cast before the first multiplication gives better performance.
+ // See comments in NextDouble.
+ return (int)((REAL_UNIT_INT * (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))) * upperBound);
+ }
+
+ ///
+ /// Generates a random int over the range lowerBound to upperBound-1, and not including upperBound.
+ /// upperBound must be >= lowerBound. lowerBound may be negative.
+ ///
+ public int Next(int lowerBound, int upperBound)
+ {
+ if (lowerBound > upperBound)
+ throw new ArgumentOutOfRangeException("upperBound", upperBound, "upperBound must be >=lowerBound");
+
+ uint t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+
+ // The explicit int cast before the first multiplication gives better performance.
+ // See comments in NextDouble.
+ int range = upperBound - lowerBound;
+ if (range < 0)
+ { // If range is <0 then an overflow has occurred and must resort to using long integer arithmetic instead (slower).
+ // We also must use all 32 bits of precision, instead of the normal 31, which again is slower.
+ return lowerBound + (int)((REAL_UNIT_UINT * (double)(w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)))) * (double)((long)upperBound - (long)lowerBound));
+ }
+
+ // 31 bits of precision will suffice if range<=int.MaxValue. This allows us to cast to an int and gain
+ // a little more performance.
+ return lowerBound + (int)((REAL_UNIT_INT * (double)(int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))) * (double)range);
+ }
+
+ ///
+ /// Generates a random double. Values returned are from 0.0 up to but not including 1.0.
+ ///
+ public double NextDouble()
+ {
+ uint t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+
+ // Here we can gain a 2x speed improvement by generating a value that can be cast to
+ // an int instead of the more easily available uint. If we then explicitly cast to an
+ // int the compiler will then cast the int to a double to perform the multiplication,
+ // this final cast is a lot faster than casting from a uint to a double. The extra cast
+ // to an int is very fast (the allocated bits remain the same) and so the overall effect
+ // of the extra cast is a significant performance improvement.
+ //
+ // Also note that the loss of one bit of precision is equivalent to what occurs within
+ // System.Random.
+ return (REAL_UNIT_INT * (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)))));
+ }
+
+ ///
+ /// Generates a random float. Values returned are from 0.0 up to but not including 1.0.
+ ///
+ public float NextFloat()
+ {
+ uint x = this.x, y = this.y, z = this.z, w = this.w;
+ uint t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
+ var value = FLOAT_UNIT_INT * (int)(0x7FFFFFFF & w);
+ this.x = x; this.y = y; this.z = z; this.w = w;
+ return value;
+ }
+
+ ///
+ /// Fills the provided buffer with random floats.
+ ///
+ public void NextFloats(Span<float> buffer)
+ {
+ uint x = this.x, y = this.y, z = this.z, w = this.w;
+ int i = 0;
+ uint t;
+ for (int bound = buffer.Length; i < bound;)
+ {
+ t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
+
+ buffer[i++] = FLOAT_UNIT_INT * (int)(0x7FFFFFFF & w);
+ }
+
+ this.x = x; this.y = y; this.z = z; this.w = w;
+ }
+
+
+ ///
+ /// Fills the provided byte array with random bytes.
+ /// This method is functionally equivalent to System.Random.NextBytes().
+ ///
+ public void NextBytes(byte[] buffer)
+ {
+ // Fill up the bulk of the buffer in chunks of 4 bytes at a time.
+ uint x = this.x, y = this.y, z = this.z, w = this.w;
+ int i = 0;
+ uint t;
+ for (int bound = buffer.Length - 3; i < bound;)
+ {
+ // Generate 4 bytes.
+ // Increased performance is achieved by generating 4 random bytes per loop.
+ // Also note that no mask needs to be applied to zero out the higher order bytes before
+ // casting because the cast ignores those bytes. Thanks to Stefan Troschütz for pointing this out.
+ t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
+
+ buffer[i++] = (byte)w;
+ buffer[i++] = (byte)(w >> 8);
+ buffer[i++] = (byte)(w >> 16);
+ buffer[i++] = (byte)(w >> 24);
+ }
+
+ // Fill up any remaining bytes in the buffer.
+ if (i < buffer.Length)
+ {
+ // Generate 4 bytes.
+ t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
+
+ buffer[i++] = (byte)w;
+ if (i < buffer.Length)
+ {
+ buffer[i++] = (byte)(w >> 8);
+ if (i < buffer.Length)
+ {
+ buffer[i++] = (byte)(w >> 16);
+ if (i < buffer.Length)
+ {
+ buffer[i] = (byte)(w >> 24);
+ }
+ }
+ }
+ }
+ this.x = x; this.y = y; this.z = z; this.w = w;
+ }
+
+ ///
+ /// Fills the provided byte array with random bytes.
+ /// This method is functionally equivalent to System.Random.NextBytes().
+ ///
+ public void NextBytes(Span<byte> buffer)
+ {
+ // Fill up the bulk of the buffer in chunks of 4 bytes at a time.
+ uint x = this.x, y = this.y, z = this.z, w = this.w;
+ int i = 0;
+ uint t;
+ for (int bound = buffer.Length - 3; i < bound;)
+ {
+ // Generate 4 bytes.
+ // Increased performance is achieved by generating 4 random bytes per loop.
+ // Also note that no mask needs to be applied to zero out the higher order bytes before
+ // casting because the cast ignores those bytes. Thanks to Stefan Troschütz for pointing this out.
+ t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
+
+ buffer[i++] = (byte)w;
+ buffer[i++] = (byte)(w >> 8);
+ buffer[i++] = (byte)(w >> 16);
+ buffer[i++] = (byte)(w >> 24);
+ }
+
+ // Fill up any remaining bytes in the buffer.
+ if (i < buffer.Length)
+ {
+ // Generate 4 bytes.
+ t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
+
+ buffer[i++] = (byte)w;
+ if (i < buffer.Length)
+ {
+ buffer[i++] = (byte)(w >> 8);
+ if (i < buffer.Length)
+ {
+ buffer[i++] = (byte)(w >> 16);
+ if (i < buffer.Length)
+ {
+ buffer[i] = (byte)(w >> 24);
+ }
+ }
+ }
+ }
+ this.x = x; this.y = y; this.z = z; this.w = w;
+ }
+
+ ///
+ /// Generates a uint. Values returned are over the full range of a uint,
+ /// uint.MinValue to uint.MaxValue, inclusive.
+ ///
+ /// This is the fastest method for generating a single random number because the underlying
+ /// random number generator algorithm generates 32 random bits that can be cast directly to
+ /// a uint.
+ ///
+ public uint NextUInt()
+ {
+ uint t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ return (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)));
+ }
+
+ ///
+ /// Generates a random int over the range 0 to int.MaxValue, inclusive.
+ /// This method differs from Next() only in that the range is 0 to int.MaxValue
+ /// and not 0 to int.MaxValue-1.
+ ///
+ /// The slight difference in range means this method is slightly faster than Next()
+ /// but is not functionally equivalent to System.Random.Next().
+ ///
+ public int NextInt()
+ {
+ uint t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ return (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))));
+ }
+
+
+ // Buffer 32 bits in bitBuffer, return 1 at a time, keep track of how many have been returned
+ // with bitBufferIdx.
+ uint bitBuffer;
+ uint bitMask = 1;
+
+ ///
+ /// Generates a single random bit.
+ /// This method's performance is improved by generating 32 bits in one operation and storing them
+ /// ready for future calls.
+ ///
+ public bool NextBool()
+ {
+ if (bitMask == 1)
+ {
+ // Generate 32 more bits.
+ uint t = (x ^ (x << 11));
+ x = y; y = z; z = w;
+ bitBuffer = w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
+
+ // Reset the bitMask that tells us which bit to read next.
+ bitMask = 0x80000000;
+ return (bitBuffer & bitMask) == 0;
+ }
+
+ return (bitBuffer & (bitMask >>= 1)) == 0;
+ }
+ }
+}
diff --git a/ZeroLevel.HNSW/Services/VectorSet.cs b/ZeroLevel.HNSW/Services/VectorSet.cs
new file mode 100644
index 0000000..fd5d38f
--- /dev/null
+++ b/ZeroLevel.HNSW/Services/VectorSet.cs
@@ -0,0 +1,31 @@
+using System.Collections.Generic;
+using System.Threading;
+
+namespace ZeroLevel.HNSW
+{
+ public class VectorSet<T>
+ {
+ public IList<T> _set = new List<T>();
+
+ public T this[int index] => _set[index];
+
+ SpinLock _lock = new SpinLock();
+
+ public int Append(T vector)
+ {
+ bool gotLock = false;
+ gotLock = false;
+ try
+ {
+ _lock.Enter(ref gotLock);
+ _set.Add(vector);
+ return _set.Count - 1;
+ }
+ finally
+ {
+ // Only give up the lock if you actually acquired it
+ if (gotLock) _lock.Exit();
+ }
+ }
+ }
+}
diff --git a/ZeroLevel.HNSW/Services/VectorUtils.cs b/ZeroLevel.HNSW/Services/VectorUtils.cs
new file mode 100644
index 0000000..c4a72eb
--- /dev/null
+++ b/ZeroLevel.HNSW/Services/VectorUtils.cs
@@ -0,0 +1,78 @@
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+namespace ZeroLevel.HNSW
+{
+ public static class VectorUtils
+ {
+ public static float Magnitude(IList<float> vector)
+ {
+ float magnitude = 0.0f;
+ for (int i = 0; i < vector.Count; ++i)
+ {
+ magnitude += vector[i] * vector[i];
+ }
+
+ return (float)Math.Sqrt(magnitude);
+ }
+
+ public static void Normalize(IList<float> vector)
+ {
+ float normFactor = 1 / Magnitude(vector);
+ for (int i = 0; i < vector.Count; ++i)
+ {
+ vector[i] *= normFactor;
+ }
+ }
+
+ public static float MagnitudeSIMD(float[] vector)
+ {
+ if (!Vector.IsHardwareAccelerated)
+ {
+ throw new NotSupportedException($"{nameof(VectorUtils.NormalizeSIMD)} is not supported");
+ }
+
+ float magnitude = 0.0f;
+ int step = Vector<float>.Count;
+
+ int i, to = vector.Length - step;
+ for (i = 0; i <= to; i += Vector<float>.Count)
+ {
+ var vi = new Vector<float>(vector, i);
+ magnitude += Vector.Dot(vi, vi);
+ }
+
+ for (; i < vector.Length; ++i)
+ {
+ magnitude += vector[i] * vector[i];
+ }
+
+ return (float)Math.Sqrt(magnitude);
+ }
+
+ public static void NormalizeSIMD(float[] vector)
+ {
+ if (!Vector.IsHardwareAccelerated)
+ {
+ throw new NotSupportedException($"{nameof(VectorUtils.NormalizeSIMD)} is not supported");
+ }
+
+ float normFactor = 1f / MagnitudeSIMD(vector);
+ int step = Vector<float>.Count;
+
+ int i, to = vector.Length - step;
+ for (i = 0; i <= to; i += step)
+ {
+ var vi = new Vector<float>(vector, i);
+ vi = Vector.Multiply(normFactor, vi);
+ vi.CopyTo(vector, i);
+ }
+
+ for (; i < vector.Length; ++i)
+ {
+ vector[i] *= normFactor;
+ }
+ }
+ }
+}
diff --git a/ZeroLevel.HNSW/Services/VisitedBitSet.cs b/ZeroLevel.HNSW/Services/VisitedBitSet.cs
new file mode 100644
index 0000000..16de598
--- /dev/null
+++ b/ZeroLevel.HNSW/Services/VisitedBitSet.cs
@@ -0,0 +1,32 @@
+using System;
+
+namespace ZeroLevel.HNSW
+{
+ internal class VisitedBitSet
+ {
+ // bit map
+ private int[] Buffer;
+
+ internal VisitedBitSet(int nodesCount, int M)
+ {
+ Buffer = new int[(nodesCount >> 5) + M + 1];
+ }
+
+ internal bool Contains(int nodeId)
+ {
+ int carrier = Buffer[nodeId >> 5];
+ return ((1 << (nodeId & 31)) & carrier) != 0;
+ }
+
+ internal void Add(int nodeId)
+ {
+ int mask = 1 << (nodeId & 31);
+ Buffer[nodeId >> 5] |= mask;
+ }
+
+ internal void Clear()
+ {
+ Array.Clear(Buffer, 0, Buffer.Length);
+ }
+ }
+}
diff --git a/ZeroLevel.HNSW/SmallWorld.cs b/ZeroLevel.HNSW/SmallWorld.cs
new file mode 100644
index 0000000..d177a69
--- /dev/null
+++ b/ZeroLevel.HNSW/SmallWorld.cs
@@ -0,0 +1,124 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace ZeroLevel.HNSW
+{
+ public class SmallWorld<TItem>
+ {
+ private readonly NSWOptions<TItem> _options;
+ private readonly VectorSet<TItem> _vectors;
+ private readonly Layer<TItem>[] _layers;
+
+ private Layer<TItem> EnterPointsLayer => _layers[_layers.Length - 1];
+ private Layer<TItem> LastLayer => _layers[0];
+
+ public SmallWorld(NSWOptions<TItem> options)
+ {
+ _options = options;
+ _vectors = new VectorSet<TItem>();
+ _layers = new Layer<TItem>[_options.LayersCount];
+ for (int i = 0; i < _options.LayersCount; i++)
+ {
+ _layers[i] = new Layer<TItem>(_options, _vectors);
+ }
+ }
+
+ public IEnumerable<(int, TItem[])> Search(TItem vector, int k, HashSet<int> activeNodes = null)
+ {
+ return Enumerable.Empty<(int, TItem[])>();
+ }
+
+ public int[] AddItems(IEnumerable<TItem> vectors)
+ {
+ var insert = vectors.ToArray();
+ var ids = new int[insert.Length];
+ for (int i = 0; i < insert.Length; i++)
+ {
+ var item = insert[i];
+ ids[i] = Insert(item);
+ }
+ return ids;
+ }
+
+ public int Insert(TItem item)
+ {
+ var id = _vectors.Append(item);
+ INSERT(id);
+ return id;
+ }
+
+ #region https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
+ ///
+ /// Algorithm 1
+ ///
+ /// new element
+ public void INSERT(int q)
+ {
+ // W ← ∅ // list for the currently found nearest elements
+ IDictionary<int, float> W;
+ // ep ← get enter point for hnsw
+ var ep = EnterPointsLayer.GetEntryPointFor(q);
+ // L ← level of ep // top layer for hnsw
+ var L = _layers.Length - 1;
+ // l ← ⌊-ln(unif(0..1))∙mL⌋ // new element’s level
+ int l = DefaultRandomGenerator.Instance.Next(0, _options.LayersCount - 1);
+ // for lc ← L … l+1
+ for (int lc = L; lc > l; lc--)
+ {
+ // W ← SEARCH-LAYER(q, ep, ef = 1, lc)
+ W = _layers[lc].SEARCH_LAYER(q, ep, 1);
+ // ep ← get the nearest element from W to q
+ ep = W.OrderBy(p => p.Value).First().Key;
+ }
+ //for lc ← min(L, l) … 0
+ for (int lc = Math.Min(L, l); lc >= 0; lc--)
+ {
+ // W ← SEARCH - LAYER(q, ep, efConstruction, lc)
+ W = _layers[lc].SEARCH_LAYER(q, ep, _options.EFConstruction);
+ // neighbors ← SELECT-NEIGHBORS(q, W, M, lc) // alg. 3 or alg. 4
+ var neighbors = _layers[lc].SELECT_NEIGHBORS_SIMPLE(q, W);
+ // add bidirectional connections from neighbors to q at layer lc
+ // for each e ∈ neighbors // shrink connections if needed
+ foreach (var e in neighbors)
+ {
+ // eConn ← neighbourhood(e) at layer lc
+ _layers[lc].AddBidirectionallConnectionts(q, e.Key, e.Value);
+ }
+ // ep ← W
+ ep = W.OrderBy(p => p.Value).First().Key;
+ }
+ // if l > L
+ // set enter point for hnsw to q
+ }
+
+ ///
+ /// Algorithm 5
+ ///
+ /// query element
+ /// number of nearest neighbors to return
+ /// : K nearest elements to q
+ public IList<int> K_NN_SEARCH(int q, int K)
+ {
+ // W ← ∅ // set for the current nearest elements
+ IDictionary<int, float> W;
+ // ep ← get enter point for hnsw
+ var ep = EnterPointsLayer.GetEntryPointFor(q);
+ // L ← level of ep // top layer for hnsw
+ var L = _options.LayersCount - 1;
+ // for lc ← L … 1
+ for (var lc = L; lc > 0; lc--)
+ {
+ // W ← SEARCH-LAYER(q, ep, ef = 1, lc)
+ W = _layers[lc].SEARCH_LAYER(q, ep, 1);
+ // ep ← get nearest element from W to q
+ ep = W.OrderBy(p => p.Value).First().Key;
+ }
+ // W ← SEARCH-LAYER(q, ep, ef, lc =0)
+ W = LastLayer.SEARCH_LAYER(q, ep, _options.EF);
+ // return K nearest elements from W to q
+ return W.OrderBy(p => p.Value).Take(K).Select(p => p.Key).ToList();
+ }
+ #endregion
+ }
+}
diff --git a/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj b/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj
new file mode 100644
index 0000000..09920fe
--- /dev/null
+++ b/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj
@@ -0,0 +1,11 @@
+
+
+
+ net5.0
+
+
+
+
+
+
+
diff --git a/ZeroLevel.sln b/ZeroLevel.sln
index 6534974..aa957a7 100644
--- a/ZeroLevel.sln
+++ b/ZeroLevel.sln
@@ -57,7 +57,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Client", "ConnectionTest\Cl
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Server", "ConnectionTest\Server\Server.csproj", "{3496A688-0749-48C2-BD60-ABB42A5C17C9}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ZeroLevel.Qdrant", "ZeroLevel.Qdrant\ZeroLevel.Qdrant.csproj", "{7188B89E-96EB-4EFB-AAFB-D0A823031F99}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZeroLevel.Qdrant", "ZeroLevel.Qdrant\ZeroLevel.Qdrant.csproj", "{7188B89E-96EB-4EFB-AAFB-D0A823031F99}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZeroLevel.HNSW", "ZeroLevel.HNSW\ZeroLevel.HNSW.csproj", "{1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HNSWDemo", "TestHNSW\HNSWDemo\HNSWDemo.csproj", "{E0E9EC21-B958-4018-AE30-67DB88EFCB90}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -285,6 +289,30 @@ Global
{7188B89E-96EB-4EFB-AAFB-D0A823031F99}.Release|x64.Build.0 = Release|x64
{7188B89E-96EB-4EFB-AAFB-D0A823031F99}.Release|x86.ActiveCfg = Release|x86
{7188B89E-96EB-4EFB-AAFB-D0A823031F99}.Release|x86.Build.0 = Release|x86
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|x64.Build.0 = Debug|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|x86.Build.0 = Debug|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|Any CPU.Build.0 = Release|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|x64.ActiveCfg = Release|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|x64.Build.0 = Release|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|x86.ActiveCfg = Release|Any CPU
+ {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|x86.Build.0 = Release|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|x64.Build.0 = Debug|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|x86.Build.0 = Debug|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|Any CPU.Build.0 = Release|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|x64.ActiveCfg = Release|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|x64.Build.0 = Release|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|x86.ActiveCfg = Release|Any CPU
+ {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/ZeroLevel/Services/Extensions/NumberBitsExtensions.cs b/ZeroLevel/Services/Extensions/NumberBitsExtensions.cs
new file mode 100644
index 0000000..47876d2
--- /dev/null
+++ b/ZeroLevel/Services/Extensions/NumberBitsExtensions.cs
@@ -0,0 +1,58 @@
+namespace ZeroLevel
+{
+ public static class NumberBitsExtensions
+ {
+ private const int ONE_I = 1;
+ private const uint ONE_UI = 1U;
+ private const long ONE_L = 1L;
+ private const ulong ONE_UL = 1UL;
+
+ public static ulong SetBit(this ulong k, int position)
+ {
+ k |= (ONE_UL << position);
+ return k;
+ }
+
+ public static ulong ResetBit(this ulong k, int position)
+ {
+ k &= ~(ONE_UL << position);
+ return k;
+ }
+
+ public static long SetBit(this long k, int position)
+ {
+ k |= (ONE_L << position);
+ return k;
+ }
+
+ public static long ResetBit(this long k, int position)
+ {
+ k &= ~(ONE_L << position);
+ return k;
+ }
+
+ public static int SetBit(this int k, int position)
+ {
+ k |= (ONE_I << position);
+ return k;
+ }
+
+ public static int ResetBit(this int k, int position)
+ {
+ k &= ~(ONE_I << position);
+ return k;
+ }
+
+ public static uint SetBit(this uint k, int position)
+ {
+ k |= (ONE_UI << position);
+ return k;
+ }
+
+ public static uint ResetBit(this uint k, int position)
+ {
+ k &= ~(ONE_UI << position);
+ return k;
+ }
+ }
+}
diff --git a/ZeroLevel/Services/Math/SoftMax.cs b/ZeroLevel/Services/Mathemathics/SoftMax.cs
similarity index 92%
rename from ZeroLevel/Services/Math/SoftMax.cs
rename to ZeroLevel/Services/Mathemathics/SoftMax.cs
index ac8d1de..2d5a8f4 100644
--- a/ZeroLevel/Services/Math/SoftMax.cs
+++ b/ZeroLevel/Services/Mathemathics/SoftMax.cs
@@ -1,6 +1,6 @@
using System;
-namespace ZeroLevel.Services.Mathematic
+namespace ZeroLevel.Services.Mathemathics
{
public static class SoftMax
{
diff --git a/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs b/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs
index 8e6ebb4..b6bd01a 100644
--- a/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs
+++ b/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs
@@ -265,6 +265,7 @@ namespace ZeroLevel.Services.Serialization
public void Dispose()
{
+ _stream.Flush();
_stream.Dispose();
}
diff --git a/ZeroLevel/ZeroLevel.csproj b/ZeroLevel/ZeroLevel.csproj
index 85d2892..72cd91b 100644
--- a/ZeroLevel/ZeroLevel.csproj
+++ b/ZeroLevel/ZeroLevel.csproj
@@ -59,4 +59,8 @@
+
+
+
+