diff --git a/TestApp/Program.cs b/TestApp/Program.cs index 06a9951..593761c 100644 --- a/TestApp/Program.cs +++ b/TestApp/Program.cs @@ -6,6 +6,7 @@ using ZeroLevel; using ZeroLevel.Logging; using ZeroLevel.Network; using ZeroLevel.Services.Serialization; +using ZeroLevel.Services.Trees; namespace TestApp { @@ -68,5 +69,16 @@ namespace TestApp Thread.Sleep(2000); } } + + public static double[] Generate(int vector_size) + { + var rnd = new Random((int)Environment.TickCount); + var vector = new double[vector_size]; + for (int i = 0; i < vector_size; i++) + { + vector[i] = 50.0d - rnd.NextDouble() * 100.0d; + } + return vector; + } } } \ No newline at end of file diff --git a/TestHNSW/HNSWDemo/HNSWDemo.csproj b/TestHNSW/HNSWDemo/HNSWDemo.csproj new file mode 100644 index 0000000..9b31502 --- /dev/null +++ b/TestHNSW/HNSWDemo/HNSWDemo.csproj @@ -0,0 +1,12 @@ + + + + Exe + net5.0 + + + + + + + diff --git a/TestHNSW/HNSWDemo/Program.cs b/TestHNSW/HNSWDemo/Program.cs new file mode 100644 index 0000000..07df108 --- /dev/null +++ b/TestHNSW/HNSWDemo/Program.cs @@ -0,0 +1,121 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Threading; +using ZeroLevel.HNSW; + +namespace HNSWDemo +{ + class Program + { + public enum Gender + { + Unknown, Male, Feemale + } + + public class Person + { + public Gender Gender { get; set; } + public int Age { get; set; } + public long Number { get; set; } + + private static (float[], Person) Generate(int vector_size) + { + var rnd = new Random((int)Environment.TickCount); + var vector = new float[vector_size]; + DefaultRandomGenerator.Instance.NextFloats(vector); + VectorUtils.NormalizeSIMD(vector); + var p = new Person(); + p.Age = rnd.Next(15, 80); + var gr = rnd.Next(0, 3); + p.Gender = (gr == 0) ? Gender.Male : (gr == 1) ? Gender.Feemale : Gender.Unknown; + p.Number = CreateNumber(rnd); + return (vector, p); + } + + public static List<(float[], Person)> GenerateRandom(int vectorSize, int vectorsCount) + { + var vectors = new List<(float[], Person)>(); + for (int i = 0; i < vectorsCount; i++) + { + vectors.Add(Generate(vectorSize)); + } + return vectors; + } + + static HashSet _exists = new HashSet(); + private static long CreateNumber(Random rnd) + { + long start_number; + do + { + start_number = 79600000000L; + start_number = start_number + rnd.Next(4, 8) * 10000000; + start_number += rnd.Next(0, 1000000); + } + while (_exists.Add(start_number) == false); + return start_number; + } + } + + private static List RandomVectors(int vectorSize, int vectorsCount) + { + var vectors = new List(); + for (int i = 0; i < vectorsCount; i++) + { + var vector = new float[vectorSize]; + DefaultRandomGenerator.Instance.NextFloats(vector); + VectorUtils.NormalizeSIMD(vector); + vectors.Add(vector); + } + return vectors; + } + + private static Dictionary _database = new Dictionary(); + + static void Main(string[] args) + { + var dimensionality = 128; + var testCount = 1000; + var count = 100000; + var batchSize = 5000; + var samples = Person.GenerateRandom(dimensionality, count); + + var sw = new Stopwatch(); + var world = new SmallWorld(NSWOptions.Create(6, 4, 120, 120, CosineDistance.ForUnits)); + for (int i = 0; i < (count / batchSize); i++) + { + var batch = samples.Skip(i * batchSize).Take(batchSize).ToArray(); + sw.Restart(); + var ids = world.AddItems(batch.Select(i => i.Item1).ToArray()); + sw.Stop(); + Console.WriteLine($"Batch [{i}]. 
Insert {ids.Length} items on {sw.ElapsedMilliseconds} ms"); + for (int bi = 0; bi < batch.Length; bi++) + { + _database.Add(ids[bi], batch[bi].Item2); + } + } + + var vectors = RandomVectors(dimensionality, testCount); + + //HNSWFilter filter = new HNSWFilter(ids => ids.Where(id => { var p = _database[id]; return p.Age > 45 && p.Gender == Gender.Feemale; })); + +/*var fackupCount = 0; + foreach (var v in vectors) + { + var result = world.Search(v, 10, filter); + foreach (var r in result) + { + if (_database[r.Item1].Age <= 45 || _database[r.Item1].Gender != Gender.Feemale) + { + Interlocked.Increment(ref fackupCount); + } + } + }*/ + + //Console.WriteLine($"Completed. Fackup count: {fackupCount}"); + Console.ReadKey(); + } + } +} diff --git a/ZeroLevel.HNSW/Layer.cs b/ZeroLevel.HNSW/Layer.cs new file mode 100644 index 0000000..7fb8f7c --- /dev/null +++ b/ZeroLevel.HNSW/Layer.cs @@ -0,0 +1,284 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace ZeroLevel.HNSW +{ + /// + /// NSW graph + /// + internal sealed class Layer + { + private readonly NSWOptions _options; + private readonly VectorSet _vectors; + private CompactBiDirectionalLinksSet _links = new CompactBiDirectionalLinksSet(); + + public Layer(NSWOptions options, VectorSet vectors) + { + _options = options; + _vectors = vectors; + } + + public void AddBidirectionallConnectionts(int q, int p, float qpDistance) + { + // поиск в ширину ближайших узлов к найденному + var nearest = _links.FindLinksForId(p).ToArray(); + // если у найденного узла максимальное количество связей + // if │eConn│ > Mmax // shrink connections of e + if (nearest.Length >= _options.M) + { + // ищем связь с самой большой дистанцией + float distance = nearest[0].Item3; + int index = 0; + for (int ni = 1; ni < nearest.Length; ni++) + { + if (nearest[ni].Item3 > distance) + { + index = ni; + distance = nearest[ni].Item3; + } + } + // делаем перелинковку вставляя новый узел между найденными + var id1 = nearest[index].Item1; + var id2 = nearest[index].Item2; + _links.Relink(id1, id2, q, qpDistance, _options.Distance(_vectors[id2], _vectors[q])); + } + else + { + // добавляем связь нового узла к найденному + _links.Add(q, p, qpDistance); + } + } + + public int GetEntryPointFor(int q) + { + var randomLinkId = DefaultRandomGenerator.Instance.Next(0, _links.Count); + var entryId = _links[randomLinkId].Item1; + var v = new VisitedBitSet(_vectors._set.Count, _options.M); + // v ← ep // set of visited elements + var (ep, ed) = DFS_SearchMinFrom(entryId, q, v); + return ep; + } + + private (int, float) DFS_SearchMinFrom(int entryId, int id, VisitedBitSet visited) + { + visited.Add(entryId); + int candidate = entryId; + var candidateDistance = _options.Distance(_vectors[entryId], _vectors[id]); + int counter = 0; + do + { + var (mid, dist) = GetMinNearest(visited, entryId, candidate, candidateDistance); + if (dist > candidateDistance) + { + break; + } + candidate = mid; + candidateDistance = dist; + + counter++; + } while (counter < _options.EFConstruction); + return (candidate, candidateDistance); + } + + private (int, float) GetMinNearest(VisitedBitSet visited, int entryId, int id, float entryDistance) + { + var minId = entryId; + var minDist = entryDistance; + foreach (var candidate in _links.FindLinksForId(entryId).Select(l => l.Item2)) + { + if (visited.Contains(candidate) == false) + { + var dist = _options.Distance(_vectors[candidate], _vectors[id]); + if (dist < minDist) + { + minDist = dist; + minId = candidate; + } + 
visited.Add(candidate); + } + } + return (minId, minDist); + } + + #region Implementation of https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf + + /// + /// Algorithm 2 + /// + /// query element + /// enter points ep + /// Output: ef closest neighbors to q + public IDictionary SEARCH_LAYER(int q, int ep, int ef) + { + var v = new VisitedBitSet(_vectors._set.Count, _options.M); + // v ← ep // set of visited elements + v.Add(ep); + // C ← ep // set of candidates + var C = new Dictionary(); + C.Add(ep, _options.Distance(_vectors[ep], _vectors[q])); + // W ← ep // dynamic list of found nearest neighbors + var W = new Dictionary(); + W.Add(ep, C[ep]); + // while │C│ > 0 + while (C.Count > 0) + { + // c ← extract nearest element from C to q + var nearest = W.OrderBy(p => p.Value).First(); + var c = nearest.Key; + var md = nearest.Value; + // var (c, md) = GetMinimalDistanceIndex(C, q); + C.Remove(c); + // f ← get furthest element from W to q + var f = W.OrderBy(p => p.Value).First().Key; + //var f = GetMaximalDistanceIndex(W, q); + // if distance(c, q) > distance(f, q) + if (_options.Distance(_vectors[c], _vectors[q]) > _options.Distance(_vectors[f], _vectors[q])) + { + // break // all elements in W are evaluated + break; + } + // for each e ∈ neighbourhood(c) at layer lc // update C and W + foreach (var l in _links.FindLinksForId(c)) + { + var e = l.Item2; + // if e ∉ v + if (v.Contains(e) == false) + { + // v ← v ⋃ e + v.Add(e); + // f ← get furthest element from W to q + f = W.OrderByDescending(p => p.Value).First().Key; + //f = GetMaximalDistanceIndex(W, q); + // if distance(e, q) < distance(f, q) or │W│ < ef + var ed = _options.Distance(_vectors[e], _vectors[q]); + if (ed > _options.Distance(_vectors[f], _vectors[q]) + || W.Count < ef) + { + // C ← C ⋃ e + C.Add(e, ed); + // W ← W ⋃ e + W.Add(e, ed); + // if │W│ > ef + if (W.Count > ef) + { + // remove furthest element from W to q + f = W.OrderByDescending(p => p.Value).First().Key; + //f = GetMaximalDistanceIndex(W, q); + W.Remove(f); + } + } + } + } + } + // return W + return W; + } + + /// + /// Algorithm 3 + /// + /// base element + /// candidate elements + /// Output: M nearest elements to q + public IDictionary SELECT_NEIGHBORS_SIMPLE(int q, IDictionary C) + { + if (C.Count <= _options.M) + { + return new Dictionary(C); + } + var output = new Dictionary(); + // return M nearest elements from C to q + return new Dictionary(C.OrderBy(p => p.Value).Take(_options.M)); + } + + /// + /// Algorithm 4 + /// + /// base element + /// candidate elements + /// flag indicating whether or not to extend candidate list + /// flag indicating whether or not to add discarded elements + /// Output: M elements selected by the heuristic + public IDictionary SELECT_NEIGHBORS_HEURISTIC(int q, IDictionary C, bool extendCandidates, bool keepPrunedConnections) + { + // R ← ∅ + var R = new Dictionary(); + // W ← C // working queue for the candidates + var W = new List(C.Select(p => p.Key)); + // if extendCandidates // extend candidates by their neighbors + if (extendCandidates) + { + // for each e ∈ C + foreach (var e in C) + { + // for each e_adj ∈ neighbourhood(e) at layer lc + foreach (var l in _links.FindLinksForId(e.Key)) + { + var e_adj = l.Item2; + // if eadj ∉ W + if (W.Contains(e_adj) == false) + { + // W ← W ⋃ eadj + W.Add(e_adj); + } + } + } + } + // Wd ← ∅ // queue for the discarded candidates + var Wd = new Dictionary(); + // while │W│ > 0 and │R│< M + while (W.Count > 0 && R.Count < _options.M) + { + // e ← extract nearest element from W to q 
+ var (e, ed) = GetMinimalDistanceIndex(W, q); + W.Remove(e); + // if e is closer to q compared to any element from R + if (ed < R.Min(pair => pair.Value)) + { + // R ← R ⋃ e + R.Add(e, ed); + } + // else + { + // Wd ← Wd ⋃ e + Wd.Add(e, ed); + } + // if keepPrunedConnections // add some of the discarded // connections from Wd + if (keepPrunedConnections) + { + // while │Wd│> 0 and │R│< M + while (Wd.Count > 0 && R.Count < _options.M) + { + // R ← R ⋃ extract nearest element from Wd to q + var nearest = Wd.Aggregate((l, r) => l.Value < r.Value ? l : r); + Wd.Remove(nearest.Key); + R.Add(nearest.Key, nearest.Value); + } + } + } + // return R + return R; + } + + + #endregion + + + private (int, float) GetMinimalDistanceIndex(IList self, int q) + { + float min = _options.Distance(_vectors[self[0]], _vectors[q]); + int minIndex = 0; + for (int i = 1; i < self.Count; ++i) + { + var dist = _options.Distance(_vectors[self[i]], _vectors[q]); + if (dist < min) + { + min = self[i]; + minIndex = i; + } + } + return (minIndex, min); + } + } +} diff --git a/ZeroLevel.HNSW/Model/NSWOptions.cs b/ZeroLevel.HNSW/Model/NSWOptions.cs new file mode 100644 index 0000000..c888eaa --- /dev/null +++ b/ZeroLevel.HNSW/Model/NSWOptions.cs @@ -0,0 +1,42 @@ +using System; + +namespace ZeroLevel.HNSW +{ + public sealed class NSWOptions + { + public const int FARTHEST_DIVIDER = 3; + + /// + /// Mox node connections on Layer + /// + public readonly int M; + + /// + /// Max search buffer + /// + public readonly int EF; + /// + /// Max search buffer for inserting + /// + public readonly int EFConstruction; + /// + /// Distance function beetween vectors + /// + public readonly Func Distance; + + public readonly int LayersCount; + + + private NSWOptions(int layersCount, int m, int ef, int ef_construction, Func distance) + { + LayersCount = layersCount; + M = m; + EF = ef; + EFConstruction = ef_construction; + Distance = distance; + } + + public static NSWOptions Create(int layersCount, int M, int EF, int EF_construction, Func distance) => + new NSWOptions(layersCount, M, EF, EF_construction, distance); + } +} diff --git a/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs b/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs new file mode 100644 index 0000000..779acdd --- /dev/null +++ b/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs @@ -0,0 +1,250 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; + +namespace ZeroLevel.HNSW +{ + internal sealed class CompactBiDirectionalLinksSet + : IDisposable + { + private readonly ReaderWriterLockSlim _rwLock = new ReaderWriterLockSlim(); + + private const int HALF_LONG_BITS = 32; + + private SortedList _set = new SortedList(); + + public (int, int, float) this[int index] + { + get + { + var k = _set.Keys[index]; + var d = _set.Values[index]; + var id1 = (int)(k >> HALF_LONG_BITS); + var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS)); + return (id1, id2, d); + } + } + + public int Count => _set.Count; + + /// + /// Разрывает связи id1 - id2 и id2 - id1, и строит новые id1 - id, id - id1 + /// + public void Relink(int id1, int id2, int id, float distance) + { + long k1old = (((long)(id1)) << HALF_LONG_BITS) + id2; + long k2old = (((long)(id2)) << HALF_LONG_BITS) + id1; + + long k1new = (((long)(id1)) << HALF_LONG_BITS) + id; + long k2new = (((long)(id)) << HALF_LONG_BITS) + id1; + + _rwLock.EnterWriteLock(); + try + { + _set.Remove(k1old); + _set.Remove(k2old); + _set.Add(k1new, distance); + _set.Add(k2new, distance); + } 
+ finally + { + _rwLock.ExitWriteLock(); + } + } + + /// + /// Разрывает связи id1 - id2 и id2 - id1, и строит новые id1 - id, id - id1, id2 - id, id - id2 + /// + public void Relink(int id1, int id2, int id, float distanceToId1, float distanceToId2) + { + long k_id1_id2 = (((long)(id1)) << HALF_LONG_BITS) + id2; + long k_id2_id1 = (((long)(id2)) << HALF_LONG_BITS) + id1; + + long k_id_id1 = (((long)(id)) << HALF_LONG_BITS) + id1; + long k_id1_id = (((long)(id1)) << HALF_LONG_BITS) + id; + + long k_id_id2 = (((long)(id)) << HALF_LONG_BITS) + id2; + long k_id2_id = (((long)(id2)) << HALF_LONG_BITS) + id; + + _rwLock.EnterWriteLock(); + try + { + _set.Remove(k_id1_id2); + _set.Remove(k_id2_id1); + _set.Add(k_id_id1, distanceToId1); + _set.Add(k_id1_id, distanceToId1); + _set.Add(k_id_id2, distanceToId2); + _set.Add(k_id2_id, distanceToId2); + } + finally + { + _rwLock.ExitWriteLock(); + } + } + + public IEnumerable<(int, int, float)> FindLinksForId(int id) + { + _rwLock.EnterReadLock(); + try + { + foreach (var (k, v) in Search(_set, id)) + { + var id1 = (int)(k >> HALF_LONG_BITS); + var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS)); + yield return (id1, id2, v); + } + } + finally + { + _rwLock.ExitReadLock(); + } + } + + public IEnumerable<(int, int, float)> Items() + { + _rwLock.EnterReadLock(); + try + { + foreach (var pair in _set) + { + var id1 = (int)(pair.Key >> HALF_LONG_BITS); + var id2 = (int)(pair.Key - (((long)id1) << HALF_LONG_BITS)); + yield return (id1, id2, pair.Value); + } + } + finally + { + _rwLock.ExitReadLock(); + } + } + + public void RemoveIndex(int id) + { + long[] forward; + long[] backward; + _rwLock.EnterReadLock(); + try + { + forward = Search(_set, id).Select(pair => pair.Item1).ToArray(); + backward = forward.Select(k => + { + var id1 = k >> HALF_LONG_BITS; + var id2 = k - (id1 << HALF_LONG_BITS); + return (id2 << HALF_LONG_BITS) + id1; + }).ToArray(); + } + finally + { + _rwLock.ExitReadLock(); + } + _rwLock.EnterWriteLock(); + try + { + foreach (var k in forward) + { + _set.Remove(k); + } + foreach (var k in backward) + { + _set.Remove(k); + } + } + finally + { + _rwLock.ExitWriteLock(); + } + } + + public bool Add(int id1, int id2, float distance) + { + _rwLock.EnterWriteLock(); + try + { + long k1 = (((long)(id1)) << HALF_LONG_BITS) + id2; + long k2 = (((long)(id2)) << HALF_LONG_BITS) + id1; + if (_set.ContainsKey(k1) == false) + { + _set.Add(k1, distance); + if (k1 != k2) + { + _set.Add(k2, distance); + } + return true; + } + } + finally + { + _rwLock.ExitWriteLock(); + } + return false; + } + + static IEnumerable<(long, float)> Search(SortedList set, int index) + { + long k = ((long)index) << HALF_LONG_BITS; + int left = 0; + int right = set.Count - 1; + int mid; + long test; + while (left < right) + { + mid = (right + left) / 2; + test = (set.Keys[mid] >> HALF_LONG_BITS) << HALF_LONG_BITS; + + if (left == mid || right == mid) + { + if (test == k) + { + return SearchByPosition(set, k, mid); + } + break; + } + if (test < k) + { + left = mid; + } + else + { + if (test == k) + { + return SearchByPosition(set, k, mid); + } + else + { + right = mid; + } + } + } + return Enumerable.Empty<(long, float)>(); + } + + static IEnumerable<(long, float)> SearchByPosition(SortedList set, long k, int position) + { + var start = position; + var end = position; + do + { + position--; + } while (position >= 0 && ((set.Keys[position] >> HALF_LONG_BITS) << HALF_LONG_BITS) == k); + start = position + 1; + position = end + 1; + while (position < set.Count && 
((set.Keys[position] >> HALF_LONG_BITS) << HALF_LONG_BITS) == k) + { + position++; + } + end = position - 1; + for (int i = start; i <= end; i++) + { + yield return (set.Keys[i], set.Values[i]); + } + } + + public void Dispose() + { + _rwLock.Dispose(); + _set.Clear(); + _set = null; + } + } +} diff --git a/ZeroLevel.HNSW/Services/CosineDistance.cs b/ZeroLevel.HNSW/Services/CosineDistance.cs new file mode 100644 index 0000000..5531294 --- /dev/null +++ b/ZeroLevel.HNSW/Services/CosineDistance.cs @@ -0,0 +1,184 @@ +using System; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace ZeroLevel.HNSW +{ + /// + /// Calculates cosine similarity. + /// + /// + /// Intuition behind selecting float as a carrier. + /// + /// 1. In practice we work with vectors of dimensionality 100 and each component has value in range [-1; 1] + /// There certainly is a possibility of underflow. + /// But we assume that such cases are rare and we can rely on such underflow losses. + /// + /// 2. According to the article http://www.ti3.tuhh.de/paper/rump/JeaRu13.pdf + /// the floating point rounding error is less then 100 * 2^-24 * sqrt(100) * sqrt(100) < 0.0005960 + /// We deem such precision is satisfactory for out needs. + /// + public static class CosineDistance + { + /// + /// Calculates cosine distance without making any optimizations. + /// + /// Left vector. + /// Right vector. + /// Cosine distance between u and v. + public static float NonOptimized(float[] u, float[] v) + { + if (u.Length != v.Length) + { + throw new ArgumentException("Vectors have non-matching dimensions"); + } + + float dot = 0.0f; + float nru = 0.0f; + float nrv = 0.0f; + for (int i = 0; i < u.Length; ++i) + { + dot += u[i] * v[i]; + nru += u[i] * u[i]; + nrv += v[i] * v[i]; + } + + var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv)); + return 1 - similarity; + } + + /// + /// Calculates cosine distance with assumption that u and v are unit vectors. + /// + /// Left vector. + /// Right vector. + /// Cosine distance between u and v. + public static float ForUnits(float[] u, float[] v) + { + if (u.Length != v.Length) + { + throw new ArgumentException("Vectors have non-matching dimensions"); + } + + float dot = 0; + for (int i = 0; i < u.Length; ++i) + { + dot += u[i] * v[i]; + } + + return 1 - dot; + } + + /// + /// Calculates cosine distance optimized using SIMD instructions. + /// + /// Left vector. + /// Right vector. + /// Cosine distance between u and v. + public static float SIMD(float[] u, float[] v) + { + if (!Vector.IsHardwareAccelerated) + { + throw new NotSupportedException($"SIMD version of {nameof(CosineDistance)} is not supported"); + } + + if (u.Length != v.Length) + { + throw new ArgumentException("Vectors have non-matching dimensions"); + } + + float dot = 0; + var norm = default(Vector2); + int step = Vector.Count; + + int i, to = u.Length - step; + for (i = 0; i <= to; i += step) + { + var ui = new Vector(u, i); + var vi = new Vector(v, i); + dot += Vector.Dot(ui, vi); + norm.X += Vector.Dot(ui, ui); + norm.Y += Vector.Dot(vi, vi); + } + + for (; i < u.Length; ++i) + { + dot += u[i] * v[i]; + norm.X += u[i] * u[i]; + norm.Y += v[i] * v[i]; + } + + norm = Vector2.SquareRoot(norm); + float n = (norm.X * norm.Y); + + if (n == 0) + { + return 1f; + } + + var similarity = dot / n; + return 1f - similarity; + } + + /// + /// Calculates cosine distance with assumption that u and v are unit vectors using SIMD instructions. + /// + /// Left vector. + /// Right vector. 
+ /// Cosine distance between u and v. + public static float SIMDForUnits(float[] u, float[] v) + { + return 1f - DotProduct(ref u, ref v); + } + + private static readonly int _vs1 = Vector.Count; + private static readonly int _vs2 = 2 * Vector.Count; + private static readonly int _vs3 = 3 * Vector.Count; + private static readonly int _vs4 = 4 * Vector.Count; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static float DotProduct(ref float[] lhs, ref float[] rhs) + { + float result = 0f; + + var count = lhs.Length; + var offset = 0; + + while (count >= _vs4) + { + result += Vector.Dot(new Vector(lhs, offset), new Vector(rhs, offset)); + result += Vector.Dot(new Vector(lhs, offset + _vs1), new Vector(rhs, offset + _vs1)); + result += Vector.Dot(new Vector(lhs, offset + _vs2), new Vector(rhs, offset + _vs2)); + result += Vector.Dot(new Vector(lhs, offset + _vs3), new Vector(rhs, offset + _vs3)); + if (count == _vs4) return result; + count -= _vs4; + offset += _vs4; + } + + if (count >= _vs2) + { + result += Vector.Dot(new Vector(lhs, offset), new Vector(rhs, offset)); + result += Vector.Dot(new Vector(lhs, offset + _vs1), new Vector(rhs, offset + _vs1)); + if (count == _vs2) return result; + count -= _vs2; + offset += _vs2; + } + if (count >= _vs1) + { + result += Vector.Dot(new Vector(lhs, offset), new Vector(rhs, offset)); + if (count == _vs1) return result; + count -= _vs1; + offset += _vs1; + } + if (count > 0) + { + while (count > 0) + { + result += lhs[offset] * rhs[offset]; + offset++; count--; + } + } + return result; + } + } +} diff --git a/ZeroLevel.HNSW/Services/FastRandom.cs b/ZeroLevel.HNSW/Services/FastRandom.cs new file mode 100644 index 0000000..74ab8d5 --- /dev/null +++ b/ZeroLevel.HNSW/Services/FastRandom.cs @@ -0,0 +1,507 @@ +using System; +using System.Runtime.CompilerServices; + +namespace ZeroLevel.HNSW +{ + public sealed class DefaultRandomGenerator + { + /// + /// This is the default configuration (it supports the optimization process to be executed on multiple threads) + /// + public static DefaultRandomGenerator Instance { get; } = new DefaultRandomGenerator(allowParallel: true); + + /// + /// This uses the same random number generator but forces the optimization process to run on a single thread (which may be desirable if multiple requests may be processed concurrently + /// or if it is otherwise not desirable to let a single request access all of the CPUs) + /// + public static DefaultRandomGenerator DisableThreading { get; } = new DefaultRandomGenerator(allowParallel: false); + + private DefaultRandomGenerator(bool allowParallel) => IsThreadSafe = allowParallel; + + public bool IsThreadSafe { get; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int Next(int minValue, int maxValue) => ThreadSafeFastRandom.Next(minValue, maxValue); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public float NextFloat() => ThreadSafeFastRandom.NextFloat(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void NextFloats(Span buffer) => ThreadSafeFastRandom.NextFloats(buffer); + } + + internal static class ThreadSafeFastRandom + { + private static readonly Random _global = new Random(); + + [ThreadStatic] + private static FastRandom _local; + + private static int GetGlobalSeed() + { + int seed; + lock (_global) + { + seed = _global.Next(); + } + return seed; + } + + /// + /// Returns a non-negative random integer. + /// + /// A 32-bit signed integer that is greater than or equal to 0 and less than System.Int32.MaxValue. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Next() + { + var inst = _local; + if (inst == null) + { + int seed; + seed = GetGlobalSeed(); + _local = inst = new FastRandom(seed); + } + return inst.Next(); + } + + /// + /// Returns a non-negative random integer that is less than the specified maximum. + /// + /// The exclusive upper bound of the random number to be generated. maxValue must be greater than or equal to 0. + /// A 32-bit signed integer that is greater than or equal to 0, and less than maxValue; that is, the range of return values ordinarily includes 0 but not maxValue. However, + // if maxValue equals 0, maxValue is returned. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Next(int maxValue) + { + var inst = _local; + if (inst == null) + { + int seed; + seed = GetGlobalSeed(); + _local = inst = new FastRandom(seed); + } + int ans; + do + { + ans = inst.Next(maxValue); + } while (ans == maxValue); + + return ans; + } + + /// + /// Returns a random integer that is within a specified range. + /// + /// The inclusive lower bound of the random number returned. + /// The exclusive upper bound of the random number returned. maxValue must be greater than or equal to minValue. + /// A 32-bit signed integer greater than or equal to minValue and less than maxValue; that is, the range of return values includes minValue but not maxValue. If minValue + // equals maxValue, minValue is returned. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Next(int minValue, int maxValue) + { + var inst = _local; + if (inst == null) + { + int seed; + seed = GetGlobalSeed(); + _local = inst = new FastRandom(seed); + } + return inst.Next(minValue, maxValue); + } + + /// + /// Generates a random float. Values returned are from 0.0 up to but not including 1.0. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float NextFloat() + { + var inst = _local; + if (inst == null) + { + int seed; + seed = GetGlobalSeed(); + _local = inst = new FastRandom(seed); + } + return inst.NextFloat(); + } + + /// + /// Fills the elements of a specified array of bytes with random numbers. + /// + /// An array of bytes to contain random numbers. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void NextFloats(Span buffer) + { + var inst = _local; + if (inst == null) + { + int seed; + seed = GetGlobalSeed(); + _local = inst = new FastRandom(seed); + } + inst.NextFloats(buffer); + } + } + + /// + /// A fast random number generator for .NET, from https://www.codeproject.com/Articles/9187/A-fast-equivalent-for-System-Random + /// Colin Green, January 2005 + /// + /// September 4th 2005 + /// Added NextBytesUnsafe() - commented out by default. + /// Fixed bug in Reinitialise() - y,z and w variables were not being reset. + /// + /// Key points: + /// 1) Based on a simple and fast xor-shift pseudo random number generator (RNG) specified in: + /// Marsaglia, George. (2003). Xorshift RNGs. + /// http://www.jstatsoft.org/v08/i14/xorshift.pdf + /// + /// This particular implementation of xorshift has a period of 2^128-1. See the above paper to see + /// how this can be easily extened if you need a longer period. At the time of writing I could find no + /// information on the period of System.Random for comparison. + /// + /// 2) Faster than System.Random. Up to 8x faster, depending on which methods are called. + /// + /// 3) Direct replacement for System.Random. 
This class implements all of the methods that System.Random + /// does plus some additional methods. The like named methods are functionally equivalent. + /// + /// 4) Allows fast re-initialisation with a seed, unlike System.Random which accepts a seed at construction + /// time which then executes a relatively expensive initialisation routine. This provides a vast speed improvement + /// if you need to reset the pseudo-random number sequence many times, e.g. if you want to re-generate the same + /// sequence many times. An alternative might be to cache random numbers in an array, but that approach is limited + /// by memory capacity and the fact that you may also want a large number of different sequences cached. Each sequence + /// can each be represented by a single seed value (int) when using FastRandom. + /// + /// Notes. + /// A further performance improvement can be obtained by declaring local variables as static, thus avoiding + /// re-allocation of variables on each call. However care should be taken if multiple instances of + /// FastRandom are in use or if being used in a multi-threaded environment. + /// + /// + internal class FastRandom + { + // The +1 ensures NextDouble doesn't generate 1.0 + const float FLOAT_UNIT_INT = 1.0f / ((float)int.MaxValue + 1.0f); + + const double REAL_UNIT_INT = 1.0 / ((double)int.MaxValue + 1.0); + const double REAL_UNIT_UINT = 1.0 / ((double)uint.MaxValue + 1.0); + const uint Y = 842502087, Z = 3579807591, W = 273326509; + + uint x, y, z, w; + + /// + /// Initialises a new instance using time dependent seed. + /// + public FastRandom() + { + // Initialise using the system tick count. + Reinitialise(Environment.TickCount); + } + + /// + /// Initialises a new instance using an int value as seed. + /// This constructor signature is provided to maintain compatibility with + /// System.Random + /// + public FastRandom(int seed) + { + Reinitialise(seed); + } + + /// + /// Reinitialises using an int value as a seed. + /// + public void Reinitialise(int seed) + { + // The only stipulation stated for the xorshift RNG is that at least one of + // the seeds x,y,z,w is non-zero. We fulfill that requirement by only allowing + // resetting of the x seed + x = (uint)seed; + y = Y; + z = Z; + w = W; + } + + /// + /// Generates a random int over the range 0 to int.MaxValue-1. + /// MaxValue is not generated in order to remain functionally equivalent to System.Random.Next(). + /// This does slightly eat into some of the performance gain over System.Random, but not much. + /// For better performance see: + /// + /// Call NextInt() for an int over the range 0 to int.MaxValue. + /// + /// Call NextUInt() and cast the result to an int to generate an int over the full Int32 value range + /// including negative values. + /// + public int Next() + { + uint t = (x ^ (x << 11)); + x = y; y = z; z = w; + w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); + + // Handle the special case where the value int.MaxValue is generated. This is outside of + // the range of permitted values, so we therefore call Next() to try again. + uint rtn = w & 0x7FFFFFFF; + if (rtn == 0x7FFFFFFF) + return Next(); + return (int)rtn; + } + + /// + /// Generates a random int over the range 0 to upperBound-1, and not including upperBound. 
+ /// + public int Next(int upperBound) + { + if (upperBound < 0) + throw new ArgumentOutOfRangeException("upperBound", upperBound, "upperBound must be >=0"); + + uint t = (x ^ (x << 11)); + x = y; y = z; z = w; + + // The explicit int cast before the first multiplication gives better performance. + // See comments in NextDouble. + return (int)((REAL_UNIT_INT * (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))) * upperBound); + } + + /// + /// Generates a random int over the range lowerBound to upperBound-1, and not including upperBound. + /// upperBound must be >= lowerBound. lowerBound may be negative. + /// + public int Next(int lowerBound, int upperBound) + { + if (lowerBound > upperBound) + throw new ArgumentOutOfRangeException("upperBound", upperBound, "upperBound must be >=lowerBound"); + + uint t = (x ^ (x << 11)); + x = y; y = z; z = w; + + // The explicit int cast before the first multiplication gives better performance. + // See comments in NextDouble. + int range = upperBound - lowerBound; + if (range < 0) + { // If range is <0 then an overflow has occured and must resort to using long integer arithmetic instead (slower). + // We also must use all 32 bits of precision, instead of the normal 31, which again is slower. + return lowerBound + (int)((REAL_UNIT_UINT * (double)(w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)))) * (double)((long)upperBound - (long)lowerBound)); + } + + // 31 bits of precision will suffice if range<=int.MaxValue. This allows us to cast to an int and gain + // a little more performance. + return lowerBound + (int)((REAL_UNIT_INT * (double)(int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))) * (double)range); + } + + /// + /// Generates a random double. Values returned are from 0.0 up to but not including 1.0. + /// + public double NextDouble() + { + uint t = (x ^ (x << 11)); + x = y; y = z; z = w; + + // Here we can gain a 2x speed improvement by generating a value that can be cast to + // an int instead of the more easily available uint. If we then explicitly cast to an + // int the compiler will then cast the int to a double to perform the multiplication, + // this final cast is a lot faster than casting from a uint to a double. The extra cast + // to an int is very fast (the allocated bits remain the same) and so the overall effect + // of the extra cast is a significant performance improvement. + // + // Also note that the loss of one bit of precision is equivalent to what occurs within + // System.Random. + return (REAL_UNIT_INT * (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))))); + } + + /// + /// Generates a random double. Values returned are from 0.0 up to but not including 1.0. + /// + public float NextFloat() + { + uint x = this.x, y = this.y, z = this.z, w = this.w; + uint t = (x ^ (x << 11)); + x = y; y = z; z = w; + w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); + var value = FLOAT_UNIT_INT * (int)(0x7FFFFFFF & w); + this.x = x; this.y = y; this.z = z; this.w = w; + return value; + } + + /// + /// Fills the provided byte array with random floats. + /// + public void NextFloats(Span buffer) + { + uint x = this.x, y = this.y, z = this.z, w = this.w; + int i = 0; + uint t; + for (int bound = buffer.Length; i < bound;) + { + t = (x ^ (x << 11)); + x = y; y = z; z = w; + w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); + + buffer[i++] = FLOAT_UNIT_INT * (int)(0x7FFFFFFF & w); + } + + this.x = x; this.y = y; this.z = z; this.w = w; + } + + + /// + /// Fills the provided byte array with random bytes. 
+ /// This method is functionally equivalent to System.Random.NextBytes(). + /// + public void NextBytes(byte[] buffer) + { + // Fill up the bulk of the buffer in chunks of 4 bytes at a time. + uint x = this.x, y = this.y, z = this.z, w = this.w; + int i = 0; + uint t; + for (int bound = buffer.Length - 3; i < bound;) + { + // Generate 4 bytes. + // Increased performance is achieved by generating 4 random bytes per loop. + // Also note that no mask needs to be applied to zero out the higher order bytes before + // casting because the cast ignores thos bytes. Thanks to Stefan Troschütz for pointing this out. + t = (x ^ (x << 11)); + x = y; y = z; z = w; + w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); + + buffer[i++] = (byte)w; + buffer[i++] = (byte)(w >> 8); + buffer[i++] = (byte)(w >> 16); + buffer[i++] = (byte)(w >> 24); + } + + // Fill up any remaining bytes in the buffer. + if (i < buffer.Length) + { + // Generate 4 bytes. + t = (x ^ (x << 11)); + x = y; y = z; z = w; + w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); + + buffer[i++] = (byte)w; + if (i < buffer.Length) + { + buffer[i++] = (byte)(w >> 8); + if (i < buffer.Length) + { + buffer[i++] = (byte)(w >> 16); + if (i < buffer.Length) + { + buffer[i] = (byte)(w >> 24); + } + } + } + } + this.x = x; this.y = y; this.z = z; this.w = w; + } + + /// + /// Fills the provided byte array with random bytes. + /// This method is functionally equivalent to System.Random.NextBytes(). + /// + public void NextBytes(Span buffer) + { + // Fill up the bulk of the buffer in chunks of 4 bytes at a time. + uint x = this.x, y = this.y, z = this.z, w = this.w; + int i = 0; + uint t; + for (int bound = buffer.Length - 3; i < bound;) + { + // Generate 4 bytes. + // Increased performance is achieved by generating 4 random bytes per loop. + // Also note that no mask needs to be applied to zero out the higher order bytes before + // casting because the cast ignores thos bytes. Thanks to Stefan Troschütz for pointing this out. + t = (x ^ (x << 11)); + x = y; y = z; z = w; + w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); + + buffer[i++] = (byte)w; + buffer[i++] = (byte)(w >> 8); + buffer[i++] = (byte)(w >> 16); + buffer[i++] = (byte)(w >> 24); + } + + // Fill up any remaining bytes in the buffer. + if (i < buffer.Length) + { + // Generate 4 bytes. + t = (x ^ (x << 11)); + x = y; y = z; z = w; + w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); + + buffer[i++] = (byte)w; + if (i < buffer.Length) + { + buffer[i++] = (byte)(w >> 8); + if (i < buffer.Length) + { + buffer[i++] = (byte)(w >> 16); + if (i < buffer.Length) + { + buffer[i] = (byte)(w >> 24); + } + } + } + } + this.x = x; this.y = y; this.z = z; this.w = w; + } + + /// + /// Generates a uint. Values returned are over the full range of a uint, + /// uint.MinValue to uint.MaxValue, inclusive. + /// + /// This is the fastest method for generating a single random number because the underlying + /// random number generator algorithm generates 32 random bits that can be cast directly to + /// a uint. + /// + public uint NextUInt() + { + uint t = (x ^ (x << 11)); + x = y; y = z; z = w; + return (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))); + } + + /// + /// Generates a random int over the range 0 to int.MaxValue, inclusive. + /// This method differs from Next() only in that the range is 0 to int.MaxValue + /// and not 0 to int.MaxValue-1. + /// + /// The slight difference in range means this method is slightly faster than Next() + /// but is not functionally equivalent to System.Random.Next(). 
+ /// + public int NextInt() + { + uint t = (x ^ (x << 11)); + x = y; y = z; z = w; + return (int)(0x7FFFFFFF & (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)))); + } + + + // Buffer 32 bits in bitBuffer, return 1 at a time, keep track of how many have been returned + // with bitBufferIdx. + uint bitBuffer; + uint bitMask = 1; + + /// + /// Generates a single random bit. + /// This method's performance is improved by generating 32 bits in one operation and storing them + /// ready for future calls. + /// + public bool NextBool() + { + if (bitMask == 1) + { + // Generate 32 more bits. + uint t = (x ^ (x << 11)); + x = y; y = z; z = w; + bitBuffer = w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); + + // Reset the bitMask that tells us which bit to read next. + bitMask = 0x80000000; + return (bitBuffer & bitMask) == 0; + } + + return (bitBuffer & (bitMask >>= 1)) == 0; + } + } +} diff --git a/ZeroLevel.HNSW/Services/VectorSet.cs b/ZeroLevel.HNSW/Services/VectorSet.cs new file mode 100644 index 0000000..fd5d38f --- /dev/null +++ b/ZeroLevel.HNSW/Services/VectorSet.cs @@ -0,0 +1,31 @@ +using System.Collections.Generic; +using System.Threading; + +namespace ZeroLevel.HNSW +{ + public class VectorSet + { + public IList _set = new List(); + + public T this[int index] => _set[index]; + + SpinLock _lock = new SpinLock(); + + public int Append(T vector) + { + bool gotLock = false; + gotLock = false; + try + { + _lock.Enter(ref gotLock); + _set.Add(vector); + return _set.Count - 1; + } + finally + { + // Only give up the lock if you actually acquired it + if (gotLock) _lock.Exit(); + } + } + } +} diff --git a/ZeroLevel.HNSW/Services/VectorUtils.cs b/ZeroLevel.HNSW/Services/VectorUtils.cs new file mode 100644 index 0000000..c4a72eb --- /dev/null +++ b/ZeroLevel.HNSW/Services/VectorUtils.cs @@ -0,0 +1,78 @@ +using System; +using System.Collections.Generic; +using System.Numerics; + +namespace ZeroLevel.HNSW +{ + public static class VectorUtils + { + public static float Magnitude(IList vector) + { + float magnitude = 0.0f; + for (int i = 0; i < vector.Count; ++i) + { + magnitude += vector[i] * vector[i]; + } + + return (float)Math.Sqrt(magnitude); + } + + public static void Normalize(IList vector) + { + float normFactor = 1 / Magnitude(vector); + for (int i = 0; i < vector.Count; ++i) + { + vector[i] *= normFactor; + } + } + + public static float MagnitudeSIMD(float[] vector) + { + if (!Vector.IsHardwareAccelerated) + { + throw new NotSupportedException($"{nameof(VectorUtils.NormalizeSIMD)} is not supported"); + } + + float magnitude = 0.0f; + int step = Vector.Count; + + int i, to = vector.Length - step; + for (i = 0; i <= to; i += Vector.Count) + { + var vi = new Vector(vector, i); + magnitude += Vector.Dot(vi, vi); + } + + for (; i < vector.Length; ++i) + { + magnitude += vector[i] * vector[i]; + } + + return (float)Math.Sqrt(magnitude); + } + + public static void NormalizeSIMD(float[] vector) + { + if (!Vector.IsHardwareAccelerated) + { + throw new NotSupportedException($"{nameof(VectorUtils.NormalizeSIMD)} is not supported"); + } + + float normFactor = 1f / MagnitudeSIMD(vector); + int step = Vector.Count; + + int i, to = vector.Length - step; + for (i = 0; i <= to; i += step) + { + var vi = new Vector(vector, i); + vi = Vector.Multiply(normFactor, vi); + vi.CopyTo(vector, i); + } + + for (; i < vector.Length; ++i) + { + vector[i] *= normFactor; + } + } + } +} diff --git a/ZeroLevel.HNSW/Services/VisitedBitSet.cs b/ZeroLevel.HNSW/Services/VisitedBitSet.cs new file mode 100644 index 0000000..16de598 --- /dev/null +++ 
b/ZeroLevel.HNSW/Services/VisitedBitSet.cs @@ -0,0 +1,32 @@ +using System; + +namespace ZeroLevel.HNSW +{ + internal class VisitedBitSet + { + // bit map + private int[] Buffer; + + internal VisitedBitSet(int nodesCount, int M) + { + Buffer = new int[(nodesCount >> 5) + M + 1]; + } + + internal bool Contains(int nodeId) + { + int carrier = Buffer[nodeId >> 5]; + return ((1 << (nodeId & 31)) & carrier) != 0; + } + + internal void Add(int nodeId) + { + int mask = 1 << (nodeId & 31); + Buffer[nodeId >> 5] |= mask; + } + + internal void Clear() + { + Array.Clear(Buffer, 0, Buffer.Length); + } + } +} diff --git a/ZeroLevel.HNSW/SmallWorld.cs b/ZeroLevel.HNSW/SmallWorld.cs new file mode 100644 index 0000000..d177a69 --- /dev/null +++ b/ZeroLevel.HNSW/SmallWorld.cs @@ -0,0 +1,124 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace ZeroLevel.HNSW +{ + public class SmallWorld + { + private readonly NSWOptions _options; + private readonly VectorSet _vectors; + private readonly Layer[] _layers; + + private Layer EnterPointsLayer => _layers[_layers.Length - 1]; + private Layer LastLayer => _layers[0]; + + public SmallWorld(NSWOptions options) + { + _options = options; + _vectors = new VectorSet(); + _layers = new Layer[_options.LayersCount]; + for (int i = 0; i < _options.LayersCount; i++) + { + _layers[i] = new Layer(_options, _vectors); + } + } + + public IEnumerable<(int, TItem[])> Search(TItem vector, int k, HashSet activeNodes = null) + { + return Enumerable.Empty<(int, TItem[])>(); + } + + public int[] AddItems(IEnumerable vectors) + { + var insert = vectors.ToArray(); + var ids = new int[insert.Length]; + for (int i = 0; i < insert.Length; i++) + { + var item = insert[i]; + ids[i] = Insert(item); + } + return ids; + } + + public int Insert(TItem item) + { + var id = _vectors.Append(item); + INSERT(id); + return id; + } + + #region https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf + /// + /// Algorithm 1 + /// + /// new element + public void INSERT(int q) + { + // W ← ∅ // list for the currently found nearest elements + IDictionary W; + // ep ← get enter point for hnsw + var ep = EnterPointsLayer.GetEntryPointFor(q); + // L ← level of ep // top layer for hnsw + var L = _layers.Length - 1; + // l ← ⌊-ln(unif(0..1))∙mL⌋ // new element’s level + int l = DefaultRandomGenerator.Instance.Next(0, _options.LayersCount - 1); + // for lc ← L … l+1 + for (int lc = L; lc > l; lc--) + { + // W ← SEARCH-LAYER(q, ep, ef = 1, lc) + W = _layers[lc].SEARCH_LAYER(q, ep, 1); + // ep ← get the nearest element from W to q + ep = W.OrderBy(p => p.Value).First().Key; + } + //for lc ← min(L, l) … 0 + for (int lc = Math.Min(L, l); lc >= 0; lc--) + { + // W ← SEARCH - LAYER(q, ep, efConstruction, lc) + W = _layers[lc].SEARCH_LAYER(q, ep, _options.EFConstruction); + // neighbors ← SELECT-NEIGHBORS(q, W, M, lc) // alg. 3 or alg. 
4 + var neighbors = _layers[lc].SELECT_NEIGHBORS_SIMPLE(q, W); + // add bidirectionall connectionts from neighbors to q at layer lc + // for each e ∈ neighbors // shrink connections if needed + foreach (var e in neighbors) + { + // eConn ← neighbourhood(e) at layer lc + _layers[lc].AddBidirectionallConnectionts(q, e.Key, e.Value); + } + // ep ← W + ep = W.OrderBy(p => p.Value).First().Key; + } + // if l > L + // set enter point for hnsw to q + } + + /// + /// Algorithm 5 + /// + /// query element + /// number of nearest neighbors to return + /// : K nearest elements to q + public IList K_NN_SEARCH(int q, int K) + { + // W ← ∅ // set for the current nearest elements + IDictionary W; + // ep ← get enter point for hnsw + var ep = EnterPointsLayer.GetEntryPointFor(q); + // L ← level of ep // top layer for hnsw + var L = _options.LayersCount - 1; + // for lc ← L … 1 + for (var lc = L; lc > 0; lc--) + { + // W ← SEARCH-LAYER(q, ep, ef = 1, lc) + W = _layers[lc].SEARCH_LAYER(q, ep, 1); + // ep ← get nearest element from W to q + ep = W.OrderBy(p => p.Value).First().Key; + } + // W ← SEARCH-LAYER(q, ep, ef, lc =0) + W = LastLayer.SEARCH_LAYER(q, ep, _options.EF); + // return K nearest elements from W to q + return W.OrderBy(p => p.Value).Take(K).Select(p => p.Key).ToList(); + } + #endregion + } +} diff --git a/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj b/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj new file mode 100644 index 0000000..09920fe --- /dev/null +++ b/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj @@ -0,0 +1,11 @@ + + + + net5.0 + + + + + + + diff --git a/ZeroLevel.sln b/ZeroLevel.sln index 6534974..aa957a7 100644 --- a/ZeroLevel.sln +++ b/ZeroLevel.sln @@ -57,7 +57,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Client", "ConnectionTest\Cl EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Server", "ConnectionTest\Server\Server.csproj", "{3496A688-0749-48C2-BD60-ABB42A5C17C9}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ZeroLevel.Qdrant", "ZeroLevel.Qdrant\ZeroLevel.Qdrant.csproj", "{7188B89E-96EB-4EFB-AAFB-D0A823031F99}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZeroLevel.Qdrant", "ZeroLevel.Qdrant\ZeroLevel.Qdrant.csproj", "{7188B89E-96EB-4EFB-AAFB-D0A823031F99}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZeroLevel.HNSW", "ZeroLevel.HNSW\ZeroLevel.HNSW.csproj", "{1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HNSWDemo", "TestHNSW\HNSWDemo\HNSWDemo.csproj", "{E0E9EC21-B958-4018-AE30-67DB88EFCB90}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -285,6 +289,30 @@ Global {7188B89E-96EB-4EFB-AAFB-D0A823031F99}.Release|x64.Build.0 = Release|x64 {7188B89E-96EB-4EFB-AAFB-D0A823031F99}.Release|x86.ActiveCfg = Release|x86 {7188B89E-96EB-4EFB-AAFB-D0A823031F99}.Release|x86.Build.0 = Release|x86 + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|x64.ActiveCfg = Debug|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|x64.Build.0 = Debug|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|x86.ActiveCfg = Debug|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Debug|x86.Build.0 = Debug|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|Any CPU.Build.0 = Release|Any CPU + 
{1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|x64.ActiveCfg = Release|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|x64.Build.0 = Release|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|x86.ActiveCfg = Release|Any CPU + {1EAC0A2C-B00F-4353-94D3-3BB4DC5C92AE}.Release|x86.Build.0 = Release|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|x64.ActiveCfg = Debug|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|x64.Build.0 = Debug|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|x86.ActiveCfg = Debug|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Debug|x86.Build.0 = Debug|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|Any CPU.Build.0 = Release|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|x64.ActiveCfg = Release|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|x64.Build.0 = Release|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|x86.ActiveCfg = Release|Any CPU + {E0E9EC21-B958-4018-AE30-67DB88EFCB90}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/ZeroLevel/Services/Extensions/NumberBitsExtensions.cs b/ZeroLevel/Services/Extensions/NumberBitsExtensions.cs new file mode 100644 index 0000000..47876d2 --- /dev/null +++ b/ZeroLevel/Services/Extensions/NumberBitsExtensions.cs @@ -0,0 +1,58 @@ +namespace ZeroLevel +{ + public static class NumberBitsExtensions + { + private const int ONE_I = 1; + private const uint ONE_UI = 1U; + private const long ONE_L = 1L; + private const ulong ONE_UL = 1UL; + + public static ulong SetBit(this ulong k, int position) + { + k |= (ONE_UL << position); + return k; + } + + public static ulong ResetBit(this ulong k, int position) + { + k &= ~(ONE_UL << position); + return k; + } + + public static long SetBit(this long k, int position) + { + k |= (ONE_L << position); + return k; + } + + public static long ResetBit(this long k, int position) + { + k &= ~(ONE_L << position); + return k; + } + + public static int SetBit(this int k, int position) + { + k |= (ONE_I << position); + return k; + } + + public static int ResetBit(this int k, int position) + { + k &= ~(ONE_I << position); + return k; + } + + public static uint SetBit(this uint k, int position) + { + k |= (ONE_UI << position); + return k; + } + + public static uint ResetBit(this uint k, int position) + { + k &= ~(ONE_UI << position); + return k; + } + } +} diff --git a/ZeroLevel/Services/Math/SoftMax.cs b/ZeroLevel/Services/Mathemathics/SoftMax.cs similarity index 92% rename from ZeroLevel/Services/Math/SoftMax.cs rename to ZeroLevel/Services/Mathemathics/SoftMax.cs index ac8d1de..2d5a8f4 100644 --- a/ZeroLevel/Services/Math/SoftMax.cs +++ b/ZeroLevel/Services/Mathemathics/SoftMax.cs @@ -1,6 +1,6 @@ using System; -namespace ZeroLevel.Services.Mathematic +namespace ZeroLevel.Services.Mathemathics { public static class SoftMax { diff --git a/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs b/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs index 8e6ebb4..b6bd01a 100644 --- a/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs +++ b/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs @@ -265,6 +265,7 @@ namespace ZeroLevel.Services.Serialization public void Dispose() { + _stream.Flush(); 
_stream.Dispose(); } diff --git a/ZeroLevel/ZeroLevel.csproj b/ZeroLevel/ZeroLevel.csproj index 85d2892..72cd91b 100644 --- a/ZeroLevel/ZeroLevel.csproj +++ b/ZeroLevel/ZeroLevel.csproj @@ -59,4 +59,8 @@ + + + +
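
The new CompactBiDirectionalLinksSet above stores both directions of every link in one SortedList keyed by a packed pair of node ids, so all links of a node occupy a contiguous run of the sorted key space that Search() locates by binary search on the high 32 bits. A minimal standalone sketch of that key arithmetic (illustration only, not part of the commit):

    internal static class LinkKeySketch
    {
        private const int HALF_LONG_BITS = 32;

        // Pack an ordered pair of non-negative node ids into a single sorted-list key.
        public static long Pack(int id1, int id2) => (((long)id1) << HALF_LONG_BITS) + id2;

        // Recover the pair from the key.
        public static (int, int) Unpack(long key)
        {
            var id1 = (int)(key >> HALF_LONG_BITS);
            var id2 = (int)(key - (((long)id1) << HALF_LONG_BITS));
            return (id1, id2);
        }

        // All keys with the same high half originate from the same source node,
        // which is the property the binary search over the key range relies on.
        public static bool SameSource(long key, int id) => (key >> HALF_LONG_BITS) == id;
    }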
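
Layer.SEARCH_LAYER quotes the Algorithm 2 pseudocode of the HNSW paper inline. For reference, here is a direct, unoptimized translation of that quoted pseudocode against a generic distance and neighbourhood function — a sketch of the algorithm as stated in the paper, not the committed implementation:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    internal static class SearchLayerSketch
    {
        public static IDictionary<int, float> SearchLayer(
            int q, int ep, int ef,
            Func<int, int, float> distance,          // distance(a, b) between node ids
            Func<int, IEnumerable<int>> neighbours)  // adjacency of a node id on this layer
        {
            var visited = new HashSet<int> { ep };                           // v
            var C = new Dictionary<int, float> { [ep] = distance(ep, q) };   // candidates
            var W = new Dictionary<int, float>(C);                           // current nearest
            while (C.Count > 0)
            {
                var c = C.OrderBy(p => p.Value).First();                     // nearest candidate to q
                C.Remove(c.Key);
                var f = W.OrderByDescending(p => p.Value).First();           // furthest element of W
                if (c.Value > f.Value) break;                                // all elements in W are evaluated
                foreach (var e in neighbours(c.Key))
                {
                    if (!visited.Add(e)) continue;
                    f = W.OrderByDescending(p => p.Value).First();
                    var ed = distance(e, q);
                    if (ed < f.Value || W.Count < ef)
                    {
                        C[e] = ed;
                        W[e] = ed;
                        if (W.Count > ef)                                    // keep only the ef nearest
                            W.Remove(W.OrderByDescending(p => p.Value).First().Key);
                    }
                }
            }
            return W;
        }
    }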
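
SmallWorld.INSERT's comment cites the paper's level assignment l ← ⌊-ln(unif(0..1))∙mL⌋, while this commit draws the new element's level uniformly from the layer range. A small sketch of the exponential assignment the comment refers to, assuming the common choice mL = 1/ln(M) (illustration only, not the committed behaviour):

    using System;

    internal static class LevelSketch
    {
        // Exponentially decaying level assignment from the HNSW paper:
        // l = floor(-ln(unif(0,1)) * mL), here with mL assumed to be 1 / ln(M).
        public static int RandomLevel(Random rnd, int m, int maxLevel)
        {
            var mL = 1.0 / Math.Log(m);
            var u = 1.0 - rnd.NextDouble();                  // in (0, 1], avoids log(0)
            var level = (int)Math.Floor(-Math.Log(u) * mL);
            return Math.Min(level, maxLevel);                // clamp to the configured layer count
        }
    }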
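
The demo normalizes every vector with VectorUtils.NormalizeSIMD before indexing so that CosineDistance.ForUnits (1 minus the dot product) can stand in for the full cosine distance. A tiny standalone check of that identity on a 2-d example:

    using System;

    internal static class CosineIdentitySketch
    {
        // For unit-length vectors, 1 - dot/(|u||v|) collapses to 1 - dot(u, v).
        public static void Check()
        {
            var u = new float[] { 3f, 4f };
            var v = new float[] { 1f, 0f };
            Normalize(u);                                   // (0.6, 0.8)
            Normalize(v);                                   // (1.0, 0.0)
            float dot = 0f, nu = 0f, nv = 0f;
            for (int i = 0; i < u.Length; i++) { dot += u[i] * v[i]; nu += u[i] * u[i]; nv += v[i] * v[i]; }
            var general = 1f - dot / (float)(Math.Sqrt(nu) * Math.Sqrt(nv)); // ~0.4
            var forUnits = 1f - dot;                                         // ~0.4, same value
            Console.WriteLine($"{general} ~ {forUnits}");
        }

        private static void Normalize(float[] x)
        {
            var m = (float)Math.Sqrt(x[0] * x[0] + x[1] * x[1]);
            for (int i = 0; i < x.Length; i++) x[i] /= m;
        }
    }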