From 0cef043b26466d19c38a93af6d8bc82e1eba9416 Mon Sep 17 00:00:00 2001
From: unknown
Date: Fri, 10 Dec 2021 13:05:45 +0300
Subject: [PATCH] Append modification for LookAlike

---
 ZeroLevel.HNSW/Model/SearchContext.cs         |   5 +
 .../Services/CompactBiDirectionalLinksSet.cs  |  11 ++
 ZeroLevel.HNSW/Services/Layer.cs              | 130 ++++++++++++++++++
 ZeroLevel.HNSW/SmallWorld.cs                  |  67 +++++++++
 4 files changed, 213 insertions(+)

diff --git a/ZeroLevel.HNSW/Model/SearchContext.cs b/ZeroLevel.HNSW/Model/SearchContext.cs
index 13661b4..12b336f 100644
--- a/ZeroLevel.HNSW/Model/SearchContext.cs
+++ b/ZeroLevel.HNSW/Model/SearchContext.cs
@@ -36,6 +36,11 @@ namespace ZeroLevel.HNSW
             return nodeId >= 0;
         }
 
+        /// <summary>
+        /// Entry nodes held by this context.
+        /// </summary>
+        public IEnumerable<int> EntryPoints => _entryNodes;
+
         public SearchContext SetActiveNodes(IEnumerable<int> activeNodes)
         {
             if (activeNodes != null && activeNodes.Any())
diff --git a/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs b/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs
index 5c9f81e..dd58909 100644
--- a/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs
+++ b/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs
@@ -254,6 +254,17 @@ namespace ZeroLevel.HNSW
             }
         }
 
+        /// <summary>
+        /// Returns the value stored for the pair (id1, id2), or
+        /// float.MaxValue when the set has no entry for that pair.
+        /// </summary>
+        internal float Distance(int id1, int id2)
+        {
+            long k = (((long)(id1)) << HALF_LONG_BITS) + id2;
+            // One lookup instead of ContainsKey followed by the indexer.
+            return _set.TryGetValue(k, out var distance) ? distance : float.MaxValue;
+        }
+
         public void Dispose()
         {
             _rwLock.Dispose();
diff --git a/ZeroLevel.HNSW/Services/Layer.cs b/ZeroLevel.HNSW/Services/Layer.cs
index 6e5c5ea..713d6ea 100644
--- a/ZeroLevel.HNSW/Services/Layer.cs
+++ b/ZeroLevel.HNSW/Services/Layer.cs
@@ -248,6 +248,136 @@ namespace ZeroLevel.HNSW
             v.Clear();
         }
 
+        /// <summary>
+        /// Algorithm 2, modified for LookAlike: the search is seeded from the entry points of
+        /// the context and ranks nodes by stored link distances rather than by distance to a
+        /// query vector.
+        /// </summary>
+        /// <param name="W">Receives the found node ids and their distances</param>
+        /// <param name="ef">Maximum number of elements kept in W</param>
+        /// <param name="context">Supplies the entry points and the active-node filter</param>
+        internal void KNearestAtLayer(IDictionary<int, float> W, int ef, SearchContext context)
+        {
+            /*
+             * v ← ep // set of visited elements
+             * C ← ep // set of candidates
+             * W ← ep // dynamic list of found nearest neighbors
+             * while │C│ > 0
+             *   c ← extract nearest element from C to q
+             *   f ← get furthest element from W to q
+             *   if distance(c, q) > distance(f, q)
+             *     break // all elements in W are evaluated
+             *   for each e ∈ neighbourhood(c) at layer lc // update C and W
+             *     if e ∉ v
+             *       v ← v ⋃ e
+             *       f ← get furthest element from W to q
+             *       if distance(e, q) < distance(f, q) or │W│ < ef
+             *         C ← C ⋃ e
+             *         W ← W ⋃ e
+             *         if │W│ > ef
+             *           remove furthest element from W to q
+             * return W
+             */
+            // v ← ep // set of visited elements
+            var v = new VisitedBitSet(_vectors.Count, _options.M);
+            // C ← ep // set of candidates
+            var C = new Dictionary<int, float>();
+            foreach (var ep in context.EntryPoints)
+            {
+                var neighboursIds = GetNeighbors(ep).ToArray();
+                for (int i = 0; i < neighboursIds.Length; ++i)
+                {
+                    // Dictionary.Add throws on a repeated key and ep repeats for every
+                    // neighbour, so keep only the smallest link distance seen for ep.
+                    var d = _links.Distance(ep, neighboursIds[i]);
+                    if (!C.TryGetValue(ep, out var known) || d < known)
+                    {
+                        C[ep] = d;
+                    }
+                }
+                v.Add(ep);
+            }
+            // W ← ep // dynamic list of found nearest neighbors
+
+            var popCandidate = new Func<(int, float)>(() => { var pair = C.OrderBy(e => e.Value).First(); C.Remove(pair.Key); return (pair.Key, pair.Value); });
+            var farthestDistance = new Func<float>(() => { var pair = W.OrderByDescending(e => e.Value).First(); return pair.Value; });
+            var fartherPopFromResult = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); });
+            // run bfs
+            while (C.Count > 0)
+            {
+                // get next candidate to check and expand
+                var toExpand = popCandidate();
+                if (W.Count > 0)
+                {
+                    if (toExpand.Item2 > farthestDistance())
+                    {
+                        // the closest candidate is farther than farthest result
+                        break;
+                    }
+                }
+                if (context.IsActiveNode(toExpand.Item1))
+                {
+                    if (W.Count < ef || W.Count == 0 || (W.Count > 0 && toExpand.Item2 < farthestDistance()))
+                    {
+                        W.Add(toExpand.Item1, toExpand.Item2);
+                        if (W.Count > ef)
+                        {
+                            fartherPopFromResult();
+                        }
+                    }
+                }
+            }
+            if (W.Count > ef)
+            {
+                while (W.Count > ef)
+                {
+                    fartherPopFromResult();
+                }
+                return;
+            }
+            else
+            {
+                foreach (var c in W)
+                {
+                    C.Add(c.Key, c.Value);
+                }
+            }
+            while (C.Count > 0)
+            {
+                // get next candidate to check and expand
+                var toExpand = popCandidate();
+                // expand candidate
+                var neighboursIds = GetNeighbors(toExpand.Item1).ToArray();
+                for (int i = 0; i < neighboursIds.Length; ++i)
+                {
+                    int neighbourId = neighboursIds[i];
+                    if (!v.Contains(neighbourId))
+                    {
+                        // enqueue perspective neighbours to expansion list
+                        var neighbourDistance = _links.Distance(toExpand.Item1, neighbourId);
+                        if (context.IsActiveNode(neighbourId))
+                        {
+                            if (W.Count < ef || (W.Count > 0 && neighbourDistance < farthestDistance()))
+                            {
+                                W.Add(neighbourId, neighbourDistance);
+                                if (W.Count > ef)
+                                {
+                                    fartherPopFromResult();
+                                }
+                            }
+                        }
+                        if (W.Count < ef)
+                        {
+                            C.Add(neighbourId, neighbourDistance);
+                        }
+                        v.Add(neighbourId);
+                    }
+                }
+            }
+            C.Clear();
+            v.Clear();
+        }
+
         /// <summary>
         /// Algorithm 3
         /// </summary>
diff --git a/ZeroLevel.HNSW/SmallWorld.cs b/ZeroLevel.HNSW/SmallWorld.cs
index fa57eef..7f66ee1 100644
--- a/ZeroLevel.HNSW/SmallWorld.cs
+++ b/ZeroLevel.HNSW/SmallWorld.cs
@@ -69,6 +69,29 @@ namespace ZeroLevel.HNSW
             }
         }
 
+        /// <summary>
+        /// Runs the context-driven search and yields (id, item, distance) for every node it finds.
+        /// </summary>
+        /// <exception cref="ArgumentNullException">context is null</exception>
+        public IEnumerable<(int, TItem, float)> Search(int k, SearchContext context)
+        {
+            if (context == null)
+            {
+                throw new ArgumentNullException(nameof(context));
+            }
+            // The iterator lives in a local function so the null check above runs at call
+            // time instead of being deferred to the first MoveNext().
+            return Iterate();
+
+            IEnumerable<(int, TItem, float)> Iterate()
+            {
+                foreach (var pair in KNearest(k, context))
+                {
+                    yield return (pair.Item1, _vectors[pair.Item1], pair.Item2);
+                }
+            }
+        }
+
         /// <summary>
         /// Adding vectors batch
         /// </summary>
@@ -272,6 +295,50 @@ namespace ZeroLevel.HNSW
                 _lockGraph.ExitReadLock();
             }
         }
+
+        /// <summary>
+        /// Context-driven variant of the k-nearest search; runs under the graph read lock.
+        /// </summary>
+        private IEnumerable<(int, float)> KNearest(int k, SearchContext context)
+        {
+            _lockGraph.EnterReadLock();
+            try
+            {
+                if (_vectors.Count == 0)
+                {
+                    return Enumerable.Empty<(int, float)>();
+                }
+                // W ← ∅ // set for the current nearest elements
+                var W = new Dictionary<int, float>(k + 1);
+                // ep ← get enter point for hnsw
+                var ep = EntryPoint;
+                // L ← level of ep // top layer for hnsw
+                var L = MaxLayer;
+                // for lc ← L … 1
+                for (int layer = L; layer > 0; --layer)
+                {
+                    // NOTE(review): the KNearestAtLayer overload added by this patch takes (W, ef, context);
+                    // confirm that an overload accepting (ep, W, ef, context) exists in Layer.cs.
+                    // W ← SEARCH-LAYER(q, ep, ef = 1, lc)
+                    _layers[layer].KNearestAtLayer(ep, W, 1, context);
+                    // ep ← get nearest element from W to q
+                    // (First() throws on an empty sequence, so keep the current ep when the layer found nothing)
+                    if (W.Count > 0)
+                    {
+                        ep = W.OrderBy(p => p.Value).First().Key;
+                    }
+                    W.Clear();
+                }
+                // W ← SEARCH-LAYER(q, ep, ef, lc =0)
+                _layers[0].KNearestAtLayer(ep, W, k, context);
+                // return K nearest elements from W to q
+                return W.Select(p => (p.Key, p.Value));
+            }
+            finally
+            {
+                _lockGraph.ExitReadLock();
+            }
+        }
         #endregion
 
         public void Serialize(Stream stream)