HNSW. Optimized version

pull/1/head
unknown 3 years ago
parent c37207ad6a
commit 50e2e90251

@ -6,8 +6,6 @@ using System.IO;
using System.Linq;
using ZeroLevel.HNSW;
using ZeroLevel.HNSW.Services;
using ZeroLevel.HNSW.Services.OPT;
using ZeroLevel.Services.Serialization;
namespace HNSWDemo
{
@ -167,7 +165,7 @@ namespace HNSWDemo
static void Main(string[] args)
{
OptAccuracityTest();
AccuracityTest();
Console.WriteLine("Completed");
Console.ReadKey();
}
@ -611,27 +609,17 @@ namespace HNSWDemo
var timewatchesNP = new List<float>();
var timewatchesHNSW = new List<float>();
var totalOptHits = new List<int>();
var timewatchesOptHNSW = new List<float>();
var samples = RandomVectors(dimensionality, count);
var sw = new Stopwatch();
var test = new VectorsDirectCompare(samples, Metrics.L2Euclidean);
var world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(8, 15, 200, 200, Metrics.L2Euclidean, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
var opt_world = new OptWorld<float[]>(NSWOptions<float[]>.Create(8, 15, 200, 200, Metrics.L2Euclidean, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
var test = new VectorsDirectCompare(samples, CosineDistance.NonOptimized);
var world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 12, 100, 100, CosineDistance.NonOptimized, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
sw.Start();
var ids = world.AddItems(samples.ToArray());
sw.Stop();
Console.WriteLine($"Insert {ids.Length} items: {sw.ElapsedMilliseconds} ms");
sw.Restart();
opt_world.AddItems(samples.ToArray());
sw.Stop();
Console.WriteLine($"Insert {ids.Length} items in OPT: {sw.ElapsedMilliseconds} ms");
Console.WriteLine("Start test");
@ -657,142 +645,35 @@ namespace HNSWDemo
}
}
totalHits.Add(hits);
sw.Restart();
result = opt_world.Search(v, K);
sw.Stop();
timewatchesOptHNSW.Add(sw.ElapsedMilliseconds);
hits = 0;
foreach (var r in result)
{
if (gt.ContainsKey(r.Item1))
{
hits++;
}
}
totalOptHits.Add(hits);
}
Console.WriteLine($"MIN Accuracity: {totalHits.Min() * 100 / K}%");
Console.WriteLine($"AVG Accuracity: {totalHits.Average() * 100 / K}%");
Console.WriteLine($"MAX Accuracity: {totalHits.Max() * 100 / K}%");
Console.WriteLine($"MIN Opt Accuracity: {totalOptHits.Min() * 100 / K}%");
Console.WriteLine($"AVG Opt Accuracity: {totalOptHits.Average() * 100 / K}%");
Console.WriteLine($"MAX Opt Accuracity: {totalOptHits.Max() * 100 / K}%");
Console.WriteLine($"MIN HNSW TIME: {timewatchesHNSW.Min()} ms");
Console.WriteLine($"AVG HNSW TIME: {timewatchesHNSW.Average()} ms");
Console.WriteLine($"MAX HNSW TIME: {timewatchesHNSW.Max()} ms");
Console.WriteLine($"MIN Opt HNSW TIME: {timewatchesOptHNSW.Min()} ms");
Console.WriteLine($"AVG Opt HNSW TIME: {timewatchesOptHNSW.Average()} ms");
Console.WriteLine($"MAX Opt HNSW TIME: {timewatchesOptHNSW.Max()} ms");
Console.WriteLine($"MIN NP TIME: {timewatchesNP.Min()} ms");
Console.WriteLine($"AVG NP TIME: {timewatchesNP.Average()} ms");
Console.WriteLine($"MAX NP TIME: {timewatchesNP.Max()} ms");
}
static void OptAccuracityTest()
static void InsertTimeExplosionTest()
{
int K = 200;
var count = 5000;
var testCount = 1000;
var count = 1000;
var iterationCount = 1000;
var dimensionality = 128;
var timewatchesNP = new List<float>();
var totalOptHits = new List<int>();
var timewatchesOptHNSW = new List<float>();
var totalRestoredHits = new List<int>();
var timewatchesRestoredHNSW = new List<float>();
var samples = RandomVectors(dimensionality, count);
var sw = new Stopwatch();
var test = new VectorsDirectCompare(samples, Metrics.L2Euclidean);
var opt_world = new OptWorld<float[]>(NSWOptions<float[]>.Create(8, 16, 200, 200, Metrics.L2Euclidean, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
sw.Restart();
var ids = opt_world.AddItems(samples.ToArray());
sw.Stop();
Console.WriteLine($"Insert {ids.Length} items in OPT: {sw.ElapsedMilliseconds} ms");
byte[] dump;
using (var ms = new MemoryStream())
{
opt_world.Serialize(ms);
dump = ms.ToArray();
}
SmallWorld<float[]> compactWorld;
using (var ms = new MemoryStream(dump))
var world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 12, 100, 100, CosineDistance.NonOptimized, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
for (int i = 0; i < iterationCount; i++)
{
compactWorld = SmallWorld.CreateWorldFrom<float[]>(NSWOptions<float[]>.Create(8, 16, 200, 200, Metrics.L2Euclidean, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple), ms);
}
Console.WriteLine("Start test");
var test_vectors = RandomVectors(dimensionality, testCount);
foreach (var v in test_vectors)
{
sw.Restart();
var gt = test.KNearest(v, K).ToDictionary(p => p.Item1, p => p.Item2);
sw.Stop();
timewatchesNP.Add(sw.ElapsedMilliseconds);
sw.Restart();
var result = opt_world.Search(v, K).ToArray();
sw.Stop();
timewatchesOptHNSW.Add(sw.ElapsedMilliseconds);
var hits = 0;
foreach (var r in result)
{
if (gt.ContainsKey(r.Item1))
{
hits++;
}
}
totalOptHits.Add(hits);
var samples = RandomVectors(dimensionality, count);
sw.Restart();
result = compactWorld.Search(v, K).ToArray();
var ids = world.AddItems(samples.ToArray());
sw.Stop();
timewatchesRestoredHNSW.Add(sw.ElapsedMilliseconds);
hits = 0;
foreach (var r in result)
{
if (gt.ContainsKey(r.Item1))
{
hits++;
}
}
totalRestoredHits.Add(hits);
Console.WriteLine($"ITERATION: [{i.ToString("D4")}] COUNT: [{ids.Length}] ELAPSEF [{sw.ElapsedMilliseconds} ms]");
}
Console.WriteLine($"MIN Opt Accuracity: {totalOptHits.Min() * 100 / K}%");
Console.WriteLine($"AVG Opt Accuracity: {totalOptHits.Average() * 100 / K}%");
Console.WriteLine($"MAX Opt Accuracity: {totalOptHits.Max() * 100 / K}%");
Console.WriteLine($"MIN Test Accuracity: {totalRestoredHits.Min() * 100 / K}%");
Console.WriteLine($"AVG Test Accuracity: {totalRestoredHits.Average() * 100 / K}%");
Console.WriteLine($"MAX Test Accuracity: {totalRestoredHits.Max() * 100 / K}%");
Console.WriteLine($"MIN Opt HNSW TIME: {timewatchesOptHNSW.Min()} ms");
Console.WriteLine($"AVG Opt HNSW TIME: {timewatchesOptHNSW.Average()} ms");
Console.WriteLine($"MAX Opt HNSW TIME: {timewatchesOptHNSW.Max()} ms");
Console.WriteLine($"MIN Test HNSW TIME: {timewatchesRestoredHNSW.Min()} ms");
Console.WriteLine($"AVG Test HNSW TIME: {timewatchesRestoredHNSW.Average()} ms");
Console.WriteLine($"MAX Test HNSW TIME: {timewatchesRestoredHNSW.Max()} ms");
Console.WriteLine($"MIN NP TIME: {timewatchesNP.Min()} ms");
Console.WriteLine($"AVG NP TIME: {timewatchesNP.Average()} ms");
Console.WriteLine($"MAX NP TIME: {timewatchesNP.Max()} ms");
}
}
}

@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using ZeroLevel.HNSW.Services;
using ZeroLevel.Services.Serialization;
namespace ZeroLevel.HNSW
@ -71,11 +72,21 @@ namespace ZeroLevel.HNSW
_links.Relink(id1, id2, q, qpDistance, _options.Distance(_vectors[id2], _vectors[q]));
}
else
{
if (nearest.Length == 1 && nearest[0].Item1 == nearest[0].Item2)
{
// убираем связи на самих себя
var id1 = nearest[0].Item1;
var id2 = nearest[0].Item2;
_links.Relink(id1, id2, q, qpDistance, _options.Distance(_vectors[id2], _vectors[q]));
}
else
{
// добавляем связь нового узла к найденному
_links.Add(q, p, qpDistance);
}
}
}
/// <summary>
/// Adding a node with a connection to itself
@ -87,7 +98,7 @@ namespace ZeroLevel.HNSW
}
#region Implementation of https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
internal int FingEntryPointAtLayer(Func<int, float> targetCosts)
internal int FindEntryPointAtLayer(Func<int, float> targetCosts)
{
var set = new HashSet<int>(_links.Items().Select(p => p.Item1));
int minId = -1;
@ -110,7 +121,7 @@ namespace ZeroLevel.HNSW
/// <param name="q">query element</param>
/// <param name="ep">enter points ep</param>
/// <returns>Output: ef closest neighbors to q</returns>
internal void KNearestAtLayer(int entryPointId, Func<int, float> targetCosts, IDictionary<int, float> W, int ef)
internal IEnumerable<(int, float)> KNearestAtLayer(int entryPointId, Func<int, float> targetCosts, IEnumerable<(int, float)> w, int ef)
{
/*
* v ep // set of visited elements
@ -135,22 +146,25 @@ namespace ZeroLevel.HNSW
var v = new VisitedBitSet(_vectors.Count, _options.M);
// v ← ep // set of visited elements
v.Add(entryPointId);
var W = new MaxHeap(ef + 1);
foreach (var i in w) W.Push(i);
var d = targetCosts(entryPointId);
// C ← ep // set of candidates
var C = new Dictionary<int, float>();
C.Add(entryPointId, targetCosts(entryPointId));
var C = new MinHeap(ef);
C.Push((entryPointId, d));
// W ← ep // dynamic list of found nearest neighbors
W.Add(entryPointId, C[entryPointId]);
W.Push((entryPointId, d));
int farthestId;
float farthestDistance;
var popCandidate = new Func<(int, float)>(() => { var pair = C.OrderBy(e => e.Value).First(); C.Remove(pair.Key); return (pair.Key, pair.Value); });
var fartherFromResult = new Func<(int, float)>(() => { var pair = W.OrderByDescending(e => e.Value).First(); return (pair.Key, pair.Value); });
var fartherPopFromResult = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); });
// run bfs
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = popCandidate();
var farthestResult = fartherFromResult();
if (toExpand.Item2 > farthestResult.Item2)
var toExpand = C.Pop();
if (W.TryPeek(out _, out farthestDistance) && toExpand.Item2 > farthestDistance)
{
// the closest candidate is farther than farthest result
break;
@ -164,16 +178,17 @@ namespace ZeroLevel.HNSW
if (!v.Contains(neighbourId))
{
// enqueue perspective neighbours to expansion list
farthestResult = fartherFromResult();
W.TryPeek(out farthestId, out farthestDistance);
var neighbourDistance = targetCosts(neighbourId);
if (W.Count < ef || neighbourDistance < farthestResult.Item2)
if (W.Count < ef || (farthestId >= 0 && neighbourDistance < farthestDistance))
{
C.Add(neighbourId, neighbourDistance);
W.Add(neighbourId, neighbourDistance);
C.Push((neighbourId, neighbourDistance));
W.Push((neighbourId, neighbourDistance));
if (W.Count > ef)
{
fartherPopFromResult();
W.Pop();
}
}
v.Add(neighbourId);
@ -182,6 +197,7 @@ namespace ZeroLevel.HNSW
}
C.Clear();
v.Clear();
return W;
}
/// <summary>
@ -190,7 +206,7 @@ namespace ZeroLevel.HNSW
/// <param name="q">query element</param>
/// <param name="ep">enter points ep</param>
/// <returns>Output: ef closest neighbors to q</returns>
internal void KNearestAtLayer(int entryPointId, Func<int, float> targetCosts, IDictionary<int, float> W, int ef, SearchContext context)
internal IEnumerable<(int, float)> KNearestAtLayer(int entryPointId, Func<int, float> targetCosts, IEnumerable<(int, float)> w, int ef, SearchContext context)
{
/*
* v ep // set of visited elements
@ -215,25 +231,28 @@ namespace ZeroLevel.HNSW
var v = new VisitedBitSet(_vectors.Count, _options.M);
// v ← ep // set of visited elements
v.Add(entryPointId);
var W = new MaxHeap(ef + 1);
foreach (var i in w) W.Push(i);
// C ← ep // set of candidates
var C = new Dictionary<int, float>();
C.Add(entryPointId, targetCosts(entryPointId));
var C = new MinHeap(ef);
var d = targetCosts(entryPointId);
C.Push((entryPointId, d));
// W ← ep // dynamic list of found nearest neighbors
if (context.IsActiveNode(entryPointId))
{
W.Add(entryPointId, C[entryPointId]);
W.Push((entryPointId, d));
}
var popCandidate = new Func<(int, float)>(() => { var pair = C.OrderBy(e => e.Value).First(); C.Remove(pair.Key); return (pair.Key, pair.Value); });
var farthestDistance = new Func<float>(() => { var pair = W.OrderByDescending(e => e.Value).First(); return pair.Value; });
var fartherPopFromResult = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); });
// run bfs
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = popCandidate();
var toExpand = C.Pop();
if (W.Count > 0)
{
if (toExpand.Item2 > farthestDistance())
if(W.TryPeek(out _, out var dist ))
if (toExpand.Item2 > dist)
{
// the closest candidate is farther than farthest result
break;
@ -251,18 +270,18 @@ namespace ZeroLevel.HNSW
var neighbourDistance = targetCosts(neighbourId);
if (context.IsActiveNode(neighbourId))
{
if (W.Count < ef || (W.Count > 0 && neighbourDistance < farthestDistance()))
if (W.Count < ef || (W.Count > 0 && (W.TryPeek(out _, out var dist) && neighbourDistance < dist)))
{
W.Add(neighbourId, neighbourDistance);
W.Push((neighbourId, neighbourDistance));
if (W.Count > ef)
{
fartherPopFromResult();
W.Pop();
}
}
}
if (W.Count < ef)
{
C.Add(neighbourId, neighbourDistance);
C.Push((neighbourId, neighbourDistance));
}
v.Add(neighbourId);
}
@ -270,6 +289,7 @@ namespace ZeroLevel.HNSW
}
C.Clear();
v.Clear();
return W;
}
/// <summary>
@ -278,7 +298,7 @@ namespace ZeroLevel.HNSW
/// <param name="q">query element</param>
/// <param name="ep">enter points ep</param>
/// <returns>Output: ef closest neighbors to q</returns>
internal void KNearestAtLayer(IDictionary<int, float> W, int ef, SearchContext context)
internal IEnumerable<(int, float)> KNearestAtLayer(IEnumerable<(int, float)> w, int ef, SearchContext context)
{
/*
* v ep // set of visited elements
@ -303,29 +323,28 @@ namespace ZeroLevel.HNSW
// v ← ep // set of visited elements
var v = new VisitedBitSet(_vectors.Count, _options.M);
// C ← ep // set of candidates
var C = new Dictionary<int, float>();
var C = new MinHeap(ef);
foreach (var ep in context.EntryPoints)
{
var neighboursIds = GetNeighbors(ep).ToArray();
for (int i = 0; i < neighboursIds.Length; ++i)
{
C.Add(ep, _links.Distance(ep, neighboursIds[i]));
C.Push((ep, _links.Distance(ep, neighboursIds[i])));
}
v.Add(ep);
}
// W ← ep // dynamic list of found nearest neighbors
var W = new MaxHeap(ef + 1);
foreach (var i in w) W.Push(i);
var popCandidate = new Func<(int, float)>(() => { var pair = C.OrderBy(e => e.Value).First(); C.Remove(pair.Key); return (pair.Key, pair.Value); });
var farthestDistance = new Func<float>(() => { var pair = W.OrderByDescending(e => e.Value).First(); return pair.Value; });
var fartherPopFromResult = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); });
// run bfs
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = popCandidate();
var toExpand = C.Pop();
if (W.Count > 0)
{
if (toExpand.Item2 > farthestDistance())
if (W.TryPeek(out _, out var dist) && toExpand.Item2 > dist)
{
// the closest candidate is farther than farthest result
break;
@ -333,12 +352,12 @@ namespace ZeroLevel.HNSW
}
if (context.IsActiveNode(toExpand.Item1))
{
if (W.Count < ef || W.Count == 0 || (W.Count > 0 && toExpand.Item2 < farthestDistance()))
if (W.Count < ef || W.Count == 0 || (W.Count > 0 && (W.TryPeek(out _, out var dist) && toExpand.Item2 < dist)))
{
W.Add(toExpand.Item1, toExpand.Item2);
W.Push((toExpand.Item1, toExpand.Item2));
if (W.Count > ef)
{
fartherPopFromResult();
W.Pop();
}
}
}
@ -347,21 +366,21 @@ namespace ZeroLevel.HNSW
{
while (W.Count > ef)
{
fartherPopFromResult();
W.Pop();
}
return;
return W;
}
else
{
foreach (var c in W)
{
C.Add(c.Key, c.Value);
C.Push((c.Item1, c.Item2));
}
}
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = popCandidate();
var toExpand = C.Pop();
// expand candidate
var neighboursIds = GetNeighbors(toExpand.Item1).ToArray();
for (int i = 0; i < neighboursIds.Length; ++i)
@ -373,18 +392,18 @@ namespace ZeroLevel.HNSW
var neighbourDistance = _links.Distance(toExpand.Item1, neighbourId);
if (context.IsActiveNode(neighbourId))
{
if (W.Count < ef || (W.Count > 0 && neighbourDistance < farthestDistance()))
if (W.Count < ef || (W.Count > 0 && (W.TryPeek(out _, out var dist) && neighbourDistance < dist)))
{
W.Add(neighbourId, neighbourDistance);
W.Push((neighbourId, neighbourDistance));
if (W.Count > ef)
{
fartherPopFromResult();
W.Pop();
}
}
}
if (W.Count < ef)
{
C.Add(neighbourId, neighbourDistance);
C.Push((neighbourId, neighbourDistance));
}
v.Add(neighbourId);
}
@ -392,24 +411,24 @@ namespace ZeroLevel.HNSW
}
C.Clear();
v.Clear();
return W;
}
/// <summary>
/// Algorithm 3
/// </summary>
internal IDictionary<int, float> SELECT_NEIGHBORS_SIMPLE(Func<int, float> distance, IDictionary<int, float> candidates, int M)
internal MaxHeap SELECT_NEIGHBORS_SIMPLE(IEnumerable<(int, float)> w, int M)
{
var W = new MaxHeap(w.Count());
foreach (var i in w) W.Push(i);
var bestN = M;
var W = new Dictionary<int, float>(candidates);
if (W.Count > bestN)
{
var popFarther = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); });
while (W.Count > bestN)
{
popFarther();
W.Pop();
}
}
// return M nearest elements from C to q
return W;
}
@ -423,12 +442,13 @@ namespace ZeroLevel.HNSW
/// <param name="extendCandidates">flag indicating whether or not to extend candidate list</param>
/// <param name="keepPrunedConnections">flag indicating whether or not to add discarded elements</param>
/// <returns>Output: M elements selected by the heuristic</returns>
internal IDictionary<int, float> SELECT_NEIGHBORS_HEURISTIC(Func<int, float> distance, IDictionary<int, float> candidates, int M)
internal MaxHeap SELECT_NEIGHBORS_HEURISTIC(Func<int, float> distance, IEnumerable<(int, float)> w, int M)
{
// R ← ∅
var R = new Dictionary<int, float>();
var R = new MaxHeap(_options.EFConstruction);
// W ← C // working queue for the candidates
var W = new Dictionary<int, float>(candidates);
var W = new MaxHeap(_options.EFConstruction + 1);
foreach (var i in w) W.Push(i);
// if extendCandidates // extend candidates by their neighbors
if (_options.ExpandBestSelection)
{
@ -436,7 +456,7 @@ namespace ZeroLevel.HNSW
// for each e ∈ C
foreach (var e in W)
{
var neighbors = GetNeighbors(e.Key);
var neighbors = GetNeighbors(e.Item1);
// for each e_adj ∈ neighbourhood(e) at layer lc
foreach (var e_adj in neighbors)
{
@ -450,37 +470,30 @@ namespace ZeroLevel.HNSW
// W ← W eadj
foreach (var id in extendBuffer)
{
W[id] = distance(id);
W.Push((id, distance(id)));
}
}
// Wd ← ∅ // queue for the discarded candidates
var Wd = new Dictionary<int, float>();
var popCandidate = new Func<(int, float)>(() => { var pair = W.OrderBy(e => e.Value).First(); W.Remove(pair.Key); return (pair.Key, pair.Value); });
var fartherFromResult = new Func<(int, float)>(() => { if (R.Count == 0) return (-1, 0f); var pair = R.OrderByDescending(e => e.Value).First(); return (pair.Key, pair.Value); });
var popNearestDiscarded = new Func<(int, float)>(() => { var pair = Wd.OrderBy(e => e.Value).First(); Wd.Remove(pair.Key); return (pair.Key, pair.Value); });
var Wd = new MinHeap(_options.EFConstruction);
// while │W│ > 0 and │R│< M
while (W.Count > 0 && R.Count < M)
{
// e ← extract nearest element from W to q
var (e, ed) = popCandidate();
var (fe, fd) = fartherFromResult();
var (e, ed) = W.Pop();
var (fe, fd) = R.Pop();
// if e is closer to q compared to any element from R
if (R.Count == 0 ||
ed < fd)
{
// R ← R e
R.Add(e, ed);
R.Push((e, ed));
}
else
{
// Wd ← Wd e
Wd.Add(e, ed);
Wd.Push((e, ed));
}
}
// if keepPrunedConnections // add some of the discarded // connections from Wd
@ -490,8 +503,8 @@ namespace ZeroLevel.HNSW
while (Wd.Count > 0 && R.Count < M)
{
// R ← R extract nearest element from Wd to q
var nearest = popNearestDiscarded();
R[nearest.Item1] = nearest.Item2;
var nearest = Wd.Pop();
R.Push((nearest.Item1, nearest.Item2));
}
}
// return R

@ -1,511 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using ZeroLevel.Services.Serialization;
namespace ZeroLevel.HNSW.Services.OPT
{
/// <summary>
/// NSW graph
/// </summary>
internal sealed class OptLayer<TItem>
: IBinarySerializable
{
private readonly NSWOptions<TItem> _options;
private readonly VectorSet<TItem> _vectors;
private readonly CompactBiDirectionalLinksSet _links;
internal SortedList<long, float> Links => _links.Links;
/// <summary>
/// There are links е the layer
/// </summary>
internal bool HasLinks => (_links.Count > 0);
/// <summary>
/// HNSW layer
/// </summary>
/// <param name="options">HNSW graph options</param>
/// <param name="vectors">General vector set</param>
internal OptLayer(NSWOptions<TItem> options, VectorSet<TItem> vectors)
{
_options = options;
_vectors = vectors;
_links = new CompactBiDirectionalLinksSet();
}
/// <summary>
/// Adding new bidirectional link
/// </summary>
/// <param name="q">New node</param>
/// <param name="p">The node with which the connection will be made</param>
/// <param name="qpDistance"></param>
/// <param name="isMapLayer"></param>
internal void AddBidirectionallConnections(int q, int p, float qpDistance, bool isMapLayer)
{
// поиск в ширину ближайших узлов к найденному
var nearest = _links.FindLinksForId(p).ToArray();
// если у найденного узла максимальное количество связей
// if │eConn│ > Mmax // shrink connections of e
if (nearest.Length >= (isMapLayer ? _options.M * 2 : _options.M))
{
// ищем связь с самой большой дистанцией
float distance = nearest[0].Item3;
int index = 0;
for (int ni = 1; ni < nearest.Length; ni++)
{
// Если осталась ссылка узла на себя, удаляем ее в первую очередь
if (nearest[ni].Item1 == nearest[ni].Item2)
{
index = ni;
break;
}
if (nearest[ni].Item3 > distance)
{
index = ni;
distance = nearest[ni].Item3;
}
}
// делаем перелинковку вставляя новый узел между найденными
var id1 = nearest[index].Item1;
var id2 = nearest[index].Item2;
_links.Relink(id1, id2, q, qpDistance, _options.Distance(_vectors[id2], _vectors[q]));
}
else
{
if (nearest.Length == 1 && nearest[0].Item1 == nearest[0].Item2)
{
// убираем связи на самих себя
var id1 = nearest[0].Item1;
var id2 = nearest[0].Item2;
_links.Relink(id1, id2, q, qpDistance, _options.Distance(_vectors[id2], _vectors[q]));
}
else
{
// добавляем связь нового узла к найденному
_links.Add(q, p, qpDistance);
}
}
}
/// <summary>
/// Adding a node with a connection to itself
/// </summary>
/// <param name="q"></param>
internal void Append(int q)
{
_links.Add(q, q, 0);
}
#region Implementation of https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
/// <summary>
/// Algorithm 2
/// </summary>
/// <param name="q">query element</param>
/// <param name="ep">enter points ep</param>
/// <returns>Output: ef closest neighbors to q</returns>
internal IEnumerable<(int, float)> KNearestAtLayer(int entryPointId, Func<int, float> targetCosts, IEnumerable<(int, float)> w, int ef)
{
/*
* v ep // set of visited elements
* C ep // set of candidates
* W ep // dynamic list of found nearest neighbors
* while C > 0
* c extract nearest element from C to q
* f get furthest element from W to q
* if distance(c, q) > distance(f, q)
* break // all elements in W are evaluated
* for each e neighbourhood(c) at layer lc // update C and W
* if e v
* v v e
* f get furthest element from W to q
* if distance(e, q) < distance(f, q) or W < ef
* C C e
* W W e
* if W > ef
* remove furthest element from W to q
* return W
*/
var v = new VisitedBitSet(_vectors.Count, _options.M);
// v ← ep // set of visited elements
v.Add(entryPointId);
var W = new MaxHeap(ef + 1);
foreach (var i in w) W.Push(i);
var d = targetCosts(entryPointId);
// C ← ep // set of candidates
var C = new MinHeap(ef);
C.Push((entryPointId, d));
// W ← ep // dynamic list of found nearest neighbors
W.Push((entryPointId, d));
int farthestId;
float farthestDistance;
// run bfs
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = C.Pop();
if (W.TryPeek(out _, out farthestDistance) && toExpand.Item2 > farthestDistance)
{
// the closest candidate is farther than farthest result
break;
}
// expand candidate
var neighboursIds = GetNeighbors(toExpand.Item1).ToArray();
for (int i = 0; i < neighboursIds.Length; ++i)
{
int neighbourId = neighboursIds[i];
if (!v.Contains(neighbourId))
{
// enqueue perspective neighbours to expansion list
W.TryPeek(out farthestId, out farthestDistance);
var neighbourDistance = targetCosts(neighbourId);
if (W.Count < ef || (farthestId >= 0 && neighbourDistance < farthestDistance))
{
C.Push((neighbourId, neighbourDistance));
W.Push((neighbourId, neighbourDistance));
if (W.Count > ef)
{
W.Pop();
}
}
v.Add(neighbourId);
}
}
}
C.Clear();
v.Clear();
return W;
}
/// <summary>
/// Algorithm 2
/// </summary>
/// <param name="q">query element</param>
/// <param name="ep">enter points ep</param>
/// <returns>Output: ef closest neighbors to q</returns>
internal IEnumerable<(int, float)> KNearestAtLayer(int entryPointId, Func<int, float> targetCosts, IEnumerable<(int, float)> w, int ef, SearchContext context)
{
/*
* v ep // set of visited elements
* C ep // set of candidates
* W ep // dynamic list of found nearest neighbors
* while C > 0
* c extract nearest element from C to q
* f get furthest element from W to q
* if distance(c, q) > distance(f, q)
* break // all elements in W are evaluated
* for each e neighbourhood(c) at layer lc // update C and W
* if e v
* v v e
* f get furthest element from W to q
* if distance(e, q) < distance(f, q) or W < ef
* C C e
* W W e
* if W > ef
* remove furthest element from W to q
* return W
*/
var v = new VisitedBitSet(_vectors.Count, _options.M);
// v ← ep // set of visited elements
v.Add(entryPointId);
var W = new MaxHeap(ef + 1);
foreach (var i in w) W.Push(i);
// C ← ep // set of candidates
var C = new MinHeap(ef);
var d = targetCosts(entryPointId);
C.Push((entryPointId, d));
// W ← ep // dynamic list of found nearest neighbors
if (context.IsActiveNode(entryPointId))
{
W.Push((entryPointId, d));
}
// run bfs
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = C.Pop();
if (W.Count > 0)
{
if(W.TryPeek(out _, out var dist ))
if (toExpand.Item2 > dist)
{
// the closest candidate is farther than farthest result
break;
}
}
// expand candidate
var neighboursIds = GetNeighbors(toExpand.Item1).ToArray();
for (int i = 0; i < neighboursIds.Length; ++i)
{
int neighbourId = neighboursIds[i];
if (!v.Contains(neighbourId))
{
// enqueue perspective neighbours to expansion list
var neighbourDistance = targetCosts(neighbourId);
if (context.IsActiveNode(neighbourId))
{
if (W.Count < ef || (W.Count > 0 && (W.TryPeek(out _, out var dist) && neighbourDistance < dist)))
{
W.Push((neighbourId, neighbourDistance));
if (W.Count > ef)
{
W.Pop();
}
}
}
if (W.Count < ef)
{
C.Push((neighbourId, neighbourDistance));
}
v.Add(neighbourId);
}
}
}
C.Clear();
v.Clear();
return W;
}
/// <summary>
/// Algorithm 2, modified for LookAlike
/// </summary>
/// <param name="q">query element</param>
/// <param name="ep">enter points ep</param>
/// <returns>Output: ef closest neighbors to q</returns>
internal IEnumerable<(int, float)> KNearestAtLayer(IEnumerable<(int, float)> w, int ef, SearchContext context)
{
/*
* v ep // set of visited elements
* C ep // set of candidates
* W ep // dynamic list of found nearest neighbors
* while C > 0
* c extract nearest element from C to q
* f get furthest element from W to q
* if distance(c, q) > distance(f, q)
* break // all elements in W are evaluated
* for each e neighbourhood(c) at layer lc // update C and W
* if e v
* v v e
* f get furthest element from W to q
* if distance(e, q) < distance(f, q) or W < ef
* C C e
* W W e
* if W > ef
* remove furthest element from W to q
* return W
*/
// v ← ep // set of visited elements
var v = new VisitedBitSet(_vectors.Count, _options.M);
// C ← ep // set of candidates
var C = new MinHeap(ef);
foreach (var ep in context.EntryPoints)
{
var neighboursIds = GetNeighbors(ep).ToArray();
for (int i = 0; i < neighboursIds.Length; ++i)
{
C.Push((ep, _links.Distance(ep, neighboursIds[i])));
}
v.Add(ep);
}
// W ← ep // dynamic list of found nearest neighbors
var W = new MaxHeap(ef + 1);
foreach (var i in w) W.Push(i);
// run bfs
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = C.Pop();
if (W.Count > 0)
{
if (W.TryPeek(out _, out var dist) && toExpand.Item2 > dist)
{
// the closest candidate is farther than farthest result
break;
}
}
if (context.IsActiveNode(toExpand.Item1))
{
if (W.Count < ef || W.Count == 0 || (W.Count > 0 && (W.TryPeek(out _, out var dist) && toExpand.Item2 < dist)))
{
W.Push((toExpand.Item1, toExpand.Item2));
if (W.Count > ef)
{
W.Pop();
}
}
}
}
if (W.Count > ef)
{
while (W.Count > ef)
{
W.Pop();
}
return W;
}
else
{
foreach (var c in W)
{
C.Push((c.Item1, c.Item2));
}
}
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = C.Pop();
// expand candidate
var neighboursIds = GetNeighbors(toExpand.Item1).ToArray();
for (int i = 0; i < neighboursIds.Length; ++i)
{
int neighbourId = neighboursIds[i];
if (!v.Contains(neighbourId))
{
// enqueue perspective neighbours to expansion list
var neighbourDistance = _links.Distance(toExpand.Item1, neighbourId);
if (context.IsActiveNode(neighbourId))
{
if (W.Count < ef || (W.Count > 0 && (W.TryPeek(out _, out var dist) && neighbourDistance < dist)))
{
W.Push((neighbourId, neighbourDistance));
if (W.Count > ef)
{
W.Pop();
}
}
}
if (W.Count < ef)
{
C.Push((neighbourId, neighbourDistance));
}
v.Add(neighbourId);
}
}
}
C.Clear();
v.Clear();
return W;
}
/// <summary>
/// Algorithm 3
/// </summary>
internal MaxHeap SELECT_NEIGHBORS_SIMPLE(IEnumerable<(int, float)> w, int M)
{
var W = new MaxHeap(w.Count());
foreach (var i in w) W.Push(i);
var bestN = M;
if (W.Count > bestN)
{
while (W.Count > bestN)
{
W.Pop();
}
}
return W;
}
/// <summary>
/// Algorithm 4
/// </summary>
/// <param name="q">base element</param>
/// <param name="C">candidate elements</param>
/// <param name="extendCandidates">flag indicating whether or not to extend candidate list</param>
/// <param name="keepPrunedConnections">flag indicating whether or not to add discarded elements</param>
/// <returns>Output: M elements selected by the heuristic</returns>
internal MaxHeap SELECT_NEIGHBORS_HEURISTIC(Func<int, float> distance, IEnumerable<(int, float)> w, int M)
{
// R ← ∅
var R = new MaxHeap(_options.EFConstruction);
// W ← C // working queue for the candidates
var W = new MaxHeap(_options.EFConstruction + 1);
foreach (var i in w) W.Push(i);
// if extendCandidates // extend candidates by their neighbors
if (_options.ExpandBestSelection)
{
var extendBuffer = new HashSet<int>();
// for each e ∈ C
foreach (var e in W)
{
var neighbors = GetNeighbors(e.Item1);
// for each e_adj ∈ neighbourhood(e) at layer lc
foreach (var e_adj in neighbors)
{
// if eadj ∉ W
if (extendBuffer.Contains(e_adj) == false)
{
extendBuffer.Add(e_adj);
}
}
}
// W ← W eadj
foreach (var id in extendBuffer)
{
W.Push((id, distance(id)));
}
}
// Wd ← ∅ // queue for the discarded candidates
var Wd = new MinHeap(_options.EFConstruction);
// while │W│ > 0 and │R│< M
while (W.Count > 0 && R.Count < M)
{
// e ← extract nearest element from W to q
var (e, ed) = W.Pop();
var (fe, fd) = R.Pop();
// if e is closer to q compared to any element from R
if (R.Count == 0 ||
ed < fd)
{
// R ← R e
R.Push((e, ed));
}
else
{
// Wd ← Wd e
Wd.Push((e, ed));
}
}
// if keepPrunedConnections // add some of the discarded // connections from Wd
if (_options.KeepPrunedConnections)
{
// while │Wd│> 0 and │R│< M
while (Wd.Count > 0 && R.Count < M)
{
// R ← R extract nearest element from Wd to q
var nearest = Wd.Pop();
R.Push((nearest.Item1, nearest.Item2));
}
}
// return R
return R;
}
#endregion
private IEnumerable<int> GetNeighbors(int id) => _links.FindLinksForId(id).Select(d => d.Item2);
public void Serialize(IBinaryWriter writer)
{
_links.Serialize(writer);
}
public void Deserialize(IBinaryReader reader)
{
_links.Deserialize(reader);
}
internal Histogram GetHistogram(HistogramMode mode) => _links.CalculateHistogram(mode);
}
}

@ -1,386 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using ZeroLevel.Services.Serialization;
namespace ZeroLevel.HNSW.Services.OPT
{
public class OptWorld<TItem>
{
private readonly NSWOptions<TItem> _options;
private VectorSet<TItem> _vectors;
private OptLayer<TItem>[] _layers;
private int EntryPoint = 0;
private int MaxLayer = 0;
private readonly ProbabilityLayerNumberGenerator _layerLevelGenerator;
private ReaderWriterLockSlim _lockGraph = new ReaderWriterLockSlim();
internal SortedList<long, float> GetNSWLinks() => _layers[0].Links;
public OptWorld(NSWOptions<TItem> options)
{
_options = options;
_vectors = new VectorSet<TItem>();
_layers = new OptLayer<TItem>[_options.LayersCount];
_layerLevelGenerator = new ProbabilityLayerNumberGenerator(_options.LayersCount, _options.M);
for (int i = 0; i < _options.LayersCount; i++)
{
_layers[i] = new OptLayer<TItem>(_options, _vectors);
}
}
public OptWorld(NSWOptions<TItem> options, Stream stream)
{
_options = options;
Deserialize(stream);
}
/// <summary>
/// Search in the graph K for vectors closest to a given vector
/// </summary>
/// <param name="vector">Given vector</param>
/// <param name="k">Count of elements for search</param>
/// <param name="activeNodes"></param>
/// <returns></returns>
public IEnumerable<(int, TItem, float)> Search(TItem vector, int k)
{
foreach (var pair in KNearest(vector, k))
{
yield return (pair.Item1, _vectors[pair.Item1], pair.Item2);
}
}
public IEnumerable<(int, TItem, float)> Search(TItem vector, int k, SearchContext context)
{
if (context == null)
{
foreach (var pair in KNearest(vector, k))
{
yield return (pair.Item1, _vectors[pair.Item1], pair.Item2);
}
}
else
{
foreach (var pair in KNearest(vector, k, context))
{
yield return (pair.Item1, _vectors[pair.Item1], pair.Item2);
}
}
}
public IEnumerable<(int, TItem, float)> Search(int k, SearchContext context)
{
if (context == null)
{
throw new ArgumentNullException(nameof(context));
}
else
{
foreach (var pair in KNearest(k, context))
{
yield return (pair.Item1, _vectors[pair.Item1], pair.Item2);
}
}
}
/// <summary>
/// Adding vectors batch
/// </summary>
/// <param name="vectors">Vectors</param>
/// <returns>Vector identifiers in a graph</returns>
public int[] AddItems(IEnumerable<TItem> vectors)
{
_lockGraph.EnterWriteLock();
try
{
var ids = _vectors.Append(vectors);
for (int i = 0; i < ids.Length; i++)
{
INSERT(ids[i]);
}
return ids;
}
finally
{
_lockGraph.ExitWriteLock();
}
}
#region https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
/// <summary>
/// Algorithm 1
/// </summary>
private void INSERT(int q)
{
var distance = new Func<int, float>(candidate => _options.Distance(_vectors[q], _vectors[candidate]));
// W ← ∅ // list for the currently found nearest elements
var W = new MinHeap();
// ep ← get enter point for hnsw
//var ep = _layers[MaxLayer].FingEntryPointAtLayer(distance);
//if(ep == -1) ep = EntryPoint;
var ep = EntryPoint;
var epDist = distance(ep);
// L ← level of ep // top layer for hnsw
var L = MaxLayer;
// l ← ⌊-ln(unif(0..1))∙mL⌋ // new elements level
int l = _layerLevelGenerator.GetRandomLayer();
// for lc ← L … l+1
// Проход с верхнего уровня до уровня где появляется элемент, для нахождения точки входа
int id;
float value;
for (int lc = L; lc > l; --lc)
{
// W ← SEARCH-LAYER(q, ep, ef = 1, lc)
foreach (var i in _layers[lc].KNearestAtLayer(ep, distance, W, 1))
{
W.Push(i);
}
// ep ← get the nearest element from W to q
if (W.TryPeek(out id, out value))
{
ep = id;
epDist = value;
}
W.Clear();
}
//for lc ← min(L, l) … 0
// connecting new node to the small world
for (int lc = Math.Min(L, l); lc >= 0; --lc)
{
if (_layers[lc].HasLinks == false)
{
_layers[lc].Append(q);
}
else
{
// W ← SEARCH - LAYER(q, ep, efConstruction, lc)
foreach (var i in _layers[lc].KNearestAtLayer(ep, distance, W, _options.EFConstruction))
{
W.Push(i);
}
// ep ← W
if (W.TryPeek(out id, out value))
{
ep = id;
epDist = value;
}
// neighbors ← SELECT-NEIGHBORS(q, W, M, lc) // alg. 3 or alg. 4
var neighbors = SelectBestForConnecting(lc, distance, W);
// add bidirectionall connectionts from neighbors to q at layer lc
// for each e ∈ neighbors // shrink connections if needed
foreach (var e in neighbors)
{
// eConn ← neighbourhood(e) at layer lc
_layers[lc].AddBidirectionallConnections(q, e.Item1, e.Item2, lc == 0);
// if distance from newNode to newNeighbour is better than to bestPeer => update bestPeer
if (e.Item2 < epDist)
{
ep = e.Item1;
epDist = e.Item2;
}
}
W.Clear();
}
}
// if l > L
if (l > L)
{
// set enter point for hnsw to q
L = l;
MaxLayer = l;
EntryPoint = ep;
}
}
/// <summary>
/// Get maximum allowed connections for the given level.
/// </summary>
/// <remarks>
/// Article: Section 4.1:
/// "Selection of the Mmax0 (the maximum number of connections that an element can have in the zero layer) also
/// has a strong influence on the search performance, especially in case of high quality(high recall) search.
/// Simulations show that setting Mmax0 to M(this corresponds to kNN graphs on each layer if the neighbors
/// selection heuristic is not used) leads to a very strong performance penalty at high recall.
/// Simulations also suggest that 2∙M is a good choice for Mmax0;
/// setting the parameter higher leads to performance degradation and excessive memory usage."
/// </remarks>
/// <param name="layer">The level of the layer.</param>
/// <returns>The maximum number of connections.</returns>
private int GetM(int layer)
{
return layer == 0 ? 2 * _options.M : _options.M;
}
private IEnumerable<(int, float)> SelectBestForConnecting(int layer, Func<int, float> distance, IEnumerable<(int, float)> candidates)
{
if (_options.SelectionHeuristic == NeighbourSelectionHeuristic.SelectSimple)
return _layers[layer].SELECT_NEIGHBORS_SIMPLE(candidates, GetM(layer));
return _layers[layer].SELECT_NEIGHBORS_HEURISTIC(distance, candidates, GetM(layer));
}
/// <summary>
/// Algorithm 5
/// </summary>
private IEnumerable<(int, float)> KNearest(TItem q, int k)
{
_lockGraph.EnterReadLock();
try
{
if (_vectors.Count == 0)
{
return Enumerable.Empty<(int, float)>();
}
int id;
float value;
var distance = new Func<int, float>(candidate => _options.Distance(q, _vectors[candidate]));
// W ← ∅ // set for the current nearest elements
var W = new MinHeap(k + 1);
// ep ← get enter point for hnsw
var ep = EntryPoint;
// L ← level of ep // top layer for hnsw
var L = MaxLayer;
// for lc ← L … 1
for (int layer = L; layer > 0; --layer)
{
// W ← SEARCH-LAYER(q, ep, ef = 1, lc)
foreach (var i in _layers[layer].KNearestAtLayer(ep, distance, W, 1))
{
W.Push(i);
}
// ep ← get nearest element from W to q
if (W.TryPeek(out id, out value))
{
ep = id;
}
W.Clear();
}
// W ← SEARCH-LAYER(q, ep, ef, lc =0)
foreach (var i in _layers[0].KNearestAtLayer(ep, distance, W, k))
{
W.Push(i);
}
// return K nearest elements from W to q
return W;
}
finally
{
_lockGraph.ExitReadLock();
}
}
private IEnumerable<(int, float)> KNearest(TItem q, int k, SearchContext context)
{
_lockGraph.EnterReadLock();
try
{
if (_vectors.Count == 0)
{
return Enumerable.Empty<(int, float)>();
}
int id;
float value;
var distance = new Func<int, float>(candidate => _options.Distance(q, _vectors[candidate]));
// W ← ∅ // set for the current nearest elements
var W = new MinHeap(k + 1);
// ep ← get enter point for hnsw
var ep = EntryPoint;
// L ← level of ep // top layer for hnsw
var L = MaxLayer;
// for lc ← L … 1
for (int layer = L; layer > 0; --layer)
{
// W ← SEARCH-LAYER(q, ep, ef = 1, lc)
foreach (var i in _layers[layer].KNearestAtLayer(ep, distance, W, 1))
{
W.Push(i);
}
// ep ← get nearest element from W to q
if (W.TryPeek(out id, out value))
{
ep = id;
}
W.Clear();
}
// W ← SEARCH-LAYER(q, ep, ef, lc =0)
foreach (var i in _layers[0].KNearestAtLayer(ep, distance, W, k, context))
{
W.Push(i);
}
// return K nearest elements from W to q
return W;
}
finally
{
_lockGraph.ExitReadLock();
}
}
private IEnumerable<(int, float)> KNearest(int k, SearchContext context)
{
_lockGraph.EnterReadLock();
try
{
if (_vectors.Count == 0)
{
return Enumerable.Empty<(int, float)>();
}
// W ← ∅ // set for the current nearest elements
var W = new MaxHeap(k + 1);
// W ← SEARCH-LAYER(q, ep, ef, lc =0)
foreach (var i in _layers[0].KNearestAtLayer(W, k, context))
{
W.Push(i);
}
// return K nearest elements from W to q
return W;
}
finally
{
_lockGraph.ExitReadLock();
}
}
#endregion
public void Serialize(Stream stream)
{
using (var writer = new MemoryStreamWriter(stream))
{
writer.WriteInt32(EntryPoint);
writer.WriteInt32(MaxLayer);
_vectors.Serialize(writer);
writer.WriteInt32(_layers.Length);
foreach (var l in _layers)
{
l.Serialize(writer);
}
}
}
public void Deserialize(Stream stream)
{
using (var reader = new MemoryStreamReader(stream))
{
this.EntryPoint = reader.ReadInt32();
this.MaxLayer = reader.ReadInt32();
_vectors = new VectorSet<TItem>();
_vectors.Deserialize(reader);
var countLayers = reader.ReadInt32();
_layers = new OptLayer<TItem>[countLayers];
for (int i = 0; i < countLayers; i++)
{
_layers[i] = new OptLayer<TItem>(_options, _vectors);
_layers[i].Deserialize(reader);
}
}
}
public Histogram GetHistogram(HistogramMode mode = HistogramMode.SQRT)
=> _layers[0].GetHistogram(mode);
}
}

@ -31,7 +31,7 @@ namespace ZeroLevel.HNSW
}
}
internal SmallWorld(NSWOptions<TItem> options, Stream stream)
public SmallWorld(NSWOptions<TItem> options, Stream stream)
{
_options = options;
Deserialize(stream);
@ -116,11 +116,10 @@ namespace ZeroLevel.HNSW
{
var distance = new Func<int, float>(candidate => _options.Distance(_vectors[q], _vectors[candidate]));
// W ← ∅ // list for the currently found nearest elements
IDictionary<int, float> W = new Dictionary<int, float>();
var W = new MinHeap(_options.EFConstruction + 1);
// ep ← get enter point for hnsw
//var ep = _layers[MaxLayer].FingEntryPointAtLayer(distance);
//if(ep == -1) ep = EntryPoint;
var ep = EntryPoint;
var ep = _layers[MaxLayer].FindEntryPointAtLayer(distance);
if(ep == -1) ep = EntryPoint;
var epDist = distance(ep);
// L ← level of ep // top layer for hnsw
var L = MaxLayer;
@ -128,23 +127,23 @@ namespace ZeroLevel.HNSW
int l = _layerLevelGenerator.GetRandomLayer();
// for lc ← L … l+1
// Проход с верхнего уровня до уровня где появляется элемент, для нахождения точки входа
int id;
float value;
for (int lc = L; lc > l; --lc)
{
if (_layers[lc].HasLinks == false)
// W ← SEARCH-LAYER(q, ep, ef = 1, lc)
foreach (var i in _layers[lc].KNearestAtLayer(ep, distance, W, 1))
{
_layers[lc].Append(q);
ep = q;
W.Push(i);
}
else
{
// W ← SEARCH-LAYER(q, ep, ef = 1, lc)
_layers[lc].KNearestAtLayer(ep, distance, W, 1);
// ep ← get the nearest element from W to q
var nearest = W.OrderBy(p => p.Value).First();
ep = nearest.Key;
epDist = nearest.Value;
W.Clear();
if (W.TryPeek(out id, out value))
{
ep = id;
epDist = value;
}
W.Clear();
}
//for lc ← min(L, l) … 0
// connecting new node to the small world
@ -153,12 +152,22 @@ namespace ZeroLevel.HNSW
if (_layers[lc].HasLinks == false)
{
_layers[lc].Append(q);
ep = q;
}
else
{
// W ← SEARCH - LAYER(q, ep, efConstruction, lc)
_layers[lc].KNearestAtLayer(ep, distance, W, _options.EFConstruction);
foreach (var i in _layers[lc].KNearestAtLayer(ep, distance, W, _options.EFConstruction))
{
W.Push(i);
}
// ep ← W
if (W.TryPeek(out id, out value))
{
ep = id;
epDist = value;
}
// neighbors ← SELECT-NEIGHBORS(q, W, M, lc) // alg. 3 or alg. 4
var neighbors = SelectBestForConnecting(lc, distance, W);
// add bidirectionall connectionts from neighbors to q at layer lc
@ -166,16 +175,14 @@ namespace ZeroLevel.HNSW
foreach (var e in neighbors)
{
// eConn ← neighbourhood(e) at layer lc
_layers[lc].AddBidirectionallConnections(q, e.Key, e.Value, lc == 0);
_layers[lc].AddBidirectionallConnections(q, e.Item1, e.Item2, lc == 0);
// if distance from newNode to newNeighbour is better than to bestPeer => update bestPeer
if (e.Value < epDist)
if (e.Item2 < epDist)
{
ep = e.Key;
epDist = e.Value;
ep = e.Item1;
epDist = e.Item2;
}
}
// ep ← W
ep = W.OrderBy(p => p.Value).First().Key;
W.Clear();
}
}
@ -208,10 +215,10 @@ namespace ZeroLevel.HNSW
return layer == 0 ? 2 * _options.M : _options.M;
}
private IDictionary<int, float> SelectBestForConnecting(int layer, Func<int, float> distance, IDictionary<int, float> candidates)
private IEnumerable<(int, float)> SelectBestForConnecting(int layer, Func<int, float> distance, IEnumerable<(int, float)> candidates)
{
if (_options.SelectionHeuristic == NeighbourSelectionHeuristic.SelectSimple)
return _layers[layer].SELECT_NEIGHBORS_SIMPLE(distance, candidates, GetM(layer));
return _layers[layer].SELECT_NEIGHBORS_SIMPLE(candidates, GetM(layer));
return _layers[layer].SELECT_NEIGHBORS_HEURISTIC(distance, candidates, GetM(layer));
}
@ -227,10 +234,13 @@ namespace ZeroLevel.HNSW
{
return Enumerable.Empty<(int, float)>();
}
int id;
float value;
var distance = new Func<int, float>(candidate => _options.Distance(q, _vectors[candidate]));
// W ← ∅ // set for the current nearest elements
var W = new Dictionary<int, float>(k + 1);
var W = new MinHeap(k + 1);
// ep ← get enter point for hnsw
var ep = EntryPoint;
// L ← level of ep // top layer for hnsw
@ -239,21 +249,31 @@ namespace ZeroLevel.HNSW
for (int layer = L; layer > 0; --layer)
{
// W ← SEARCH-LAYER(q, ep, ef = 1, lc)
_layers[layer].KNearestAtLayer(ep, distance, W, 1);
foreach (var i in _layers[layer].KNearestAtLayer(ep, distance, W, 1))
{
W.Push(i);
}
// ep ← get nearest element from W to q
ep = W.OrderBy(p => p.Value).First().Key;
if (W.TryPeek(out id, out value))
{
ep = id;
}
W.Clear();
}
// W ← SEARCH-LAYER(q, ep, ef, lc =0)
_layers[0].KNearestAtLayer(ep, distance, W, k);
foreach (var i in _layers[0].KNearestAtLayer(ep, distance, W, k))
{
W.Push(i);
}
// return K nearest elements from W to q
return W.Select(p => (p.Key, p.Value));
return W;
}
finally
{
_lockGraph.ExitReadLock();
}
}
private IEnumerable<(int, float)> KNearest(TItem q, int k, SearchContext context)
{
_lockGraph.EnterReadLock();
@ -263,10 +283,12 @@ namespace ZeroLevel.HNSW
{
return Enumerable.Empty<(int, float)>();
}
int id;
float value;
var distance = new Func<int, float>(candidate => _options.Distance(q, _vectors[candidate]));
// W ← ∅ // set for the current nearest elements
var W = new Dictionary<int, float>(k + 1);
var W = new MinHeap(k + 1);
// ep ← get enter point for hnsw
var ep = EntryPoint;
// L ← level of ep // top layer for hnsw
@ -275,15 +297,24 @@ namespace ZeroLevel.HNSW
for (int layer = L; layer > 0; --layer)
{
// W ← SEARCH-LAYER(q, ep, ef = 1, lc)
_layers[layer].KNearestAtLayer(ep, distance, W, 1);
foreach (var i in _layers[layer].KNearestAtLayer(ep, distance, W, 1))
{
W.Push(i);
}
// ep ← get nearest element from W to q
ep = W.OrderBy(p => p.Value).First().Key;
if (W.TryPeek(out id, out value))
{
ep = id;
}
W.Clear();
}
// W ← SEARCH-LAYER(q, ep, ef, lc =0)
_layers[0].KNearestAtLayer(ep, distance, W, k, context);
foreach (var i in _layers[0].KNearestAtLayer(ep, distance, W, k, context))
{
W.Push(i);
}
// return K nearest elements from W to q
return W.Select(p => (p.Key, p.Value));
return W;
}
finally
{
@ -300,13 +331,15 @@ namespace ZeroLevel.HNSW
{
return Enumerable.Empty<(int, float)>();
}
var distance = new Func<int, int, float>((id1, id2) => _options.Distance(_vectors[id1], _vectors[id2]));
// W ← ∅ // set for the current nearest elements
var W = new Dictionary<int, float>(k + 1);
var W = new MinHeap(k + 1);
// W ← SEARCH-LAYER(q, ep, ef, lc =0)
_layers[0].KNearestAtLayer(W, k, context);
foreach (var i in _layers[0].KNearestAtLayer(W, k, context))
{
W.Push(i);
}
// return K nearest elements from W to q
return W.Select(p => (p.Key, p.Value));
return W;
}
finally
{

@ -1,166 +1,287 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading;
namespace ZeroLevel.Services.Pools
{
/// <summary>
/// Steal from Roslyn
/// https://github.com/dotnet/roslyn/blob/master/src/Dependencies/PooledObjects/ObjectPool%601.cs
/// </summary>
public class ObjectPool<T> where T : class
public enum LoadingMode { Eager, Lazy, LazyExpanding };
public enum AccessMode { FIFO, LIFO, Circular };
public sealed class Pool<T> : IDisposable
{
[DebuggerDisplay("{Value,nq}")]
private struct Element
private bool isDisposed;
private Func<Pool<T>, T> factory;
private LoadingMode loadingMode;
private IItemStore itemStore;
private int size;
private int count;
private Semaphore sync;
public Pool(int size, Func<Pool<T>, T> factory)
: this(size, factory, LoadingMode.Lazy, AccessMode.FIFO)
{
internal T Value;
}
/// <remarks>
/// Not using System.Func{T} because this file is linked into the (debugger) Formatter,
/// which does not have that type (since it compiles against .NET 2.0).
/// </remarks>
public delegate T Factory();
public Pool(int size, Func<Pool<T>, T> factory,
LoadingMode loadingMode, AccessMode accessMode)
{
if (size <= 0)
throw new ArgumentOutOfRangeException("size", size,
"Argument 'size' must be greater than zero.");
if (factory == null)
throw new ArgumentNullException("factory");
// Storage for the pool objects. The first item is stored in a dedicated field because we
// expect to be able to satisfy most requests from it.
private T _firstItem;
this.size = size;
this.factory = factory;
sync = new Semaphore(size, size);
this.loadingMode = loadingMode;
this.itemStore = CreateItemStore(accessMode, size);
if (loadingMode == LoadingMode.Eager)
{
PreloadItems();
}
}
private readonly Element[] _items;
public T Acquire()
{
sync.WaitOne();
switch (loadingMode)
{
case LoadingMode.Eager:
return AcquireEager();
case LoadingMode.Lazy:
return AcquireLazy();
default:
Debug.Assert(loadingMode == LoadingMode.LazyExpanding,
"Unknown LoadingMode encountered in Acquire method.");
return AcquireLazyExpanding();
}
}
// factory is stored for the lifetime of the pool. We will call this only when pool needs to
// expand. compared to "new T()", Func gives more flexibility to implementers and faster
// than "new T()".
private readonly Factory _factory;
public void Release(T item)
{
lock (itemStore)
{
itemStore.Store(item);
}
sync.Release();
}
public int Count => _items?.Length ?? 0;
public void Dispose()
{
if (isDisposed)
{
return;
}
isDisposed = true;
if (typeof(IDisposable).IsAssignableFrom(typeof(T)))
{
lock (itemStore)
{
while (itemStore.Count > 0)
{
IDisposable disposable = (IDisposable)itemStore.Fetch();
disposable.Dispose();
}
}
}
sync.Close();
}
public ObjectPool(Factory factory)
: this(factory, Environment.ProcessorCount * 2)
{ }
#region Acquisition
public ObjectPool(Factory factory, int size)
private T AcquireEager()
{
lock (itemStore)
{
Debug.Assert(size >= 1);
_factory = factory;
_items = new Element[size - 1];
return itemStore.Fetch();
}
}
private T CreateInstance()
private T AcquireLazy()
{
lock (itemStore)
{
if (itemStore.Count > 0)
{
var inst = _factory();
return inst;
return itemStore.Fetch();
}
}
Interlocked.Increment(ref count);
return factory(this);
}
/// <summary>
/// Produces an instance.
/// </summary>
/// <remarks>
/// Search strategy is a simple linear probing which is chosen for it cache-friendliness.
/// Note that Free will try to store recycled objects close to the start thus statistically
/// reducing how far we will typically search.
/// </remarks>
public T Allocate()
private T AcquireLazyExpanding()
{
// PERF: Examine the first element. If that fails, AllocateSlow will look at the remaining elements.
// Note that the initial read is optimistically not synchronized. That is intentional.
// We will interlock only when we have a candidate. in a worst case we may miss some
// recently returned objects. Not a big deal.
T inst = _firstItem;
if (inst == null || inst != Interlocked.CompareExchange(ref _firstItem, null, inst))
bool shouldExpand = false;
if (count < size)
{
inst = AllocateSlow();
int newCount = Interlocked.Increment(ref count);
if (newCount <= size)
{
shouldExpand = true;
}
return inst;
else
{
// Another thread took the last spot - use the store instead
Interlocked.Decrement(ref count);
}
private T AllocateSlow()
}
if (shouldExpand)
{
var items = _items;
for (int i = 0; i < items.Length; i++)
return factory(this);
}
else
{
// Note that the initial read is optimistically not synchronized. That is intentional.
// We will interlock only when we have a candidate. in a worst case we may miss some
// recently returned objects. Not a big deal.
T inst = items[i].Value;
if (inst != null)
lock (itemStore)
{
return itemStore.Fetch();
}
}
}
private void PreloadItems()
{
if (inst == Interlocked.CompareExchange(ref items[i].Value, null, inst))
for (int i = 0; i < size; i++)
{
return inst;
T item = factory(this);
itemStore.Store(item);
}
count = size;
}
#endregion
#region Collection Wrappers
interface IItemStore
{
T Fetch();
void Store(T item);
int Count { get; }
}
return CreateInstance();
private IItemStore CreateItemStore(AccessMode mode, int capacity)
{
switch (mode)
{
case AccessMode.FIFO:
return new QueueStore(capacity);
case AccessMode.LIFO:
return new StackStore(capacity);
default:
Debug.Assert(mode == AccessMode.Circular,
"Invalid AccessMode in CreateItemStore");
return new CircularStore(capacity);
}
}
/// <summary>
/// Returns objects to the pool.
/// </summary>
/// <remarks>
/// Search strategy is a simple linear probing which is chosen for it cache-friendliness.
/// Note that Free will try to store recycled objects close to the start thus statistically
/// reducing how far we will typically search in Allocate.
/// </remarks>
public void Free(T obj)
class QueueStore : Queue<T>, IItemStore
{
if (!Validate(obj))
public QueueStore(int capacity) : base(capacity)
{
return;
}
if (_firstItem == null)
public T Fetch()
{
// Intentionally not using interlocked here.
// In a worst case scenario two objects may be stored into same slot.
// It is very unlikely to happen and will only mean that one of the objects will get collected.
_firstItem = obj;
return Dequeue();
}
else
public void Store(T item)
{
FreeSlow(obj);
Enqueue(item);
}
}
private void FreeSlow(T obj)
class StackStore : Stack<T>, IItemStore
{
var items = _items;
for (int i = 0; i < items.Length; i++)
public StackStore(int capacity) : base(capacity)
{
}
public T Fetch()
{
if (items[i].Value == null)
return Pop();
}
public void Store(T item)
{
// Intentionally not using interlocked here.
// In a worst case scenario two objects may be stored into same slot.
// It is very unlikely to happen and will only mean that one of the objects will get collected.
items[i].Value = obj;
break;
Push(item);
}
}
class CircularStore : IItemStore
{
private List<Slot> slots;
private int freeSlotCount;
private int position = -1;
public CircularStore(int capacity)
{
slots = new List<Slot>(capacity);
}
private bool Validate(object obj)
public T Fetch()
{
if (obj == null) return false;
if (_firstItem == obj) return false;
var items = _items;
for (int i = 0; i < items.Length; i++)
if (Count == 0)
throw new InvalidOperationException("The buffer is empty.");
int startPosition = position;
do
{
var value = items[i].Value;
if (value == null)
Advance();
Slot slot = slots[position];
if (!slot.IsInUse)
{
return true;
slot.IsInUse = true;
--freeSlotCount;
return slot.Item;
}
if (value == obj) return false;
} while (startPosition != position);
throw new InvalidOperationException("No free slots.");
}
return true;
public void Store(T item)
{
Slot slot = slots.Find(s => object.Equals(s.Item, item));
if (slot == null)
{
slot = new Slot(item);
slots.Add(slot);
}
slot.IsInUse = false;
++freeSlotCount;
}
/*
public int Count
{
get { return freeSlotCount; }
}
Alternate
https://stackoverflow.com/questions/1698738/objectpoolt-or-similar-for-net-already-in-a-library
private void Advance()
{
position = (position + 1) % slots.Count;
}
*/
class Slot
{
public Slot(T item)
{
this.Item = item;
}
public T Item { get; private set; }
public bool IsInUse { get; set; }
}
}
#endregion
public bool IsDisposed
{
get { return isDisposed; }
}
}
}

@ -12,10 +12,10 @@ namespace temp2
{
static void Main(string[] args)
{
OptWorld<float[]> world;
SmallWorld<float[]> world;
using (var ms = new FileStream(@"F:\graph_test.bin", FileMode.Open, FileAccess.Read, FileShare.None))
{
world = new OptWorld<float[]>(NSWOptions<float[]>.Create(6, 12, 100, 10, Metrics.L2Euclidean, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple), ms);
world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 12, 100, 10, Metrics.L2Euclidean, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple), ms);
}
var test_vectors = new List<float[]>();
@ -34,7 +34,7 @@ namespace temp2
Console.WriteLine("Completed");
}
static void Forward(OptWorld<float[]> world, List<float[]> test_vectors)
static void Forward(SmallWorld<float[]> world, List<float[]> test_vectors)
{
int K = 10;
foreach (var v in test_vectors)

Loading…
Cancel
Save

Powered by TurnKey Linux.