From 1b6c56033ff06bec21b75322448b0ce0981e7810 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 18 Dec 2021 21:31:19 +0300 Subject: [PATCH] HNSW Links trimming --- TestHNSW/HNSWDemo/Program.cs | 10 ++-- .../Services/CompactBiDirectionalLinksSet.cs | 22 +++++++++ ZeroLevel.HNSW/Services/Layer.cs | 48 +++++++++++++++---- ZeroLevel.HNSW/SmallWorld.cs | 7 ++- .../DOM/DSL/Services/TContainerFactory.cs | 10 ++-- .../Network/Extensions/ExchangeExtension.cs | 34 ++++++------- .../Network/FileTransfer/FileSender.cs | 6 +-- ZeroLevel/Services/Semantic/WordTokenizer.cs | 6 +-- temp2/Program.cs | 1 - 9 files changed, 100 insertions(+), 44 deletions(-) diff --git a/TestHNSW/HNSWDemo/Program.cs b/TestHNSW/HNSWDemo/Program.cs index 5078938..7263796 100644 --- a/TestHNSW/HNSWDemo/Program.cs +++ b/TestHNSW/HNSWDemo/Program.cs @@ -165,7 +165,7 @@ namespace HNSWDemo static void Main(string[] args) { - AccuracityTest(); + InsertTimeExplosionTest(); Console.WriteLine("Completed"); Console.ReadKey(); } @@ -661,18 +661,18 @@ namespace HNSWDemo static void InsertTimeExplosionTest() { - var count = 1000; - var iterationCount = 1000; + var count = 20000; + var iterationCount = 100; var dimensionality = 128; var sw = new Stopwatch(); - var world = new SmallWorld(NSWOptions.Create(6, 12, 100, 100, CosineDistance.NonOptimized, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple)); + var world = new SmallWorld(NSWOptions.Create(6, 8, 150, 150, Metrics.L2Euclidean, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple)); for (int i = 0; i < iterationCount; i++) { var samples = RandomVectors(dimensionality, count); sw.Restart(); var ids = world.AddItems(samples.ToArray()); sw.Stop(); - Console.WriteLine($"ITERATION: [{i.ToString("D4")}] COUNT: [{ids.Length}] ELAPSEF [{sw.ElapsedMilliseconds} ms]"); + Console.WriteLine($"ITERATION: [{i.ToString("D4")}] COUNT: [{ids.Length}] ELAPSED [{sw.ElapsedMilliseconds} ms]"); } } } diff --git a/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs b/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs index 72ff287..10b5f42 100644 --- a/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs +++ b/ZeroLevel.HNSW/Services/CompactBiDirectionalLinksSet.cs @@ -184,6 +184,28 @@ namespace ZeroLevel.HNSW } } + internal void RemoveIndex(int id1, int id2) + { + long k1 = (((long)(id1)) << HALF_LONG_BITS) + id2; + long k2 = (((long)(id2)) << HALF_LONG_BITS) + id1; + _rwLock.EnterWriteLock(); + try + { + if (_set.ContainsKey(k1)) + { + _set.Remove(k1); + } + if (_set.ContainsKey(k2)) + { + _set.Remove(k2); + } + } + finally + { + _rwLock.ExitWriteLock(); + } + } + internal bool Add(int id1, int id2, float distance) { _rwLock.EnterWriteLock(); diff --git a/ZeroLevel.HNSW/Services/Layer.cs b/ZeroLevel.HNSW/Services/Layer.cs index 3071721..50d8168 100644 --- a/ZeroLevel.HNSW/Services/Layer.cs +++ b/ZeroLevel.HNSW/Services/Layer.cs @@ -41,7 +41,7 @@ namespace ZeroLevel.HNSW /// The node with which the connection will be made /// /// - internal void AddBidirectionallConnections(int q, int p, float qpDistance, bool isMapLayer) + /*internal void AddBidirectionallConnections(int q, int p, float qpDistance, bool isMapLayer) { // поиск в ширину ближайших узлов к найденному var nearest = _links.FindLinksForId(p).ToArray(); @@ -86,8 +86,40 @@ namespace ZeroLevel.HNSW _links.Add(q, p, qpDistance); } } + }*/ + + internal void AddBidirectionallConnections(int q, int p, float qpDistance) + { + _links.Add(q, p, qpDistance); } + internal void TrimLinks(int q, bool isMapLayer) + { + var M = (isMapLayer ? _options.M * 2 : _options.M); + // поиск в ширину ближайших узлов к найденному + var nearest = _links.FindLinksForId(q).ToArray(); + + if (nearest.Length <= M && nearest.Length > 1) + { + foreach (var l in nearest) + { + if (l.Item1 == l.Item2) + { + _links.RemoveIndex(l.Item1, l.Item2); + } + } + } + else if (nearest.Length > M) + { + var removeCount = nearest.Length - M; + foreach (var l in nearest.OrderByDescending(n => n.Item3).Take(removeCount)) + { + _links.RemoveIndex(l.Item1, l.Item2); + } + } + } + + /// /// Adding a node with a connection to itself /// @@ -251,12 +283,12 @@ namespace ZeroLevel.HNSW var toExpand = C.Pop(); if (W.Count > 0) { - if(W.TryPeek(out _, out var dist )) - if (toExpand.Item2 > dist) - { - // the closest candidate is farther than farthest result - break; - } + if (W.TryPeek(out _, out var dist)) + if (toExpand.Item2 > dist) + { + // the closest candidate is farther than farthest result + break; + } } // expand candidate @@ -270,7 +302,7 @@ namespace ZeroLevel.HNSW var neighbourDistance = targetCosts(neighbourId); if (context.IsActiveNode(neighbourId)) { - if (W.Count < ef || (W.Count > 0 && (W.TryPeek(out _, out var dist) && neighbourDistance < dist))) + if (W.Count < ef || (W.Count > 0 && (W.TryPeek(out _, out var dist) && neighbourDistance < dist))) { W.Push((neighbourId, neighbourDistance)); if (W.Count > ef) diff --git a/ZeroLevel.HNSW/SmallWorld.cs b/ZeroLevel.HNSW/SmallWorld.cs index d70733a..1be1e5f 100644 --- a/ZeroLevel.HNSW/SmallWorld.cs +++ b/ZeroLevel.HNSW/SmallWorld.cs @@ -119,7 +119,7 @@ namespace ZeroLevel.HNSW var W = new MinHeap(_options.EFConstruction + 1); // ep ← get enter point for hnsw var ep = _layers[MaxLayer].FindEntryPointAtLayer(distance); - if(ep == -1) ep = EntryPoint; + if (ep == -1) ep = EntryPoint; var epDist = distance(ep); // L ← level of ep // top layer for hnsw var L = MaxLayer; @@ -143,6 +143,7 @@ namespace ZeroLevel.HNSW ep = id; epDist = value; } + _layers[lc].TrimLinks(q, lc == 0); W.Clear(); } //for lc ← min(L, l) … 0 @@ -175,7 +176,7 @@ namespace ZeroLevel.HNSW foreach (var e in neighbors) { // eConn ← neighbourhood(e) at layer lc - _layers[lc].AddBidirectionallConnections(q, e.Item1, e.Item2, lc == 0); + _layers[lc].AddBidirectionallConnections(q, e.Item1, e.Item2); // if distance from newNode to newNeighbour is better than to bestPeer => update bestPeer if (e.Item2 < epDist) { @@ -183,6 +184,8 @@ namespace ZeroLevel.HNSW epDist = e.Item2; } } + + _layers[lc].TrimLinks(q, lc == 0); W.Clear(); } } diff --git a/ZeroLevel/Services/DOM/DSL/Services/TContainerFactory.cs b/ZeroLevel/Services/DOM/DSL/Services/TContainerFactory.cs index 49414be..1d292e1 100644 --- a/ZeroLevel/Services/DOM/DSL/Services/TContainerFactory.cs +++ b/ZeroLevel/Services/DOM/DSL/Services/TContainerFactory.cs @@ -5,20 +5,20 @@ namespace DOM.DSL.Services { public class TContainerFactory { - private readonly ObjectPool _pool; + private readonly Pool _pool; private static int _get_count = 0; private static int _release_count = 0; internal TContainerFactory(TRender render) { - _pool = new ObjectPool(() => new TContainer(this, render), 64); + _pool = new Pool(64, p => new TContainer(this, render)); } internal TContainer Get(object value) { Interlocked.Increment(ref _get_count); - var c = _pool.Allocate(); + var c = _pool.Acquire(); c.Reset(value); return c; } @@ -26,7 +26,7 @@ namespace DOM.DSL.Services internal TContainer Get(object value, int index) { Interlocked.Increment(ref _get_count); - var c = _pool.Allocate(); + var c = _pool.Acquire(); c.Reset(value); c.Index = index; return c; @@ -37,7 +37,7 @@ namespace DOM.DSL.Services if (container != null) { Interlocked.Increment(ref _release_count); - _pool.Free(container); + _pool.Release(container); } } diff --git a/ZeroLevel/Services/Network/Extensions/ExchangeExtension.cs b/ZeroLevel/Services/Network/Extensions/ExchangeExtension.cs index d6c06ea..9cf1f74 100644 --- a/ZeroLevel/Services/Network/Extensions/ExchangeExtension.cs +++ b/ZeroLevel/Services/Network/Extensions/ExchangeExtension.cs @@ -6,12 +6,12 @@ namespace ZeroLevel.Network { public static class ExchangeExtension { - static ObjectPool _mrePool = new ObjectPool(() => new AutoResetEvent(false), 16); + static Pool _mrePool = new Pool(16, (p) => new AutoResetEvent(false)); public static Tresponse Request(this IClientSet exchange, string alias, TimeSpan timeout) { Tresponse response = default; - var ev = _mrePool.Allocate(); + var ev = _mrePool.Acquire(); try { if (exchange.Request(alias, @@ -22,7 +22,7 @@ namespace ZeroLevel.Network } finally { - _mrePool.Free(ev); + _mrePool.Release(ev); } return response; } @@ -30,7 +30,7 @@ namespace ZeroLevel.Network public static Tresponse Request(this IClientSet exchange, string alias, string inbox, TimeSpan timeout) { Tresponse response = default; - var ev = _mrePool.Allocate(); + var ev = _mrePool.Acquire(); try { if (exchange.Request(alias, inbox, @@ -44,7 +44,7 @@ namespace ZeroLevel.Network } finally { - _mrePool.Free(ev); + _mrePool.Release(ev); } return response; } @@ -52,7 +52,7 @@ namespace ZeroLevel.Network public static Tresponse Request(this IClientSet exchange, string alias, Trequest request, TimeSpan timeout) { Tresponse response = default; - var ev = _mrePool.Allocate(); + var ev = _mrePool.Acquire(); try { if (exchange.Request(alias, request, @@ -63,7 +63,7 @@ namespace ZeroLevel.Network } finally { - _mrePool.Free(ev); + _mrePool.Release(ev); } return response; } @@ -72,7 +72,7 @@ namespace ZeroLevel.Network , Trequest request, TimeSpan timeout) { Tresponse response = default; - var ev = _mrePool.Allocate(); + var ev = _mrePool.Acquire(); try { if (exchange.Request(alias, inbox, request, @@ -83,7 +83,7 @@ namespace ZeroLevel.Network } finally { - _mrePool.Free(ev); + _mrePool.Release(ev); } return response; } @@ -91,7 +91,7 @@ namespace ZeroLevel.Network public static Tresponse Request(this IClientSet exchange, string alias) { Tresponse response = default; - var ev = _mrePool.Allocate(); + var ev = _mrePool.Acquire(); try { if (exchange.Request(alias, @@ -102,7 +102,7 @@ namespace ZeroLevel.Network } finally { - _mrePool.Free(ev); + _mrePool.Release(ev); } return response; } @@ -110,7 +110,7 @@ namespace ZeroLevel.Network public static Tresponse Request(this IClientSet exchange, string alias, string inbox) { Tresponse response = default; - var ev = _mrePool.Allocate(); + var ev = _mrePool.Acquire(); try { if (exchange.Request(alias, inbox, @@ -124,7 +124,7 @@ namespace ZeroLevel.Network } finally { - _mrePool.Free(ev); + _mrePool.Release(ev); } return response; } @@ -132,7 +132,7 @@ namespace ZeroLevel.Network public static Tresponse Request(this IClientSet exchange, string alias, Trequest request) { Tresponse response = default; - var ev = _mrePool.Allocate(); + var ev = _mrePool.Acquire(); try { if (exchange.Request(alias, request, @@ -143,7 +143,7 @@ namespace ZeroLevel.Network } finally { - _mrePool.Free(ev); + _mrePool.Release(ev); } return response; } @@ -152,7 +152,7 @@ namespace ZeroLevel.Network , Trequest request) { Tresponse response = default; - var ev = _mrePool.Allocate(); + var ev = _mrePool.Acquire(); try { if (exchange.Request(alias, inbox, request, @@ -163,7 +163,7 @@ namespace ZeroLevel.Network } finally { - _mrePool.Free(ev); + _mrePool.Release(ev); } return response; } diff --git a/ZeroLevel/Services/Network/FileTransfer/FileSender.cs b/ZeroLevel/Services/Network/FileTransfer/FileSender.cs index f4c0347..51e2d64 100644 --- a/ZeroLevel/Services/Network/FileTransfer/FileSender.cs +++ b/ZeroLevel/Services/Network/FileTransfer/FileSender.cs @@ -10,7 +10,7 @@ namespace ZeroLevel.Network.FileTransfer public sealed class FileSender { private BlockingCollection _tasks = new BlockingCollection(); - private ObjectPool _taskPool = new ObjectPool(() => new FileTransferTask(), 100); + private Pool _taskPool = new Pool(100, (p) => new FileTransferTask()); private readonly Thread _uploadFileThread; private bool _resendWhenServerError = false; private bool _resendWhenClientError = false; @@ -45,7 +45,7 @@ namespace ZeroLevel.Network.FileTransfer { throw new FileNotFoundException(filePath); } - var task = _taskPool.Allocate(); + var task = _taskPool.Acquire(); task.CompletedHandler = completeHandler; task.ErrorHandler = errorHandler; task.FilePath = filePath; @@ -69,7 +69,7 @@ namespace ZeroLevel.Network.FileTransfer } finally { - _taskPool.Free(task); + _taskPool.Release(task); } } } diff --git a/ZeroLevel/Services/Semantic/WordTokenizer.cs b/ZeroLevel/Services/Semantic/WordTokenizer.cs index b5fa5b0..0e15413 100644 --- a/ZeroLevel/Services/Semantic/WordTokenizer.cs +++ b/ZeroLevel/Services/Semantic/WordTokenizer.cs @@ -6,13 +6,13 @@ namespace ZeroLevel.Services.Semantic { public static class WordTokenizer { - static ObjectPool _pool = new ObjectPool(() => new char[2048]); + static Pool _pool = new Pool(64 ,(p) => new char[2048]); public static IEnumerable Tokenize(string text) { int index = 0; bool first = true; - var buffer = _pool.Allocate(); + var buffer = _pool.Acquire(); try { for (int i = 0; i < text?.Length; i++) @@ -40,7 +40,7 @@ namespace ZeroLevel.Services.Semantic } finally { - _pool.Free(buffer); + _pool.Release(buffer); } } } diff --git a/temp2/Program.cs b/temp2/Program.cs index 25c7945..6eec272 100644 --- a/temp2/Program.cs +++ b/temp2/Program.cs @@ -3,7 +3,6 @@ using System.Collections.Generic; using System.IO; using System.Linq; using ZeroLevel.HNSW; -using ZeroLevel.HNSW.Services.OPT; using ZeroLevel.Services.Serialization; namespace temp2