From ac20e9cacb1ce757e62bda7d3b5b957858780ff7 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 29 Dec 2021 02:30:45 +0300 Subject: [PATCH] Updates --- TestHNSW/HNSWDemo/Program.cs | 3 + .../HNSWDemo/Tests/AutoClusteringMNISTTest.cs | 23 ++++- .../Services/AutomaticGraphClusterer.cs | 88 +++++++++++++++++-- ZeroLevel.HNSW/ZeroLevel.HNSW.csproj | 5 +- 4 files changed, 109 insertions(+), 10 deletions(-) diff --git a/TestHNSW/HNSWDemo/Program.cs b/TestHNSW/HNSWDemo/Program.cs index 39e2d60..5779715 100644 --- a/TestHNSW/HNSWDemo/Program.cs +++ b/TestHNSW/HNSWDemo/Program.cs @@ -1,5 +1,6 @@ using HNSWDemo.Tests; using System; +using ZeroLevel.Services.Web; namespace HNSWDemo { @@ -7,6 +8,8 @@ namespace HNSWDemo { static void Main(string[] args) { + var uri = new Uri("https://hack33d.ru/bpla/upload.php?path=128111&get=0J/QuNC70LjQv9C10L3QutC+INCS0LvQsNC00LjQvNC40YAg0JzQuNGF0LDQudC70L7QstC40Yc7MDQuMDkuMTk1NCAoNjYg0LvQtdGCKTvQnNC+0YHQutC+0LLRgdC60LDRjzsxMjgxMTE7TEFfUkVaVVM7RkxZXzAy"); + var parts = UrlUtility.ParseQueryString(uri.Query); new AutoClusteringMNISTTest().Run(); //new HistogramTest().Run(); Console.WriteLine("Completed"); diff --git a/TestHNSW/HNSWDemo/Tests/AutoClusteringMNISTTest.cs b/TestHNSW/HNSWDemo/Tests/AutoClusteringMNISTTest.cs index 0e26a81..eabfa4a 100644 --- a/TestHNSW/HNSWDemo/Tests/AutoClusteringMNISTTest.cs +++ b/TestHNSW/HNSWDemo/Tests/AutoClusteringMNISTTest.cs @@ -81,13 +81,30 @@ namespace HNSWDemo.Tests var links = world.GetLinks().SelectMany(pair => pair.Value.Select(p=> distance(pair.Key, p))).ToList(); var exists = links.Where(n => n > 0).ToArray(); - var histogram = new Histogram(HistogramMode.SQRT, links); + var histogram = new Histogram(HistogramMode.LOG, links); DrawHistogram(histogram, @"D:\Mnist\histogram.jpg"); var clusters = AutomaticGraphClusterer.DetectClusters(world); Console.WriteLine($"Found {clusters.Count} clusters"); - - + + while (clusters.Count > 10) + { + var last = clusters[clusters.Count - 1]; + var testDistance = clusters[0].MinDistance(distance, last); + var index = 0; + for (int i = 1; i < clusters.Count - 1; i++) + { + var d = clusters[i].MinDistance(distance, last); + if (d < testDistance) + { + testDistance = d; + index = i; + } + } + clusters[index].Merge(last); + clusters.RemoveAt(clusters.Count - 1); + } + for (int i = 0; i < clusters.Count; i++) { var ouput = Path.Combine(folder, i.ToString("D3")); diff --git a/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs b/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs index 9994134..e8e0d9c 100644 --- a/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs +++ b/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs @@ -1,13 +1,89 @@ using System; +using System.Collections; using System.Collections.Generic; using System.Linq; namespace ZeroLevel.HNSW.Services { - public static class AutomaticGraphClusterer + public class Cluster + : IEnumerable { - private const int HALF_LONG_BITS = 32; + private HashSet _elements = new HashSet(); + + public int Count => _elements.Count; + + public bool Contains(int id) => _elements.Contains(id); + + public bool Add(int id) => _elements.Add(id); + + public IEnumerator GetEnumerator() + { + return _elements.GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return _elements.GetEnumerator(); + } + + public void Merge(Cluster cluster) + { + foreach (var e in cluster) + { + this._elements.Add(e); + } + } + public float MaxDistance(Func distance, Cluster other) + { + var max = float.MinValue; + foreach (var e in this._elements) + { + foreach (var o in other) + { + var d = distance(e, o); + if (d > max) + { + max = d; + } + } + } + return max; + } + + public float MinDistance(Func distance, Cluster other) + { + var min = float.MaxValue; + foreach (var e in this._elements) + { + foreach (var o in other) + { + var d = distance(e, o); + if (d < min) + { + min = d; + } + } + } + return min; + } + + public float AvgDistance(Func distance, Cluster other) + { + var dist = new List(); + foreach (var e in this._elements) + { + foreach (var o in other) + { + dist.Add(distance(e, o)); + } + } + return dist.Average(); + } + } + + public static class AutomaticGraphClusterer + { private class Link { public int Id1; @@ -15,13 +91,13 @@ namespace ZeroLevel.HNSW.Services public float Distance; } - public static List> DetectClusters(SmallWorld world) + public static List DetectClusters(SmallWorld world) { var distance = world.DistanceFunction; var links = world.GetLinks().SelectMany(pair => pair.Value.Select(id => new Link { Id1 = pair.Key, Id2 = id, Distance = distance(pair.Key, id) })).ToList(); // 1. Find R - bound between intra-cluster distances and out-of-cluster distances - var histogram = new Histogram(HistogramMode.SQRT, links.Select(l => l.Distance)); + var histogram = new Histogram(HistogramMode.LOG, links.Select(l => l.Distance)); int threshold = histogram.CuttOff(); var min = histogram.Bounds[threshold - 1]; var max = histogram.Bounds[threshold]; @@ -39,7 +115,7 @@ namespace ZeroLevel.HNSW.Services } // 3. Extract clusters - List> clusters = new List>(); + List clusters = new List(); foreach (var l in resultLinks) { var id1 = l.Id1; @@ -62,7 +138,7 @@ namespace ZeroLevel.HNSW.Services } if (found == false) { - var c = new HashSet(); + var c = new Cluster(); c.Add(id1); c.Add(id2); clusters.Add(c); diff --git a/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj b/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj index 69295c1..24d2e0b 100644 --- a/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj +++ b/ZeroLevel.HNSW/ZeroLevel.HNSW.csproj @@ -7,7 +7,10 @@ - + + + +