|  |  |  | @ -1,13 +1,89 @@ | 
			
		
	
		
			
				
					|  |  |  |  | using System; | 
			
		
	
		
			
				
					|  |  |  |  | using System.Collections; | 
			
		
	
		
			
				
					|  |  |  |  | using System.Collections.Generic; | 
			
		
	
		
			
				
					|  |  |  |  | using System.Linq; | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | namespace ZeroLevel.HNSW.Services | 
			
		
	
		
			
				
					|  |  |  |  | { | 
			
		
	
		
			
				
					|  |  |  |  |     public static class AutomaticGraphClusterer | 
			
		
	
		
			
				
					/// <summary>
					/// A set of distinct integer element ids that can be enumerated, merged with
					/// another cluster, and compared with another cluster through a caller-supplied
					/// pairwise distance function.
					/// </summary>
					public class Cluster
					    : IEnumerable<int>
					{
					    // Backing storage; a HashSet keeps ids unique and makes Contains/Add cheap.
					    private readonly HashSet<int> _elements = new HashSet<int>();

					    /// <summary>Number of distinct ids currently stored in the cluster.</summary>
					    public int Count => _elements.Count;

					    /// <summary>Returns true when <paramref name="id"/> is stored in the cluster.</summary>
					    public bool Contains(int id) => _elements.Contains(id);

					    /// <summary>
					    /// Adds <paramref name="id"/> to the cluster.
					    /// </summary>
					    /// <returns>True when the id was added, false when it was already present.</returns>
					    public bool Add(int id) => _elements.Add(id);

					    /// <summary>Enumerates the ids stored in the cluster.</summary>
					    public IEnumerator<int> GetEnumerator()
					    {
					        return _elements.GetEnumerator();
					    }

					    IEnumerator IEnumerable.GetEnumerator()
					    {
					        return GetEnumerator();
					    }

					    /// <summary>
					    /// Adds every id of <paramref name="cluster"/> to this cluster.
					    /// The argument itself is not modified.
					    /// </summary>
					    /// <exception cref="ArgumentNullException"><paramref name="cluster"/> is null.</exception>
					    public void Merge(Cluster cluster)
					    {
					        if (cluster == null)
					        {
					            throw new ArgumentNullException(nameof(cluster));
					        }
					        if (ReferenceEquals(cluster, this))
					        {
					            // Merging a cluster into itself cannot add anything.
					            return;
					        }
					        _elements.UnionWith(cluster._elements);
					    }

					    /// <summary>
					    /// Largest value of <paramref name="distance"/> over all pairs (a, b) where a is
					    /// taken from this cluster and b from <paramref name="other"/>.
					    /// </summary>
					    /// <returns>
					    /// The largest pairwise distance, or <see cref="float.MinValue"/> when no pair
					    /// exists because either cluster is empty.
					    /// </returns>
					    /// <exception cref="ArgumentNullException">An argument is null.</exception>
					    public float MaxDistance(Func<int, int, float> distance, Cluster other)
					    {
					        if (distance == null)
					        {
					            throw new ArgumentNullException(nameof(distance));
					        }
					        if (other == null)
					        {
					            throw new ArgumentNullException(nameof(other));
					        }

					        var max = float.MinValue;
					        foreach (var e in _elements)
					        {
					            foreach (var o in other._elements)
					            {
					                var d = distance(e, o);
					                if (d > max)
					                {
					                    max = d;
					                }
					            }
					        }
					        return max;
					    }

					    /// <summary>
					    /// Smallest value of <paramref name="distance"/> over all pairs (a, b) where a is
					    /// taken from this cluster and b from <paramref name="other"/>.
					    /// </summary>
					    /// <returns>
					    /// The smallest pairwise distance, or <see cref="float.MaxValue"/> when no pair
					    /// exists because either cluster is empty.
					    /// </returns>
					    /// <exception cref="ArgumentNullException">An argument is null.</exception>
					    public float MinDistance(Func<int, int, float> distance, Cluster other)
					    {
					        if (distance == null)
					        {
					            throw new ArgumentNullException(nameof(distance));
					        }
					        if (other == null)
					        {
					            throw new ArgumentNullException(nameof(other));
					        }

					        var min = float.MaxValue;
					        foreach (var e in _elements)
					        {
					            foreach (var o in other._elements)
					            {
					                var d = distance(e, o);
					                if (d < min)
					                {
					                    min = d;
					                }
					            }
					        }
					        return min;
					    }

					    /// <summary>
					    /// Arithmetic mean of <paramref name="distance"/> over all pairs (a, b) where a is
					    /// taken from this cluster and b from <paramref name="other"/>.
					    /// </summary>
					    /// <exception cref="ArgumentNullException">An argument is null.</exception>
					    /// <exception cref="InvalidOperationException">
					    /// Either cluster is empty, so there is no pair to average.
					    /// </exception>
					    public float AvgDistance(Func<int, int, float> distance, Cluster other)
					    {
					        if (distance == null)
					        {
					            throw new ArgumentNullException(nameof(distance));
					        }
					        if (other == null)
					        {
					            throw new ArgumentNullException(nameof(other));
					        }

					        // Running sum instead of buffering every pairwise distance in a list.
					        // The sum is kept in double and narrowed once at the end.
					        double sum = 0;
					        long pairs = 0;
					        foreach (var e in _elements)
					        {
					            foreach (var o in other._elements)
					            {
					                sum += distance(e, o);
					                pairs++;
					            }
					        }

					        if (pairs == 0)
					        {
					            // Same exception type as averaging an empty sequence, with a clearer message.
					            throw new InvalidOperationException("Cannot compute the average distance: at least one of the clusters is empty.");
					        }
					        return (float)(sum / pairs);
					    }
					}
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     public static class AutomaticGraphClusterer | 
			
		
	
		
			
				
					|  |  |  |  |     { | 
			
		
	
		
			
				
					|  |  |  |  |         private class Link | 
			
		
	
		
			
				
					|  |  |  |  |         { | 
			
		
	
		
			
				
					|  |  |  |  |             public int Id1; | 
			
		
	
	
		
			
				
					|  |  |  | @ -15,13 +91,13 @@ namespace ZeroLevel.HNSW.Services | 
			
		
	
		
			
				
					|  |  |  |  |             public float Distance; | 
			
		
	
		
			
				
					|  |  |  |  |         } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |         public static List<HashSet<int>> DetectClusters<T>(SmallWorld<T> world) | 
			
		
	
		
			
				
					|  |  |  |  |         public static List<Cluster> DetectClusters<T>(SmallWorld<T> world) | 
			
		
	
		
			
				
					|  |  |  |  |         { | 
			
		
	
		
			
				
					|  |  |  |  |             var distance = world.DistanceFunction; | 
			
		
	
		
			
				
					|  |  |  |  |             var links = world.GetLinks().SelectMany(pair => pair.Value.Select(id => new Link { Id1 = pair.Key, Id2 = id, Distance = distance(pair.Key, id) })).ToList(); | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |             // 1. Find R - bound between intra-cluster distances and out-of-cluster distances | 
			
		
	
		
			
				
					|  |  |  |  |             var histogram = new Histogram(HistogramMode.SQRT, links.Select(l => l.Distance)); | 
			
		
	
		
			
				
					|  |  |  |  |             var histogram = new Histogram(HistogramMode.LOG, links.Select(l => l.Distance)); | 
			
		
	
		
			
				
					|  |  |  |  |             int threshold = histogram.CuttOff(); | 
			
		
	
		
			
				
					|  |  |  |  |             var min = histogram.Bounds[threshold - 1]; | 
			
		
	
		
			
				
					|  |  |  |  |             var max = histogram.Bounds[threshold]; | 
			
		
	
	
		
			
				
					|  |  |  | @ -39,7 +115,7 @@ namespace ZeroLevel.HNSW.Services | 
			
		
	
		
			
				
					|  |  |  |  |             } | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |             // 3. Extract clusters | 
			
		
	
		
			
				
					|  |  |  |  |             List<HashSet<int>> clusters = new List<HashSet<int>>(); | 
			
		
	
		
			
				
					|  |  |  |  |             List<Cluster> clusters = new List<Cluster>(); | 
			
		
	
		
			
				
					|  |  |  |  |             foreach (var l in resultLinks) | 
			
		
	
		
			
				
					|  |  |  |  |             { | 
			
		
	
		
			
				
					|  |  |  |  |                 var id1 = l.Id1; | 
			
		
	
	
		
			
				
					|  |  |  | @ -62,7 +138,7 @@ namespace ZeroLevel.HNSW.Services | 
			
		
	
		
			
				
					|  |  |  |  |                 } | 
			
		
	
		
			
				
					|  |  |  |  |                 if (found == false) | 
			
		
	
		
			
				
					|  |  |  |  |                 { | 
			
		
	
		
			
				
					|  |  |  |  |                     var c = new HashSet<int>(); | 
			
		
	
		
			
				
					|  |  |  |  |                     var c = new Cluster(); | 
			
		
	
		
			
				
					|  |  |  |  |                     c.Add(id1); | 
			
		
	
		
			
				
					|  |  |  |  |                     c.Add(id2); | 
			
		
	
		
			
				
					|  |  |  |  |                     clusters.Add(c); | 
			
		
	
	
		
			
				
					|  |  |  | 
 |