Add AutomaticGraphClusterer
pull/1/head
unknown 3 years ago
parent 33e2048862
commit 2d2616ddce

@ -5,6 +5,7 @@ using System.Drawing;
using System.IO;
using System.Linq;
using ZeroLevel.HNSW;
using ZeroLevel.HNSW.Services;
namespace HNSWDemo
{
@ -98,18 +99,42 @@ namespace HNSWDemo
/// <summary>
/// Demo entry point: runs <see cref="AutoClusteringTest"/>, prints "Completed"
/// and then waits for a key press before the process exits.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
AutoClusteringTest();
Console.WriteLine("Completed");
// Block until a key is pressed.
Console.ReadKey();
}
/// <summary>
/// Builds a small world from the vectors produced by <c>RandomVectors(128, 3000)</c>,
/// runs <c>AutomaticGraphClusterer.DetectClusters</c> on it and prints how many
/// clusters were found together with the size of each one.
/// </summary>
static void AutoClusteringTest()
{
    var vectors = RandomVectors(128, 3000);
    var world = SmallWorld.CreateWorld<float[]>(NSWOptions<float[]>.Create(8, 16, 200, 200, Metrics.L2Euclidean, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
    world.AddItems(vectors);

    // NOTE(review): the next three statements draw a histogram straight from the world and
    // pause for a key press before clustering starts. Confirm they are still intended here;
    // this file also declares a DrawHistogram overload that takes a Histogram instead.
    DrawHistogram(world, @"D:\hist.jpg");
    Console.WriteLine("Completed");
    Console.ReadKey();

    var clusters = AutomaticGraphClusterer.DetectClusters(world);
    Console.WriteLine($"Found {clusters.Count} clusters");
    for (int i = 0; i < clusters.Count; i++)
    {
        // Clusters are reported 1-based for readability.
        Console.WriteLine($"Cluster {i + 1} contains {clusters[i].Count} items");
    }
}
static void DrawHistogram(SmallWorld<float[]> world, string filename)
/// <summary>
/// Fills a small world with the vectors from <c>RandomVectors(128, 3000)</c>, takes the
/// world's histogram, locates its OTSU threshold and writes the histogram picture
/// to <c>D:\hist.jpg</c>.
/// </summary>
static void HistogramTest()
{
    var samples = RandomVectors(128, 3000);
    var options = NSWOptions<float[]>.Create(
        8,
        16,
        200,
        200,
        Metrics.L2Euclidean,
        selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple);
    var graph = SmallWorld.CreateWorld<float[]>(options);
    graph.AddItems(samples);

    var hist = graph.GetHistogram();
    int otsuIndex = hist.OTSU();

    // Midpoint between the two bounds on either side of the threshold.
    // The value is computed but not consumed anywhere in this method.
    var lower = hist.Bounds[otsuIndex - 1];
    var upper = hist.Bounds[otsuIndex];
    var R = (upper + lower) / 2;

    DrawHistogram(hist, @"D:\hist.jpg");
}
static void DrawHistogram(Histogram histogram, string filename)
{
/* while (histogram.CountSignChanges() > 3)
{
histogram.Smooth();

@ -0,0 +1,64 @@
using System.Collections.Generic;
namespace ZeroLevel.HNSW.Services
{
/// <summary>
/// Groups the nodes of a <see cref="SmallWorld{T}"/> into clusters by keeping only the
/// links that are shorter than an automatically chosen distance bound and then
/// collecting the connected components of the remaining graph.
/// </summary>
public static class AutomaticGraphClusterer
{
    // A link key stores the first node id in the upper 32 bits and the second in the lower 32 bits.
    private const int HALF_LONG_BITS = 32;

    /// <summary>
    /// Detects clusters in the links returned by <c>world.GetNSWLinks()</c>.
    /// </summary>
    /// <typeparam name="T">Item type stored in the world.</typeparam>
    /// <param name="world">World whose links are analysed.</param>
    /// <returns>
    /// One set of node ids per cluster. Every id appears in exactly one set; nodes that
    /// have no link shorter than the bound are not reported. The list is empty when the
    /// world has no links.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="world"/> is null.</exception>
    public static List<HashSet<int>> DetectClusters<T>(SmallWorld<T> world)
    {
        if (world == null)
        {
            throw new System.ArgumentNullException(nameof(world));
        }

        var links = world.GetNSWLinks();
        var clusters = new List<HashSet<int>>();
        if (links.Count == 0)
        {
            // Nothing to build a histogram from, hence no clusters.
            return clusters;
        }

        // 1. Find R - bound between intra-cluster distances and out-of-cluster distances
        var histogram = new Histogram(HistogramMode.SQRT, links.Values);
        int threshold = histogram.OTSU();
        // NOTE(review): assumes OTSU() never returns 0, otherwise Bounds[threshold - 1] is out of range - confirm.
        var min = histogram.Bounds[threshold - 1];
        var max = histogram.Bounds[threshold];
        var R = (max + min) / 2;

        // 2. Walk the links shorter than R and 3. grow/merge clusters as we go.
        // 'owner' maps every node id seen so far to the cluster that currently holds it,
        // so a link that bridges two existing clusters can merge them.
        var owner = new Dictionary<int, HashSet<int>>();
        foreach (var pair in links)
        {
            if (pair.Value < R)
            {
                var k = pair.Key;
                var id1 = (int)(k >> HALF_LONG_BITS);
                var id2 = (int)(k - (((long)id1) << HALF_LONG_BITS));
                Connect(id1, id2, owner, clusters);
            }
        }

        // Clusters that were merged into another one were emptied; drop them.
        clusters.RemoveAll(c => c.Count == 0);
        return clusters;
    }

    /// <summary>
    /// Records that <paramref name="id1"/> and <paramref name="id2"/> belong to the same cluster,
    /// creating, extending or merging clusters as required.
    /// </summary>
    private static void Connect(int id1, int id2, Dictionary<int, HashSet<int>> owner, List<HashSet<int>> clusters)
    {
        HashSet<int> first;
        HashSet<int> second;
        owner.TryGetValue(id1, out first);
        owner.TryGetValue(id2, out second);

        if (first == null && second == null)
        {
            // Neither node has been seen yet: start a new cluster.
            var created = new HashSet<int>();
            created.Add(id1);
            created.Add(id2);
            clusters.Add(created);
            owner[id1] = created;
            owner[id2] = created;
        }
        else if (first == null)
        {
            second.Add(id1);
            owner[id1] = second;
        }
        else if (second == null)
        {
            first.Add(id2);
            owner[id2] = first;
        }
        else if (!object.ReferenceEquals(first, second))
        {
            // The link bridges two clusters: fold the smaller one into the larger one
            // and leave the smaller one empty so the caller can discard it.
            var target = first.Count >= second.Count ? first : second;
            var source = object.ReferenceEquals(target, first) ? second : first;
            foreach (var id in source)
            {
                target.Add(id);
                owner[id] = target;
            }
            source.Clear();
        }
    }
}
}

@ -15,6 +15,8 @@ namespace ZeroLevel.HNSW
// Backing store for the links.
// NOTE(review): keys presumably pack two node ids into one long and values hold the
// link distance (that is how AutomaticGraphClusterer reads this list) - confirm.
private SortedList<long, float> _set = new SortedList<long, float>();
/// <summary>
/// Direct access to the backing sorted list; the list itself is returned, not a copy.
/// </summary>
internal SortedList<long, float> Links => _set;
internal (int, int) this[int index]
{
get

@ -14,6 +14,7 @@ namespace ZeroLevel.HNSW
private readonly NSWOptions<TItem> _options;
private readonly VectorSet<TItem> _vectors;
private readonly CompactBiDirectionalLinksSet _links;
/// <summary>
/// Forwards to the <c>Links</c> list of the underlying <see cref="CompactBiDirectionalLinksSet"/>.
/// </summary>
internal SortedList<long, float> Links => _links.Links;
/// <summary>
/// There are links in the layer

@ -17,6 +17,7 @@ namespace ZeroLevel.HNSW
private int MaxLayer = 0;
private readonly ProbabilityLayerNumberGenerator _layerLevelGenerator;
private ReaderWriterLockSlim _lockGraph = new ReaderWriterLockSlim();
/// <summary>
/// Returns the <c>Links</c> list of the first layer (<c>_layers[0]</c>).
/// </summary>
internal SortedList<long, float> GetNSWLinks() => _layers[0].Links;
public SmallWorld(NSWOptions<TItem> options)
{

Loading…
Cancel
Save

Powered by TurnKey Linux.