Add AutomaticGraphClusterer
pull/1/head
unknown 3 years ago
parent 33e2048862
commit 2d2616ddce

@ -5,6 +5,7 @@ using System.Drawing;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using ZeroLevel.HNSW; using ZeroLevel.HNSW;
using ZeroLevel.HNSW.Services;
namespace HNSWDemo namespace HNSWDemo
{ {
@ -98,18 +99,42 @@ namespace HNSWDemo
static void Main(string[] args) static void Main(string[] args)
{
AutoClusteringTest();
Console.WriteLine("Completed");
Console.ReadKey();
}
static void AutoClusteringTest()
{ {
var vectors = RandomVectors(128, 3000); var vectors = RandomVectors(128, 3000);
var world = SmallWorld.CreateWorld<float[]>(NSWOptions<float[]>.Create(8, 16, 200, 200, Metrics.L2Euclidean, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple)); var world = SmallWorld.CreateWorld<float[]>(NSWOptions<float[]>.Create(8, 16, 200, 200, Metrics.L2Euclidean, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
world.AddItems(vectors); world.AddItems(vectors);
DrawHistogram(world, @"D:\hist.jpg"); var clusters = AutomaticGraphClusterer.DetectClusters(world);
Console.WriteLine("Completed"); Console.WriteLine($"Found {clusters.Count} clusters");
Console.ReadKey(); for (int i = 0; i < clusters.Count; i++)
{
Console.WriteLine($"Cluster {i+1} countains {clusters[i].Count} items");
}
} }
static void DrawHistogram(SmallWorld<float[]> world, string filename) static void HistogramTest()
{ {
var vectors = RandomVectors(128, 3000);
var world = SmallWorld.CreateWorld<float[]>(NSWOptions<float[]>.Create(8, 16, 200, 200, Metrics.L2Euclidean, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
world.AddItems(vectors);
var histogram = world.GetHistogram(); var histogram = world.GetHistogram();
int threshold = histogram.OTSU();
var min = histogram.Bounds[threshold - 1];
var max = histogram.Bounds[threshold];
var R = (max + min) / 2;
DrawHistogram(histogram, @"D:\hist.jpg");
}
static void DrawHistogram(Histogram histogram, string filename)
{
/* while (histogram.CountSignChanges() > 3) /* while (histogram.CountSignChanges() > 3)
{ {
histogram.Smooth(); histogram.Smooth();

@ -0,0 +1,64 @@
using System.Collections.Generic;
namespace ZeroLevel.HNSW.Services
{
/// <summary>
/// Groups the items of a <see cref="SmallWorld{T}"/> into clusters by keeping only the
/// "short" links of its bottom layer and collecting the connected components of the
/// remaining graph.
/// </summary>
public static class AutomaticGraphClusterer
{
    /// <summary>Number of bits occupied by each node id inside a packed 64-bit link key.</summary>
    private const int HALF_LONG_BITS = 32;

    /// <summary>Mask selecting the low 32 bits (second node id) of a packed link key.</summary>
    private const long LOW_HALF_MASK = 0xFFFFFFFFL;

    /// <summary>
    /// Detects clusters in the links returned by <c>world.GetNSWLinks()</c>.
    /// </summary>
    /// <typeparam name="T">Item type stored in the world.</typeparam>
    /// <param name="world">Graph whose links are analysed.</param>
    /// <returns>
    /// A list of disjoint node-id sets. Two nodes end up in the same set when they are
    /// connected by a chain of links whose distance is below the computed bound R.
    /// Nodes without any such link are not reported. The list is empty when the world
    /// has no links.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="world"/> is null.</exception>
    public static List<HashSet<int>> DetectClusters<T>(SmallWorld<T> world)
    {
        if (world == null)
        {
            throw new System.ArgumentNullException(nameof(world));
        }

        var clusters = new List<HashSet<int>>();
        var links = world.GetNSWLinks();
        if (links == null || links.Count == 0)
        {
            // Nothing to build a histogram from, hence no clusters.
            return clusters;
        }

        // 1. Find R - bound between intra-cluster distances and out-of-cluster distances.
        var histogram = new Histogram(HistogramMode.SQRT, links.Values);
        int threshold = histogram.OTSU();
        // NOTE(review): if OTSU() can return 0, the index below is out of range - confirm
        // against the Histogram implementation.
        var min = histogram.Bounds[threshold - 1];
        var max = histogram.Bounds[threshold];
        var R = (max + min) / 2;

        // 2-3. Walk the links shorter than R and union their endpoints.
        // 'owner' maps every node seen so far to the cluster that currently holds it, so a
        // link joining two existing clusters merges them instead of leaving a node in both.
        var owner = new Dictionary<int, HashSet<int>>();
        foreach (var pair in links)
        {
            if (!(pair.Value < R))
            {
                continue;
            }

            // The key packs two node ids: first id in the high 32 bits, second in the low 32.
            var key = pair.Key;
            var id1 = (int)(key >> HALF_LONG_BITS);
            var id2 = (int)(key & LOW_HALF_MASK);

            owner.TryGetValue(id1, out var first);
            owner.TryGetValue(id2, out var second);

            if (first == null && second == null)
            {
                var created = new HashSet<int> { id1, id2 };
                owner[id1] = created;
                owner[id2] = created;
                clusters.Add(created);
            }
            else if (first == null)
            {
                second.Add(id1);
                owner[id1] = second;
            }
            else if (second == null)
            {
                first.Add(id2);
                owner[id2] = first;
            }
            else if (!object.ReferenceEquals(first, second))
            {
                // Both endpoints already belong to different clusters: merge the smaller
                // one into the larger one and re-point its members.
                var target = first.Count >= second.Count ? first : second;
                var absorbed = object.ReferenceEquals(target, first) ? second : first;
                foreach (var id in absorbed)
                {
                    owner[id] = target;
                }
                target.UnionWith(absorbed);
                // Emptied sets are dropped from the result after the loop.
                absorbed.Clear();
            }
        }

        clusters.RemoveAll(c => c.Count == 0);
        return clusters;
    }
}
}

@ -15,6 +15,8 @@ namespace ZeroLevel.HNSW
private SortedList<long, float> _set = new SortedList<long, float>(); private SortedList<long, float> _set = new SortedList<long, float>();
internal SortedList<long, float> Links => _set;
internal (int, int) this[int index] internal (int, int) this[int index]
{ {
get get

@ -14,6 +14,7 @@ namespace ZeroLevel.HNSW
private readonly NSWOptions<TItem> _options; private readonly NSWOptions<TItem> _options;
private readonly VectorSet<TItem> _vectors; private readonly VectorSet<TItem> _vectors;
private readonly CompactBiDirectionalLinksSet _links; private readonly CompactBiDirectionalLinksSet _links;
internal SortedList<long, float> Links => _links.Links;
/// <summary> /// <summary>
/// There are links in the layer /// There are links in the layer

@ -17,6 +17,7 @@ namespace ZeroLevel.HNSW
private int MaxLayer = 0; private int MaxLayer = 0;
private readonly ProbabilityLayerNumberGenerator _layerLevelGenerator; private readonly ProbabilityLayerNumberGenerator _layerLevelGenerator;
private ReaderWriterLockSlim _lockGraph = new ReaderWriterLockSlim(); private ReaderWriterLockSlim _lockGraph = new ReaderWriterLockSlim();
internal SortedList<long, float> GetNSWLinks() => _layers[0].Links;
public SmallWorld(NSWOptions<TItem> options) public SmallWorld(NSWOptions<TItem> options)
{ {

Loading…
Cancel
Save

Powered by TurnKey Linux.