|
|
@ -99,6 +99,182 @@ namespace HNSWDemo
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Brute-force reference over byte-quantized vectors: exact k-nearest lookup and
/// threshold-based cluster detection, both driven by a caller-supplied distance function.
/// </summary>
public class QVectorsDirectCompare
{
    /// <summary>
    /// Shift used to pack a pair of vector indices into one <see cref="long"/> key:
    /// the first index goes into the upper 32 bits, the second into the lower 32 bits.
    /// </summary>
    private const int HALF_LONG_BITS = 32;

    private readonly IList<byte[]> _vectors;
    private readonly Func<byte[], byte[], float> _distance;

    /// <summary>Creates a comparer over the given vectors.</summary>
    /// <param name="vectors">Vectors to search; a vector's position in the list is its id.</param>
    /// <param name="distance">Distance function; smaller values mean closer vectors.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public QVectorsDirectCompare(List<byte[]> vectors, Func<byte[], byte[], float> distance)
    {
        _vectors = vectors ?? throw new ArgumentNullException(nameof(vectors));
        _distance = distance ?? throw new ArgumentNullException(nameof(distance));
    }

    /// <summary>
    /// Returns up to <paramref name="k"/> (id, distance) pairs with the smallest distance to
    /// <paramref name="v"/>, ordered by ascending distance.
    /// </summary>
    /// <param name="v">Query vector.</param>
    /// <param name="k">Maximum number of neighbours to return.</param>
    public IEnumerable<(int, float)> KNearest(byte[] v, int k)
    {
        // Distances are computed eagerly here; only the ordering itself is deferred.
        var distances = new (int, float)[_vectors.Count];
        for (int i = 0; i < distances.Length; i++)
        {
            distances[i] = (i, _distance(v, _vectors[i]));
        }
        return distances.OrderBy(p => p.Item2).Take(k);
    }

    /// <summary>
    /// Computes all pairwise distances, derives a cut-off R from a histogram of those distances,
    /// and groups vectors that are connected by links shorter than R.
    /// </summary>
    /// <returns>
    /// Disjoint sets of vector ids; vectors without any link shorter than R are not reported.
    /// </returns>
    public List<HashSet<int>> DetectClusters()
    {
        // Fewer than two vectors means no pairs, so there is nothing to build a histogram from.
        if (_vectors.Count < 2)
        {
            return new List<HashSet<int>>();
        }

        // Keys are generated in ascending order, so each Add appends to the end of the list.
        var links = new SortedList<long, float>();
        for (int i = 0; i < _vectors.Count; i++)
        {
            for (int j = i + 1; j < _vectors.Count; j++)
            {
                long key = (((long)i) << HALF_LONG_BITS) + j;
                links.Add(key, _distance(_vectors[i], _vectors[j]));
            }
        }

        // 1. Find R - bound between intra-cluster distances and out-of-cluster distances.
        //    R is the midpoint of the two histogram bounds around the OTSU threshold index.
        // NOTE(review): assumes OTSU() returns an index >= 1 that is valid for Bounds - confirm.
        var histogram = new Histogram(HistogramMode.SQRT, links.Values);
        int threshold = histogram.OTSU();
        var min = histogram.Bounds[threshold - 1];
        var max = histogram.Bounds[threshold];
        var R = (max + min) / 2;

        // 2. Keep links with distances less than R, 3. group their endpoints into clusters.
        return ExtractClusters(links.Where(p => p.Value < R).Select(p => p.Key));
    }

    /// <summary>
    /// Groups link endpoints into connected components. When a link joins two clusters that
    /// already exist, they are merged so that no id ends up in more than one cluster.
    /// </summary>
    private static List<HashSet<int>> ExtractClusters(IEnumerable<long> linkKeys)
    {
        var clusters = new List<HashSet<int>>();
        // id -> cluster currently holding that id.
        var owner = new Dictionary<int, HashSet<int>>();

        foreach (var key in linkKeys)
        {
            var id1 = (int)(key >> HALF_LONG_BITS);
            var id2 = (int)(key - (((long)id1) << HALF_LONG_BITS));

            owner.TryGetValue(id1, out var c1);
            owner.TryGetValue(id2, out var c2);

            if (c1 == null && c2 == null)
            {
                var created = new HashSet<int> { id1, id2 };
                owner[id1] = created;
                owner[id2] = created;
                clusters.Add(created);
            }
            else if (c2 == null)
            {
                c1.Add(id2);
                owner[id2] = c1;
            }
            else if (c1 == null)
            {
                c2.Add(id1);
                owner[id1] = c2;
            }
            else if (!ReferenceEquals(c1, c2))
            {
                // The link bridges two clusters: fold the second into the first.
                foreach (var member in c2)
                {
                    c1.Add(member);
                    owner[member] = c1;
                }
                clusters.Remove(c2);
            }
        }
        return clusters;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Brute-force reference over long-packed quantized vectors: exact k-nearest lookup and
/// threshold-based cluster detection, both driven by a caller-supplied distance function.
/// </summary>
public class QLVectorsDirectCompare
{
    /// <summary>
    /// Shift used to pack a pair of vector indices into one <see cref="long"/> key:
    /// the first index goes into the upper 32 bits, the second into the lower 32 bits.
    /// </summary>
    private const int HALF_LONG_BITS = 32;

    private readonly IList<long[]> _vectors;
    private readonly Func<long[], long[], float> _distance;

    /// <summary>Creates a comparer over the given vectors.</summary>
    /// <param name="vectors">Vectors to search; a vector's position in the list is its id.</param>
    /// <param name="distance">Distance function; smaller values mean closer vectors.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public QLVectorsDirectCompare(List<long[]> vectors, Func<long[], long[], float> distance)
    {
        _vectors = vectors ?? throw new ArgumentNullException(nameof(vectors));
        _distance = distance ?? throw new ArgumentNullException(nameof(distance));
    }

    /// <summary>
    /// Returns up to <paramref name="k"/> (id, distance) pairs with the smallest distance to
    /// <paramref name="v"/>, ordered by ascending distance.
    /// </summary>
    /// <param name="v">Query vector.</param>
    /// <param name="k">Maximum number of neighbours to return.</param>
    public IEnumerable<(int, float)> KNearest(long[] v, int k)
    {
        // Distances are computed eagerly here; only the ordering itself is deferred.
        var distances = new (int, float)[_vectors.Count];
        for (int i = 0; i < distances.Length; i++)
        {
            distances[i] = (i, _distance(v, _vectors[i]));
        }
        return distances.OrderBy(p => p.Item2).Take(k);
    }

    /// <summary>
    /// Computes all pairwise distances, derives a cut-off R from a histogram of those distances,
    /// and groups vectors that are connected by links shorter than R.
    /// </summary>
    /// <returns>
    /// Disjoint sets of vector ids; vectors without any link shorter than R are not reported.
    /// </returns>
    public List<HashSet<int>> DetectClusters()
    {
        // Fewer than two vectors means no pairs, so there is nothing to build a histogram from.
        if (_vectors.Count < 2)
        {
            return new List<HashSet<int>>();
        }

        // Keys are generated in ascending order, so each Add appends to the end of the list.
        var links = new SortedList<long, float>();
        for (int i = 0; i < _vectors.Count; i++)
        {
            for (int j = i + 1; j < _vectors.Count; j++)
            {
                long key = (((long)i) << HALF_LONG_BITS) + j;
                links.Add(key, _distance(_vectors[i], _vectors[j]));
            }
        }

        // 1. Find R - bound between intra-cluster distances and out-of-cluster distances.
        //    R is the midpoint of the two histogram bounds around the OTSU threshold index.
        // NOTE(review): assumes OTSU() returns an index >= 1 that is valid for Bounds - confirm.
        var histogram = new Histogram(HistogramMode.SQRT, links.Values);
        int threshold = histogram.OTSU();
        var min = histogram.Bounds[threshold - 1];
        var max = histogram.Bounds[threshold];
        var R = (max + min) / 2;

        // 2. Keep links with distances less than R, 3. group their endpoints into clusters.
        return ExtractClusters(links.Where(p => p.Value < R).Select(p => p.Key));
    }

    /// <summary>
    /// Groups link endpoints into connected components. When a link joins two clusters that
    /// already exist, they are merged so that no id ends up in more than one cluster.
    /// </summary>
    private static List<HashSet<int>> ExtractClusters(IEnumerable<long> linkKeys)
    {
        var clusters = new List<HashSet<int>>();
        // id -> cluster currently holding that id.
        var owner = new Dictionary<int, HashSet<int>>();

        foreach (var key in linkKeys)
        {
            var id1 = (int)(key >> HALF_LONG_BITS);
            var id2 = (int)(key - (((long)id1) << HALF_LONG_BITS));

            owner.TryGetValue(id1, out var c1);
            owner.TryGetValue(id2, out var c2);

            if (c1 == null && c2 == null)
            {
                var created = new HashSet<int> { id1, id2 };
                owner[id1] = created;
                owner[id2] = created;
                clusters.Add(created);
            }
            else if (c2 == null)
            {
                c1.Add(id2);
                owner[id2] = c1;
            }
            else if (c1 == null)
            {
                c2.Add(id1);
                owner[id1] = c2;
            }
            else if (!ReferenceEquals(c1, c2))
            {
                // The link bridges two clusters: fold the second into the first.
                foreach (var member in c2)
                {
                    c1.Add(member);
                    owner[member] = c1;
                }
                clusters.Remove(c2);
            }
        }
        return clusters;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
public enum Gender
|
|
|
|
public enum Gender
|
|
|
|
{
|
|
|
|
{
|
|
|
|
Unknown, Male, Feemale
|
|
|
|
Unknown, Male, Feemale
|
|
|
@ -165,11 +341,257 @@ namespace HNSWDemo
|
|
|
|
|
|
|
|
|
|
|
|
static void Main(string[] args)
|
|
|
|
static void Main(string[] args)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
InsertTimeExplosionTest();
|
|
|
|
QuantizatorTest();
|
|
|
|
Console.WriteLine("Completed");
|
|
|
|
Console.WriteLine("Completed");
|
|
|
|
Console.ReadKey();
|
|
|
|
Console.ReadKey();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Compares HNSW search over long-quantized random vectors with the brute-force
/// <c>QLVectorsDirectCompare</c> result and prints hit-rate and timing statistics.
/// </summary>
static void QAccuracityTest()
{
    int K = 200;
    var count = 5000;
    var testCount = 500;
    var dimensionality = 128;
    var totalHits = new List<int>();
    var timewatchesNP = new List<float>();
    var timewatchesHNSW = new List<float>();
    var q = new Quantizator(-1f, 1f);

    var samples = RandomVectors(dimensionality, count).Select(v => q.QuantizeToLong(v)).ToList();

    var sw = new Stopwatch();

    var test = new QLVectorsDirectCompare(samples, CosineDistance.NonOptimized);
    var world = new SmallWorld<long[]>(NSWOptions<long[]>.Create(8, 12, 100, 100, CosineDistance.NonOptimized));

    sw.Start();
    var ids = world.AddItems(samples.ToArray());
    sw.Stop();

    Console.WriteLine($"Insert {ids.Length} items: {sw.ElapsedMilliseconds} ms");
    Console.WriteLine("Start test");

    var test_vectors = RandomVectors(dimensionality, testCount).Select(v => q.QuantizeToLong(v)).ToList();
    foreach (var v in test_vectors)
    {
        // Ground truth: ids of the K nearest vectors found by exhaustive comparison.
        sw.Restart();
        var gt = new HashSet<int>(test.KNearest(v, K).Select(p => p.Item1));
        sw.Stop();
        // Elapsed.TotalMilliseconds keeps the fractional part; ElapsedMilliseconds would
        // truncate sub-millisecond queries to 0 and distort MIN/AVG.
        timewatchesNP.Add((float)sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        var result = world.Search(v, K);
        sw.Stop();

        timewatchesHNSW.Add((float)sw.Elapsed.TotalMilliseconds);
        var hits = 0;
        foreach (var r in result)
        {
            if (gt.Contains(r.Item1))
            {
                hits++;
            }
        }
        totalHits.Add(hits);
    }

    Console.WriteLine($"MIN Accuracity: {totalHits.Min() * 100 / K}%");
    Console.WriteLine($"AVG Accuracity: {totalHits.Average() * 100 / K}%");
    Console.WriteLine($"MAX Accuracity: {totalHits.Max() * 100 / K}%");

    Console.WriteLine($"MIN HNSW TIME: {timewatchesHNSW.Min()} ms");
    Console.WriteLine($"AVG HNSW TIME: {timewatchesHNSW.Average()} ms");
    Console.WriteLine($"MAX HNSW TIME: {timewatchesHNSW.Max()} ms");

    Console.WriteLine($"MIN NP TIME: {timewatchesNP.Min()} ms");
    Console.WriteLine($"AVG NP TIME: {timewatchesNP.Average()} ms");
    Console.WriteLine($"MAX NP TIME: {timewatchesNP.Max()} ms");
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Repeatedly inserts batches of long-quantized random vectors into one HNSW world and prints
/// the elapsed time of every batch. The timed region includes the quantization of the batch.
/// </summary>
static void QInsertTimeExplosionTest()
{
    const int batchSize = 10000;
    const int batchCount = 100;
    const int dims = 128;

    var timer = new Stopwatch();
    var graph = new SmallWorld<long[]>(NSWOptions<long[]>.Create(6, 12, 100, 100, CosineDistance.NonOptimized));
    var quantizer = new Quantizator(-1f, 1f);

    for (int batch = 0; batch < batchCount; batch++)
    {
        var vectors = RandomVectors(dims, batchSize);

        timer.Restart();
        var inserted = graph.AddItems(vectors.Select(x => quantizer.QuantizeToLong(x)).ToArray());
        timer.Stop();

        Console.WriteLine($"ITERATION: [{batch:D4}] COUNT: [{inserted.Length}] ELAPSED [{timer.ElapsedMilliseconds} ms]");
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Compares HNSW search over random float vectors with the brute-force
/// <c>VectorsDirectCompare</c> result and prints hit-rate and timing statistics.
/// </summary>
static void AccuracityTest()
{
    int K = 200;
    var count = 3000;
    var testCount = 500;
    var dimensionality = 128;
    var totalHits = new List<int>();
    var timewatchesNP = new List<float>();
    var timewatchesHNSW = new List<float>();

    var samples = RandomVectors(dimensionality, count);

    var sw = new Stopwatch();

    var test = new VectorsDirectCompare(samples, CosineDistance.NonOptimized);
    var world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(8, 12, 100, 100, CosineDistance.NonOptimized));

    sw.Start();
    var ids = world.AddItems(samples.ToArray());
    sw.Stop();

    // Disabled experiment kept from the original: round-trip the world through a dump and
    // search the read-only copy instead.
    /*
    byte[] dump;
    using (var ms = new MemoryStream())
    {
        world.Serialize(ms);
        dump = ms.ToArray();
    }
    Console.WriteLine($"Full dump size: {dump.Length} bytes");

    ReadOnlySmallWorld<float[]> world;
    using (var ms = new MemoryStream(dump))
    {
        world = SmallWorld.CreateReadOnlyWorldFrom<float[]>(NSWReadOnlyOption<float[]>.Create(100, CosineDistance.NonOptimized, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple), ms);
    }
    */

    Console.WriteLine($"Insert {ids.Length} items: {sw.ElapsedMilliseconds} ms");
    Console.WriteLine("Start test");

    var test_vectors = RandomVectors(dimensionality, testCount);
    foreach (var v in test_vectors)
    {
        // Ground truth: ids of the K nearest vectors found by exhaustive comparison.
        sw.Restart();
        var gt = new HashSet<int>(test.KNearest(v, K).Select(p => p.Item1));
        sw.Stop();
        // Elapsed.TotalMilliseconds keeps the fractional part; ElapsedMilliseconds would
        // truncate sub-millisecond queries to 0 and distort MIN/AVG.
        timewatchesNP.Add((float)sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        var result = world.Search(v, K);
        sw.Stop();

        timewatchesHNSW.Add((float)sw.Elapsed.TotalMilliseconds);
        var hits = 0;
        foreach (var r in result)
        {
            if (gt.Contains(r.Item1))
            {
                hits++;
            }
        }
        totalHits.Add(hits);
    }

    Console.WriteLine($"MIN Accuracity: {totalHits.Min() * 100 / K}%");
    Console.WriteLine($"AVG Accuracity: {totalHits.Average() * 100 / K}%");
    Console.WriteLine($"MAX Accuracity: {totalHits.Max() * 100 / K}%");

    Console.WriteLine($"MIN HNSW TIME: {timewatchesHNSW.Min()} ms");
    Console.WriteLine($"AVG HNSW TIME: {timewatchesHNSW.Average()} ms");
    Console.WriteLine($"MAX HNSW TIME: {timewatchesHNSW.Max()} ms");

    Console.WriteLine($"MIN NP TIME: {timewatchesNP.Min()} ms");
    Console.WriteLine($"AVG NP TIME: {timewatchesNP.Average()} ms");
    Console.WriteLine($"MAX NP TIME: {timewatchesNP.Max()} ms");
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Quantizes random vectors and prints how far the cosine distance between neighbouring
/// quantized vectors deviates from the distance between the original float vectors.
/// </summary>
static void QuantizatorTest()
{
    var vectors = RandomVectors(128, 500000);

    // The quantizer is built from the smallest and largest component over all vectors.
    var components = vectors.SelectMany(x => x);
    var quantizer = new Quantizator(components.Min(), components.Max());
    var quantized = vectors.Select(x => quantizer.QuantizeToLong(x)).ToArray();

    // comparing: each vector against its successor, original vs quantized form
    var diffs = new List<float>();
    for (int next = 1; next < vectors.Count; next++)
    {
        int prev = next - 1;
        var exact = CosineDistance.NonOptimized(vectors[prev], vectors[next]);
        var approx = CosineDistance.NonOptimized(quantized[prev], quantized[next]);
        diffs.Add(Math.Abs(exact - approx));
    }

    Console.WriteLine($"Min diff: {diffs.Min()}");
    Console.WriteLine($"Avg diff: {diffs.Average()}");
    Console.WriteLine($"Max diff: {diffs.Max()}");
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds a world from random vectors, serializes it, restores it into a fresh world and
/// reports when the re-serialized dump differs in size from the first one.
/// </summary>
static void SaveRestoreTest()
{
    const int vectorCount = 1000;
    const int dims = 128;

    var vectors = RandomVectors(dims, vectorCount);
    var source = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 15, 200, 200, CosineDistance.ForUnits));

    var timer = new Stopwatch();
    timer.Start();
    var inserted = source.AddItems(vectors.ToArray());
    timer.Stop();
    Console.WriteLine($"Insert {inserted.Length} items on {timer.ElapsedMilliseconds} ms");
    Console.WriteLine("Start test");

    byte[] dump = ToBytes(source);
    Console.WriteLine($"Full dump size: {dump.Length} bytes");

    var restoredWorld = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 15, 200, 200, CosineDistance.ForUnits));
    using (var input = new MemoryStream(dump))
    {
        restoredWorld.Deserialize(input);
    }

    byte[] testDump = ToBytes(restoredWorld);
    if (testDump.Length != dump.Length)
    {
        Console.WriteLine($"Incorrect restored size. Got {testDump.Length}. Expected: {dump.Length}");
    }

    // Serializes a world into a fresh in-memory buffer.
    byte[] ToBytes(SmallWorld<float[]> w)
    {
        using (var output = new MemoryStream())
        {
            w.Serialize(output);
            return output.ToArray();
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Repeatedly inserts batches of random float vectors into one HNSW world and prints the
/// elapsed insertion time of every batch.
/// </summary>
static void InsertTimeExplosionTest()
{
    const int batchSize = 10000;
    const int batchCount = 100;
    const int dims = 128;

    var timer = new Stopwatch();
    var graph = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 12, 100, 100, CosineDistance.NonOptimized));

    for (int batch = 0; batch < batchCount; batch++)
    {
        var vectors = RandomVectors(dims, batchSize);

        timer.Restart();
        var inserted = graph.AddItems(vectors.ToArray());
        timer.Stop();

        Console.WriteLine($"ITERATION: [{batch:D4}] COUNT: [{inserted.Length}] ELAPSED [{timer.ElapsedMilliseconds} ms]");
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
static void TestOnMnist()
|
|
|
|
static void TestOnMnist()
|
|
|
|
{
|
|
|
|
{
|
|
|
|
int imageCount, rowCount, colCount;
|
|
|
|
int imageCount, rowCount, colCount;
|
|
|
@ -257,10 +679,6 @@ namespace HNSWDemo
|
|
|
|
|
|
|
|
|
|
|
|
static void DrawHistogram(Histogram histogram, string filename)
|
|
|
|
static void DrawHistogram(Histogram histogram, string filename)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
/* while (histogram.CountSignChanges() > 3)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
histogram.Smooth();
|
|
|
|
|
|
|
|
}*/
|
|
|
|
|
|
|
|
var wb = 1200 / histogram.Values.Length;
|
|
|
|
var wb = 1200 / histogram.Values.Length;
|
|
|
|
var k = 600.0f / (float)histogram.Values.Max();
|
|
|
|
var k = 600.0f / (float)histogram.Values.Max();
|
|
|
|
|
|
|
|
|
|
|
@ -269,7 +687,8 @@ namespace HNSWDemo
|
|
|
|
|
|
|
|
|
|
|
|
using (var bmp = new Bitmap(1200, 600))
|
|
|
|
using (var bmp = new Bitmap(1200, 600))
|
|
|
|
{
|
|
|
|
{
|
|
|
|
using (var g = Graphics.FromImage(bmp)) {
|
|
|
|
using (var g = Graphics.FromImage(bmp))
|
|
|
|
|
|
|
|
{
|
|
|
|
for (int i = 0; i<histogram.Values.Length; i++)
|
|
|
|
for (int i = 0; i<histogram.Values.Length; i++)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
var height = (int)(histogram.Values[i] * k);
|
|
|
|
var height = (int)(histogram.Values[i] * k);
|
|
|
@ -297,7 +716,7 @@ namespace HNSWDemo
|
|
|
|
var count = 10000;
|
|
|
|
var count = 10000;
|
|
|
|
var dimensionality = 128;
|
|
|
|
var dimensionality = 128;
|
|
|
|
var samples = RandomVectors(dimensionality, count);
|
|
|
|
var samples = RandomVectors(dimensionality, count);
|
|
|
|
var world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 15, 200, 200, CosineDistance.ForUnits, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
|
|
|
|
var world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 15, 200, 200, CosineDistance.ForUnits));
|
|
|
|
var ids = world.AddItems(samples.ToArray());
|
|
|
|
var ids = world.AddItems(samples.ToArray());
|
|
|
|
|
|
|
|
|
|
|
|
Console.WriteLine("Start test");
|
|
|
|
Console.WriteLine("Start test");
|
|
|
@ -313,7 +732,7 @@ namespace HNSWDemo
|
|
|
|
ReadOnlySmallWorld<float[]> compactWorld;
|
|
|
|
ReadOnlySmallWorld<float[]> compactWorld;
|
|
|
|
using (var ms = new MemoryStream(dump))
|
|
|
|
using (var ms = new MemoryStream(dump))
|
|
|
|
{
|
|
|
|
{
|
|
|
|
compactWorld = SmallWorld.CreateReadOnlyWorldFrom<float[]>(NSWReadOnlyOption<float[]>.Create(200, CosineDistance.ForUnits, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple), ms);
|
|
|
|
compactWorld = SmallWorld.CreateReadOnlyWorldFrom<float[]>(NSWReadOnlyOption<float[]>.Create(200, CosineDistance.ForUnits), ms);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Compare worlds outputs
|
|
|
|
// Compare worlds outputs
|
|
|
@ -379,7 +798,7 @@ namespace HNSWDemo
|
|
|
|
var samples = RandomVectors(dimensionality, count);
|
|
|
|
var samples = RandomVectors(dimensionality, count);
|
|
|
|
|
|
|
|
|
|
|
|
var test = new VectorsDirectCompare(samples, CosineDistance.ForUnits);
|
|
|
|
var test = new VectorsDirectCompare(samples, CosineDistance.ForUnits);
|
|
|
|
var world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 15, 200, 200, CosineDistance.ForUnits, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
|
|
|
|
var world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 15, 200, 200, CosineDistance.ForUnits));
|
|
|
|
var ids = world.AddItems(samples.ToArray());
|
|
|
|
var ids = world.AddItems(samples.ToArray());
|
|
|
|
|
|
|
|
|
|
|
|
Console.WriteLine("Start test");
|
|
|
|
Console.WriteLine("Start test");
|
|
|
@ -394,7 +813,7 @@ namespace HNSWDemo
|
|
|
|
ReadOnlySmallWorld<float[]> compactWorld;
|
|
|
|
ReadOnlySmallWorld<float[]> compactWorld;
|
|
|
|
using (var ms = new MemoryStream(dump))
|
|
|
|
using (var ms = new MemoryStream(dump))
|
|
|
|
{
|
|
|
|
{
|
|
|
|
compactWorld = SmallWorld.CreateReadOnlyWorldFrom<float[]>(NSWReadOnlyOption<float[]>.Create(200, CosineDistance.ForUnits, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple), ms);
|
|
|
|
compactWorld = SmallWorld.CreateReadOnlyWorldFrom<float[]>(NSWReadOnlyOption<float[]>.Create(200, CosineDistance.ForUnits), ms);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Compare worlds outputs
|
|
|
|
// Compare worlds outputs
|
|
|
@ -494,61 +913,6 @@ namespace HNSWDemo
|
|
|
|
Console.WriteLine($"MAX HNSWCompact Accuracity: {totalHitsHNSWCompact.Max() * 100 / K}%");
|
|
|
|
Console.WriteLine($"MAX HNSWCompact Accuracity: {totalHitsHNSWCompact.Max() * 100 / K}%");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds a world from random vectors, serializes it, restores it into a fresh world and
/// checks the re-serialized size; then loads the same dump as a read-only world and prints
/// how much smaller its own dump is.
/// </summary>
static void SaveRestoreTest()
{
    const int vectorCount = 1000;
    const int dims = 128;

    var vectors = RandomVectors(dims, vectorCount);
    var source = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 15, 200, 200, CosineDistance.ForUnits, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));

    var timer = new Stopwatch();
    timer.Start();
    var inserted = source.AddItems(vectors.ToArray());
    timer.Stop();
    Console.WriteLine($"Insert {inserted.Length} items on {timer.ElapsedMilliseconds} ms");
    Console.WriteLine("Start test");

    byte[] dump = ToBytes(source);
    Console.WriteLine($"Full dump size: {dump.Length} bytes");

    var restoredWorld = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 15, 200, 200, CosineDistance.ForUnits, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));
    using (var input = new MemoryStream(dump))
    {
        restoredWorld.Deserialize(input);
    }

    byte[] testDump = ToBytes(restoredWorld);
    if (testDump.Length != dump.Length)
    {
        Console.WriteLine($"Incorrect restored size. Got {testDump.Length}. Expected: {dump.Length}");
        return;
    }

    ReadOnlySmallWorld<float[]> compactWorld;
    using (var input = new MemoryStream(dump))
    {
        compactWorld = SmallWorld.CreateReadOnlyWorldFrom<float[]>(NSWReadOnlyOption<float[]>.Create(200, CosineDistance.ForUnits, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple), input);
    }

    byte[] smallWorldDump;
    using (var output = new MemoryStream())
    {
        compactWorld.Serialize(output);
        smallWorldDump = output.ToArray();
    }

    // Compact dump size as a percentage of the full dump size.
    var p = smallWorldDump.Length * 100.0f / dump.Length;
    Console.WriteLine($"Compact dump size: {smallWorldDump.Length} bytes. Decrease: {100 - p}%");

    // Serializes a world into a fresh in-memory buffer.
    byte[] ToBytes(SmallWorld<float[]> w)
    {
        using (var output = new MemoryStream())
        {
            w.Serialize(output);
            return output.ToArray();
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void FilterTest()
|
|
|
|
static void FilterTest()
|
|
|
|
{
|
|
|
|
{
|
|
|
|
var count = 1000;
|
|
|
|
var count = 1000;
|
|
|
@ -598,82 +962,6 @@ namespace HNSWDemo
|
|
|
|
Console.WriteLine($"SUCCESS: {hits}");
|
|
|
|
Console.WriteLine($"SUCCESS: {hits}");
|
|
|
|
Console.WriteLine($"ERROR: {miss}");
|
|
|
|
Console.WriteLine($"ERROR: {miss}");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*/
|
|
|
|
/// <summary>
/// Compares HNSW search over random float vectors with the brute-force
/// <c>VectorsDirectCompare</c> result and prints hit-rate and timing statistics.
/// </summary>
static void AccuracityTest()
{
    int K = 200;
    var count = 2000;
    var testCount = 1000;
    var dimensionality = 128;
    var totalHits = new List<int>();
    var timewatchesNP = new List<float>();
    var timewatchesHNSW = new List<float>();

    var samples = RandomVectors(dimensionality, count);

    var sw = new Stopwatch();

    var test = new VectorsDirectCompare(samples, CosineDistance.NonOptimized);
    var world = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 12, 100, 100, CosineDistance.NonOptimized, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));

    sw.Start();
    var ids = world.AddItems(samples.ToArray());
    sw.Stop();
    Console.WriteLine($"Insert {ids.Length} items: {sw.ElapsedMilliseconds} ms");
    Console.WriteLine("Start test");

    var test_vectors = RandomVectors(dimensionality, testCount);
    foreach (var v in test_vectors)
    {
        // Ground truth: ids of the K nearest vectors found by exhaustive comparison.
        sw.Restart();
        var gt = new HashSet<int>(test.KNearest(v, K).Select(p => p.Item1));
        sw.Stop();
        // Elapsed.TotalMilliseconds keeps the fractional part; ElapsedMilliseconds would
        // truncate sub-millisecond queries to 0 and distort MIN/AVG.
        timewatchesNP.Add((float)sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        var result = world.Search(v, K);
        sw.Stop();

        timewatchesHNSW.Add((float)sw.Elapsed.TotalMilliseconds);
        var hits = 0;
        foreach (var r in result)
        {
            if (gt.Contains(r.Item1))
            {
                hits++;
            }
        }
        totalHits.Add(hits);
    }
    Console.WriteLine($"MIN Accuracity: {totalHits.Min() * 100 / K}%");
    Console.WriteLine($"AVG Accuracity: {totalHits.Average() * 100 / K}%");
    Console.WriteLine($"MAX Accuracity: {totalHits.Max() * 100 / K}%");

    Console.WriteLine($"MIN HNSW TIME: {timewatchesHNSW.Min()} ms");
    Console.WriteLine($"AVG HNSW TIME: {timewatchesHNSW.Average()} ms");
    Console.WriteLine($"MAX HNSW TIME: {timewatchesHNSW.Max()} ms");

    Console.WriteLine($"MIN NP TIME: {timewatchesNP.Min()} ms");
    Console.WriteLine($"AVG NP TIME: {timewatchesNP.Average()} ms");
    Console.WriteLine($"MAX NP TIME: {timewatchesNP.Max()} ms");
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Repeatedly inserts batches of random float vectors into one HNSW world and prints the
/// elapsed insertion time of every batch.
/// </summary>
static void InsertTimeExplosionTest()
{
    const int batchSize = 20000;
    const int batchCount = 100;
    const int dims = 128;

    var timer = new Stopwatch();
    var graph = new SmallWorld<float[]>(NSWOptions<float[]>.Create(6, 8, 150, 150, Metrics.L2Euclidean, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple));

    for (int batch = 0; batch < batchCount; batch++)
    {
        var vectors = RandomVectors(dims, batchSize);

        timer.Restart();
        var inserted = graph.AddItems(vectors.ToArray());
        timer.Stop();

        Console.WriteLine($"ITERATION: [{batch:D4}] COUNT: [{inserted.Length}] ELAPSED [{timer.ElapsedMilliseconds} ms]");
    }
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|