From 2d1e4f9d5b06693228a7a34102eafac43f19f95d Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 25 Dec 2021 23:53:42 +0300 Subject: [PATCH] HNSW MNIST test Fix process find of cutoff for bimodal histogram --- TestHNSW/HNSWDemo/Program.cs | 112 +----- TestHNSW/HNSWDemo/Tests/AccuracityTest.cs | 4 +- .../HNSWDemo/Tests/AutoClusteringMNISTTest.cs | 142 +++++++ TestHNSW/HNSWDemo/Tests/FilterTest.cs | 57 +++ TestHNSW/HNSWDemo/Tests/HistogramTest.cs | 4 +- .../HNSWDemo/Tests/InsertTimeExplosionTest.cs | 2 +- TestHNSW/HNSWDemo/Tests/QuantizatorTest.cs | 4 +- .../HNSWDemo/Tests/QuantizeAccuracityTest.cs | 4 +- .../HNSWDemo/Tests/QuantizeHistogramTest.cs | 8 +- .../Tests/QuantizeInsertTimeExplosionTest.cs | 2 +- .../HNSWDemo/Utils/QLVectorsDirectCompare.cs | 2 +- .../HNSWDemo/Utils/QVectorsDirectCompare.cs | 2 +- .../HNSWDemo/Utils/VectorsDirectCompare.cs | 2 +- ZeroLevel.HNSW/Model/Histogram.cs | 107 ++++- ZeroLevel.HNSW/Model/SearchContext.cs | 18 +- .../Services/AutomaticGraphClusterer.cs | 2 +- ZeroLevel.HNSW/Services/Layer.cs | 71 +++- ZeroLevel.HNSW/Services/LinksSet.cs | 7 +- ZeroLevel.HNSW/SmallWorld.cs | 10 +- ZeroLevel.HNSW/Utils/CosineDistance.cs | 136 +------ ZeroLevel.HNSW/Utils/EuclidDistance.cs | 83 ---- ZeroLevel.HNSW/Utils/Metrics.cs | 367 ++++++++++++++++++ 22 files changed, 776 insertions(+), 370 deletions(-) create mode 100644 TestHNSW/HNSWDemo/Tests/AutoClusteringMNISTTest.cs create mode 100644 TestHNSW/HNSWDemo/Tests/FilterTest.cs delete mode 100644 ZeroLevel.HNSW/Utils/EuclidDistance.cs create mode 100644 ZeroLevel.HNSW/Utils/Metrics.cs diff --git a/TestHNSW/HNSWDemo/Program.cs b/TestHNSW/HNSWDemo/Program.cs index 487917c..39e2d60 100644 --- a/TestHNSW/HNSWDemo/Program.cs +++ b/TestHNSW/HNSWDemo/Program.cs @@ -7,118 +7,10 @@ namespace HNSWDemo { static void Main(string[] args) { - new AutoClusteringTest().Run(); + new AutoClusteringMNISTTest().Run(); + //new HistogramTest().Run(); Console.WriteLine("Completed"); Console.ReadKey(); } - - /* - static 
void TestOnMnist() - { - int imageCount, rowCount, colCount; - var buf = new byte[4]; - var image = new byte[28 * 28]; - var vectors = new List(); - using (var fs = new FileStream("t10k-images.idx3-ubyte", FileMode.Open, FileAccess.Read, FileShare.None)) - { - // first 4 bytes is a magic number - fs.Read(buf, 0, 4); - // second 4 bytes is the number of images - fs.Read(buf, 0, 4); - imageCount = BitConverter.ToInt32(buf.Reverse().ToArray(), 0); - // third 4 bytes is the row count - fs.Read(buf, 0, 4); - rowCount = BitConverter.ToInt32(buf.Reverse().ToArray(), 0); - // fourth 4 bytes is the column count - fs.Read(buf, 0, 4); - colCount = BitConverter.ToInt32(buf.Reverse().ToArray(), 0); - - for (int i = 0; i < imageCount; i++) - { - fs.Read(image, 0, image.Length); - vectors.Add(image.Select(b => (float)b).ToArray()); - } - } - - //var direct = new VectorsDirectCompare(vectors, Metrics.L2Euclidean); - - var options = NSWOptions.Create(8, 16, 200, 200, Metrics.L2Euclidean, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple); - SmallWorld world; - if (File.Exists("graph.bin")) - { - using (var fs = new FileStream("graph.bin", FileMode.Open, FileAccess.Read, FileShare.None)) - { - world = SmallWorld.CreateWorldFrom(options, fs); - } - } - else - { - world = SmallWorld.CreateWorld(options); - world.AddItems(vectors); - using (var fs = new FileStream("graph.bin", FileMode.Create, FileAccess.Write, FileShare.None)) - { - world.Serialize(fs); - } - } - - var clusters = AutomaticGraphClusterer.DetectClusters(world); - Console.WriteLine($"Found {clusters.Count} clusters"); - for (int i = 0; i < clusters.Count; i++) - { - Console.WriteLine($"Cluster {i + 1} countains {clusters[i].Count} items"); - } - } - - - static void FilterTest() - { - var count = 1000; - var testCount = 100; - var dimensionality = 128; - var samples = Person.GenerateRandom(dimensionality, count); - - var testDict = samples.ToDictionary(s => s.Item2.Number, s => s.Item2); - - var map = new 
HNSWMap(); - var world = new SmallWorld(NSWOptions.Create(6, 15, 200, 200, CosineDistance.ForUnits, true, true, selectionHeuristic: NeighbourSelectionHeuristic.SelectSimple)); - - var ids = world.AddItems(samples.Select(i => i.Item1).ToArray()); - for (int bi = 0; bi < samples.Count; bi++) - { - map.Append(samples[bi].Item2.Number, ids[bi]); - } - - Console.WriteLine("Start test"); - int K = 200; - var vectors = RandomVectors(dimensionality, testCount); - - var context = new SearchContext() - .SetActiveNodes(map - .ConvertFeaturesToIds(samples - .Where(p => p.Item2.Age > 20 && p.Item2.Age < 50 && p.Item2.Gender == Gender.Feemale) - .Select(p => p.Item2.Number))); - - var hits = 0; - var miss = 0; - foreach (var v in vectors) - { - var numbers = map.ConvertIdsToFeatures(world.Search(v, K, context).Select(r => r.Item1)); - foreach (var r in numbers) - { - var record = testDict[r]; - if (record.Gender == Gender.Feemale && record.Age > 20 && record.Age < 50) - { - hits++; - } - else - { - miss++; - } - } - } - Console.WriteLine($"SUCCESS: {hits}"); - Console.WriteLine($"ERROR: {miss}"); - } - */ } } diff --git a/TestHNSW/HNSWDemo/Tests/AccuracityTest.cs b/TestHNSW/HNSWDemo/Tests/AccuracityTest.cs index fe229c5..75e5795 100644 --- a/TestHNSW/HNSWDemo/Tests/AccuracityTest.cs +++ b/TestHNSW/HNSWDemo/Tests/AccuracityTest.cs @@ -25,8 +25,8 @@ namespace HNSWDemo.Tests var sw = new Stopwatch(); - var test = new VectorsDirectCompare(samples, CosineDistance.NonOptimized); - var world = new SmallWorld(NSWOptions.Create(8, 12, 100, 100, CosineDistance.NonOptimized)); + var test = new VectorsDirectCompare(samples, Metrics.Cosine); + var world = new SmallWorld(NSWOptions.Create(8, 12, 100, 100, Metrics.Cosine)); sw.Start(); var ids = world.AddItems(samples.ToArray()); diff --git a/TestHNSW/HNSWDemo/Tests/AutoClusteringMNISTTest.cs b/TestHNSW/HNSWDemo/Tests/AutoClusteringMNISTTest.cs new file mode 100644 index 0000000..0e26a81 --- /dev/null +++ 
b/TestHNSW/HNSWDemo/Tests/AutoClusteringMNISTTest.cs @@ -0,0 +1,142 @@ +using System; +using System.Collections.Generic; +using System.Drawing; +using System.Drawing.Imaging; +using System.IO; +using System.Linq; +using System.Runtime.InteropServices; +using ZeroLevel.HNSW; +using ZeroLevel.HNSW.Services; +using ZeroLevel.Services.FileSystem; + +namespace HNSWDemo.Tests +{ + public class AutoClusteringMNISTTest + : ITest + { + private static int Width = 3000; + private static int Height = 3000; + + private static byte[] PadLines(byte[] bytes, int rows, int columns) + { + int currentStride = columns; // 3 + int newStride = columns; // 4 + byte[] newBytes = new byte[newStride * rows]; + for (int i = 0; i < rows; i++) + Buffer.BlockCopy(bytes, currentStride * i, newBytes, newStride * i, currentStride); + return newBytes; + } + + public void Run() + { + var folder = @"D:\Mnist"; + int columns = 28; + int rows = 28; + int imageCount, rowCount, colCount; + var buf = new byte[4]; + var image = new byte[rows * columns]; + var vectors = new List(); + using (var fs = new FileStream("t10k-images.idx3-ubyte", FileMode.Open, FileAccess.Read, FileShare.None)) + { + // first 4 bytes is a magic number + fs.Read(buf, 0, 4); + // second 4 bytes is the number of images + fs.Read(buf, 0, 4); + imageCount = BitConverter.ToInt32(buf.Reverse().ToArray(), 0); + // third 4 bytes is the row count + fs.Read(buf, 0, 4); + rowCount = BitConverter.ToInt32(buf.Reverse().ToArray(), 0); + // fourth 4 bytes is the column count + fs.Read(buf, 0, 4); + colCount = BitConverter.ToInt32(buf.Reverse().ToArray(), 0); + + for (int i = 0; i < imageCount; i++) + { + fs.Read(image, 0, image.Length); + var v = new byte[image.Length]; + Array.Copy(image, v, image.Length); + vectors.Add(v); + } + } + var options = NSWOptions.Create(8, 16, 200, 200, Metrics.L2Euclidean); + SmallWorld world; + if (File.Exists("graph_mnist.bin")) + { + using (var fs = new FileStream("graph_mnist.bin", FileMode.Open, 
FileAccess.Read, FileShare.None)) + { + world = SmallWorld.CreateWorldFrom(options, fs); + } + } + else + { + world = SmallWorld.CreateWorld(options); + world.AddItems(vectors); + using (var fs = new FileStream("graph_mnist.bin", FileMode.Create, FileAccess.Write, FileShare.None)) + { + world.Serialize(fs); + } + } + + var distance = new Func((id1, id2) => Metrics.L2Euclidean(world.GetVector(id1), world.GetVector(id2))); + var links = world.GetLinks().SelectMany(pair => pair.Value.Select(p=> distance(pair.Key, p))).ToList(); + var exists = links.Where(n => n > 0).ToArray(); + + var histogram = new Histogram(HistogramMode.SQRT, links); + DrawHistogram(histogram, @"D:\Mnist\histogram.jpg"); + + var clusters = AutomaticGraphClusterer.DetectClusters(world); + Console.WriteLine($"Found {clusters.Count} clusters"); + + + for (int i = 0; i < clusters.Count; i++) + { + var ouput = Path.Combine(folder, i.ToString("D3")); + FSUtils.CleanAndTestFolder(ouput); + foreach (var v in clusters[i]) + { + int stride = columns; + byte[] newbytes = PadLines(world.GetVector(v), rows, columns); + using (var im = new Bitmap(columns, rows, stride, PixelFormat.Format8bppIndexed, Marshal.UnsafeAddrOfPinnedArrayElement(newbytes, 0))) + { + im.Save(Path.Combine(ouput, $"{v}.bmp")); + } + } + Console.WriteLine($"Cluster {i + 1} countains {clusters[i].Count} items"); + } + } + + static void DrawHistogram(Histogram histogram, string filename) + { + var wb = Width / histogram.Values.Length; + var k = ((float)Height) / (float)histogram.Values.Max(); + + var maxes = histogram.GetMaximums().ToDictionary(m => m.Index, m => m); + int threshold = histogram.CuttOff(); + + using (var bmp = new Bitmap(Width, Height)) + { + using (var g = Graphics.FromImage(bmp)) + { + for (int i = 0; i < histogram.Values.Length; i++) + { + var height = (int)(histogram.Values[i] * k); + if (maxes.ContainsKey(i)) + { + g.DrawRectangle(Pens.Red, i * wb, bmp.Height - height, wb, height); + g.DrawRectangle(Pens.Red, i * wb + 1, 
bmp.Height - height, wb - 1, height); + } + else + { + g.DrawRectangle(Pens.Blue, i * wb, bmp.Height - height, wb, height); + } + if (i == threshold) + { + g.DrawLine(Pens.Green, i * wb + wb / 2, 0, i * wb + wb / 2, bmp.Height); + } + } + } + bmp.Save(filename); + } + } + } +} diff --git a/TestHNSW/HNSWDemo/Tests/FilterTest.cs b/TestHNSW/HNSWDemo/Tests/FilterTest.cs new file mode 100644 index 0000000..d69cd8f --- /dev/null +++ b/TestHNSW/HNSWDemo/Tests/FilterTest.cs @@ -0,0 +1,57 @@ +using HNSWDemo.Model; +using System; +using System.Linq; +using ZeroLevel.HNSW; + +namespace HNSWDemo.Tests +{ + public class FilterTest + : ITest + { + private const int count = 3000; + private const int testCount = 100; + private const int dimensionality = 128; + + public void Run() + { + var map = new HNSWMap(); + var samples = Person.GenerateRandom(dimensionality, count); + var testDict = samples.ToDictionary(s => s.Item2.Number, s => s.Item2); + var world = new SmallWorld(NSWOptions.Create(6, 15, 200, 200, CosineDistance.ForUnits)); + var ids = world.AddItems(samples.Select(i => i.Item1).ToArray()); + for (int bi = 0; bi < samples.Count; bi++) + { + map.Append(samples[bi].Item2.Number, ids[bi]); + } + Console.WriteLine("Start test"); + int K = 200; + var vectors = VectorUtils.RandomVectors(dimensionality, testCount); + + var context = new SearchContext() + .SetActiveNodes(map + .ConvertFeaturesToIds(samples + .Where(p => p.Item2.Age > 20 && p.Item2.Age < 50 && p.Item2.Gender == Gender.Feemale) + .Select(p => p.Item2.Number))); + var hits = 0; + var miss = 0; + foreach (var v in vectors) + { + var numbers = map.ConvertIdsToFeatures(world.Search(v, K, context).Select(r => r.Item1)); + foreach (var r in numbers) + { + var record = testDict[r]; + if (context.NodeCheckMode == Mode.None || (record.Gender == Gender.Feemale && record.Age > 20 && record.Age < 50)) + { + hits++; + } + else + { + miss++; + } + } + } + Console.WriteLine($"SUCCESS: {hits}"); + Console.WriteLine($"ERROR: 
{miss}"); + } + } +} diff --git a/TestHNSW/HNSWDemo/Tests/HistogramTest.cs b/TestHNSW/HNSWDemo/Tests/HistogramTest.cs index 1b804c3..9ee0754 100644 --- a/TestHNSW/HNSWDemo/Tests/HistogramTest.cs +++ b/TestHNSW/HNSWDemo/Tests/HistogramTest.cs @@ -24,7 +24,7 @@ namespace HNSWDemo.Tests var histogram = new Histogram(HistogramMode.SQRT, weights); histogram.Smooth(); - int threshold = histogram.OTSU(); + int threshold = histogram.CuttOff(); var min = histogram.Bounds[threshold - 1]; var max = histogram.Bounds[threshold]; var R = (max + min) / 2; @@ -38,7 +38,7 @@ namespace HNSWDemo.Tests var k = ((float)Height) / (float)histogram.Values.Max(); var maxes = histogram.GetMaximums().ToDictionary(m => m.Index, m => m); - int threshold = histogram.OTSU(); + int threshold = histogram.CuttOff(); using (var bmp = new Bitmap(Width, Height)) { diff --git a/TestHNSW/HNSWDemo/Tests/InsertTimeExplosionTest.cs b/TestHNSW/HNSWDemo/Tests/InsertTimeExplosionTest.cs index 2641b36..f451540 100644 --- a/TestHNSW/HNSWDemo/Tests/InsertTimeExplosionTest.cs +++ b/TestHNSW/HNSWDemo/Tests/InsertTimeExplosionTest.cs @@ -14,7 +14,7 @@ namespace HNSWDemo.Tests public void Run() { var sw = new Stopwatch(); - var world = new SmallWorld(NSWOptions.Create(6, 12, 100, 100, CosineDistance.NonOptimized)); + var world = new SmallWorld(NSWOptions.Create(6, 12, 100, 100, Metrics.Cosine)); for (int i = 0; i < IterationCount; i++) { var samples = VectorUtils.RandomVectors(Dimensionality, Count); diff --git a/TestHNSW/HNSWDemo/Tests/QuantizatorTest.cs b/TestHNSW/HNSWDemo/Tests/QuantizatorTest.cs index a6dee3d..b8575be 100644 --- a/TestHNSW/HNSWDemo/Tests/QuantizatorTest.cs +++ b/TestHNSW/HNSWDemo/Tests/QuantizatorTest.cs @@ -26,11 +26,11 @@ namespace HNSWDemo.Tests { var v1 = samples[i]; var v2 = samples[i + 1]; - var dist = CosineDistance.NonOptimized(v1, v2); + var dist = Metrics.Cosine(v1, v2); var qv1 = q_samples[i]; var qv2 = q_samples[i + 1]; - var qdist = CosineDistance.NonOptimized(qv1, qv2); + var qdist 
= Metrics.Cosine(qv1, qv2); list.Add(Math.Abs(dist - qdist)); } diff --git a/TestHNSW/HNSWDemo/Tests/QuantizeAccuracityTest.cs b/TestHNSW/HNSWDemo/Tests/QuantizeAccuracityTest.cs index 6e3bcc3..324a92d 100644 --- a/TestHNSW/HNSWDemo/Tests/QuantizeAccuracityTest.cs +++ b/TestHNSW/HNSWDemo/Tests/QuantizeAccuracityTest.cs @@ -28,8 +28,8 @@ namespace HNSWDemo.Tests var sw = new Stopwatch(); - var test = new VectorsDirectCompare(s, CosineDistance.NonOptimized); - var world = new SmallWorld(NSWOptions.Create(6, 8, 100, 100, CosineDistance.NonOptimized)); + var test = new VectorsDirectCompare(s, Metrics.Cosine); + var world = new SmallWorld(NSWOptions.Create(6, 8, 100, 100, Metrics.Cosine)); sw.Start(); var ids = world.AddItems(samples.ToArray()); diff --git a/TestHNSW/HNSWDemo/Tests/QuantizeHistogramTest.cs b/TestHNSW/HNSWDemo/Tests/QuantizeHistogramTest.cs index 4e81ee2..f71a33c 100644 --- a/TestHNSW/HNSWDemo/Tests/QuantizeHistogramTest.cs +++ b/TestHNSW/HNSWDemo/Tests/QuantizeHistogramTest.cs @@ -18,15 +18,15 @@ namespace HNSWDemo.Tests { var vectors = VectorUtils.RandomVectors(Dimensionality, Count); var q = new Quantizator(-1f, 1f); - var world = SmallWorld.CreateWorld(NSWOptions.Create(8, 16, 200, 200, CosineDistance.NonOptimized)); + var world = SmallWorld.CreateWorld(NSWOptions.Create(8, 16, 200, 200, Metrics.Cosine)); world.AddItems(vectors.Select(v => q.QuantizeToLong(v)).ToList()); - var distance = new Func((id1, id2) => CosineDistance.NonOptimized(world.GetVector(id1), world.GetVector(id2))); + var distance = new Func((id1, id2) => Metrics.Cosine(world.GetVector(id1), world.GetVector(id2))); var weights = world.GetLinks().SelectMany(pair => pair.Value.Select(id => distance(pair.Key, id))); var histogram = new Histogram(HistogramMode.SQRT, weights); histogram.Smooth(); - int threshold = histogram.OTSU(); + int threshold = histogram.CuttOff(); var min = histogram.Bounds[threshold - 1]; var max = histogram.Bounds[threshold]; var R = (max + min) / 2; @@ -40,7 
+40,7 @@ namespace HNSWDemo.Tests var k = ((float)Height) / (float)histogram.Values.Max(); var maxes = histogram.GetMaximums().ToDictionary(m => m.Index, m => m); - int threshold = histogram.OTSU(); + int threshold = histogram.CuttOff(); using (var bmp = new Bitmap(Width, Height)) { diff --git a/TestHNSW/HNSWDemo/Tests/QuantizeInsertTimeExplosionTest.cs b/TestHNSW/HNSWDemo/Tests/QuantizeInsertTimeExplosionTest.cs index 70fe553..b42ddde 100644 --- a/TestHNSW/HNSWDemo/Tests/QuantizeInsertTimeExplosionTest.cs +++ b/TestHNSW/HNSWDemo/Tests/QuantizeInsertTimeExplosionTest.cs @@ -16,7 +16,7 @@ namespace HNSWDemo.Tests public void Run() { var sw = new Stopwatch(); - var world = new SmallWorld(NSWOptions.Create(6, 12, 100, 100, CosineDistance.NonOptimized)); + var world = new SmallWorld(NSWOptions.Create(6, 12, 100, 100, Metrics.Cosine)); var q = new Quantizator(-1f, 1f); for (int i = 0; i < IterationCount; i++) { diff --git a/TestHNSW/HNSWDemo/Utils/QLVectorsDirectCompare.cs b/TestHNSW/HNSWDemo/Utils/QLVectorsDirectCompare.cs index 196118a..10182b5 100644 --- a/TestHNSW/HNSWDemo/Utils/QLVectorsDirectCompare.cs +++ b/TestHNSW/HNSWDemo/Utils/QLVectorsDirectCompare.cs @@ -42,7 +42,7 @@ namespace HNSWDemo.Utils // 1. Find R - bound between intra-cluster distances and out-of-cluster distances var histogram = new Histogram(HistogramMode.SQRT, links.Values); - int threshold = histogram.OTSU(); + int threshold = histogram.CuttOff(); var min = histogram.Bounds[threshold - 1]; var max = histogram.Bounds[threshold]; var R = (max + min) / 2; diff --git a/TestHNSW/HNSWDemo/Utils/QVectorsDirectCompare.cs b/TestHNSW/HNSWDemo/Utils/QVectorsDirectCompare.cs index 92c88d7..1649ea9 100644 --- a/TestHNSW/HNSWDemo/Utils/QVectorsDirectCompare.cs +++ b/TestHNSW/HNSWDemo/Utils/QVectorsDirectCompare.cs @@ -42,7 +42,7 @@ namespace HNSWDemo.Utils // 1. 
Find R - bound between intra-cluster distances and out-of-cluster distances var histogram = new Histogram(HistogramMode.SQRT, links.Values); - int threshold = histogram.OTSU(); + int threshold = histogram.CuttOff(); var min = histogram.Bounds[threshold - 1]; var max = histogram.Bounds[threshold]; var R = (max + min) / 2; diff --git a/TestHNSW/HNSWDemo/Utils/VectorsDirectCompare.cs b/TestHNSW/HNSWDemo/Utils/VectorsDirectCompare.cs index a000c45..4d50c3f 100644 --- a/TestHNSW/HNSWDemo/Utils/VectorsDirectCompare.cs +++ b/TestHNSW/HNSWDemo/Utils/VectorsDirectCompare.cs @@ -42,7 +42,7 @@ namespace HNSWDemo.Utils // 1. Find R - bound between intra-cluster distances and out-of-cluster distances var histogram = new Histogram(HistogramMode.SQRT, links.Values); - int threshold = histogram.OTSU(); + int threshold = histogram.CuttOff(); var min = histogram.Bounds[threshold - 1]; var max = histogram.Bounds[threshold]; var R = (max + min) / 2; diff --git a/ZeroLevel.HNSW/Model/Histogram.cs b/ZeroLevel.HNSW/Model/Histogram.cs index 4ed35cc..20c1e55 100644 --- a/ZeroLevel.HNSW/Model/Histogram.cs +++ b/ZeroLevel.HNSW/Model/Histogram.cs @@ -150,7 +150,7 @@ namespace ZeroLevel.HNSW return (float)sum; } - + /* public int OTSU() { float p1, p2, p12; @@ -172,15 +172,108 @@ namespace ZeroLevel.HNSW threshold = k; } } - /* - var local_max = Values[threshold]; - for (int i = threshold + 1; i < Values.Length; i++) + return threshold; + } + */ + /* +1. Градиент V[I] - V[i-1] +2. Походы окнами от 1 и выше, пока не сойдется к бимодальности +3. 
Найти cutoff как минимум между пиками + +Modes = 0 +W = 1 +D = [V.count1] +Maxes = [] +For I in [1..V.count] + D= V[I] - V[i-1] +do + +Modes = 0 +S = +1 +do + for wnd in D + if wnd.sum > 0 & S < 0 + S = +1 + Elif wnd.sum < 0 & S > 0 + Maxes.push(wnd.maxindex) + Modes ++ + S = -1 +W++ +while Modes > 2 +If Modes == 2 +Cutoff = Maxes[0] +Min = V[I] +For I=Maxes[0] to Maxes[1] + if V[I] < Min + Min = V[I] + Cutoff = i + */ + + public int CuttOff() + { + var grad = new int[Values.Length]; + grad[0] = 0; + grad[1] = 0; + for (int k = 2; k < Values.Length; k++) { - + grad[k - 1] = Values[k] - Values[k - 1]; } - */ - return threshold; + var modes = 0; + var window = 0; + var sign = 1; + var sum = 0; + var max = 0; + var maxInd = 0; + var maxes = new List(); + do + { + maxes.Clear(); + window++; + modes = 0; + sum = 0; + for (int i = 0; i < grad.Length; i += window) + { + sum = grad[i]; + max = Values[i]; + maxInd = i; + for (var w = 1; w < window && (i + w) < grad.Length; w++) + { + sum += grad[i + w]; + if (Values[i + w] > max) + { + max = Values[i + w]; + maxInd = i + w; + } + } + if (sum > 0 && sign < 0) + { + sign = 1; + } + else if (sum < 0 && sign > 0) + { + modes++; + maxes.Add(maxInd); + sign = -1; + } + } + } while (modes > 2); + if (modes == 2) + { + var cutoff = maxes[0]; + var min = Values[cutoff]; + for (int i = maxes[0] + 1; i < maxes[1]; i++) + { + if (Values[i] < min) + { + cutoff = i; + min = Values[i]; + } + } + return cutoff; + } + return -1; } + #endregion static bool NumbersHasSameSign(int left, int right) diff --git a/ZeroLevel.HNSW/Model/SearchContext.cs b/ZeroLevel.HNSW/Model/SearchContext.cs index 12b336f..5a0bf4b 100644 --- a/ZeroLevel.HNSW/Model/SearchContext.cs +++ b/ZeroLevel.HNSW/Model/SearchContext.cs @@ -5,20 +5,22 @@ using System.Runtime.CompilerServices; namespace ZeroLevel.HNSW { - public sealed class SearchContext + public enum Mode { - enum Mode - { - None, - ActiveCheck, - InactiveCheck, - ActiveInactiveCheck - } + None, + ActiveCheck, 
+ InactiveCheck, + ActiveInactiveCheck + } + public sealed class SearchContext + { private HashSet _activeNodes; private HashSet _entryNodes; private Mode _mode; + public Mode NodeCheckMode => _mode; + public SearchContext() { _mode = Mode.None; diff --git a/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs b/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs index b78fe02..9994134 100644 --- a/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs +++ b/ZeroLevel.HNSW/Services/AutomaticGraphClusterer.cs @@ -22,7 +22,7 @@ namespace ZeroLevel.HNSW.Services // 1. Find R - bound between intra-cluster distances and out-of-cluster distances var histogram = new Histogram(HistogramMode.SQRT, links.Select(l => l.Distance)); - int threshold = histogram.OTSU(); + int threshold = histogram.CuttOff(); var min = histogram.Bounds[threshold - 1]; var max = histogram.Bounds[threshold]; var R = (max + min) / 2; diff --git a/ZeroLevel.HNSW/Services/Layer.cs b/ZeroLevel.HNSW/Services/Layer.cs index 3966202..16367cc 100644 --- a/ZeroLevel.HNSW/Services/Layer.cs +++ b/ZeroLevel.HNSW/Services/Layer.cs @@ -256,14 +256,80 @@ namespace ZeroLevel.HNSW return W; } + internal IEnumerable<(int, float)> KNearestAtLayer(int entryPointId, Func targetCosts, int ef, SearchContext context) + { + int farthestId; + float farthestDistance; + var d = targetCosts(entryPointId); + + var v = new VisitedBitSet(_vectors.Count, _options.M); + // * v ← ep // set of visited elements + v.Add(entryPointId); + // * C ← ep // set of candidates + var C = new MinHeap(ef); + C.Push((entryPointId, d)); + // * W ← ep // dynamic list of found nearest neighbors + var W = new MaxHeap(ef + 1); + if (context.IsActiveNode(entryPointId)) + { + W.Push((entryPointId, d)); + } + + // * while │C│ > 0 + while (C.Count > 0) + { + // * c ← extract nearest element from C to q + var c = C.Pop(); + // * f ← get furthest element from W to q + // * if distance(c, q) > distance(f, q) + if (W.TryPeek(out _, out farthestDistance) && c.Item2 > 
farthestDistance) + { + // * break // all elements in W are evaluated + break; + } + + // * for each e ∈ neighbourhood(c) at layer lc // update C and W + foreach (var e in GetNeighbors(c.Item1)) + { + // * if e ∉ v + if (!v.Contains(e)) + { + // * v ← v ⋃ e + v.Add(e); + // * f ← get furthest element from W to q + W.TryPeek(out farthestId, out farthestDistance); + + var eDistance = targetCosts(e); + // * if distance(e, q) < distance(f, q) or │W│ < ef + if (W.Count < ef || (farthestId >= 0 && eDistance < farthestDistance)) + { + // * C ← C ⋃ e + C.Push((e, eDistance)); + // * W ← W ⋃ e + if (context.IsActiveNode(e)) + { + W.Push((e, eDistance)); + if (W.Count > ef) + { + W.Pop(); + } + } + } + } + } + } + C.Clear(); + v.Clear(); + return W; + } + /// /// Algorithm 2 /// /// query element /// enter points ep /// Output: ef closest neighbors to q - /* - internal IEnumerable<(int, float)> KNearestAtLayer(int entryPointId, Func targetCosts, int ef, SearchContext context) + internal IEnumerable<(int, float)> KNearestAвtLayer(int entryPointId, Func targetCosts, int ef, SearchContext context) { int farthestId; float farthestDistance; @@ -326,7 +392,6 @@ namespace ZeroLevel.HNSW v.Clear(); return W; } - */ /// /// Algorithm 2, modified for LookAlike diff --git a/ZeroLevel.HNSW/Services/LinksSet.cs b/ZeroLevel.HNSW/Services/LinksSet.cs index c3631ff..8870d1e 100644 --- a/ZeroLevel.HNSW/Services/LinksSet.cs +++ b/ZeroLevel.HNSW/Services/LinksSet.cs @@ -75,7 +75,8 @@ namespace ZeroLevel.HNSW public void Serialize(IBinaryWriter writer) { writer.WriteBoolean(false); // true - set with weights - writer.WriteInt32(_set.Sum(pair => pair.Value.Count)); + var count = _set.Sum(pair => pair.Value.Count); + writer.WriteInt32(count); foreach (var record in _set) { var id = record.Key; @@ -89,9 +90,9 @@ namespace ZeroLevel.HNSW public void Deserialize(IBinaryReader reader) { - if (reader.ReadBoolean() == false) + if (reader.ReadBoolean() != false) { - throw new 
InvalidOperationException("Incompatible data format. The set does not contain weights."); + throw new InvalidOperationException("Incompatible format"); } _set.Clear(); _set = null; diff --git a/ZeroLevel.HNSW/SmallWorld.cs b/ZeroLevel.HNSW/SmallWorld.cs index 2da0dd6..4749559 100644 --- a/ZeroLevel.HNSW/SmallWorld.cs +++ b/ZeroLevel.HNSW/SmallWorld.cs @@ -40,6 +40,8 @@ namespace ZeroLevel.HNSW public SmallWorld(NSWOptions options, Stream stream) { _options = options; + _layerLevelGenerator = new ProbabilityLayerNumberGenerator(_options.LayersCount, _options.M); + DistanceFunction = new Func((id1, id2) => _options.Distance(_vectors[id1], _vectors[id2])); Deserialize(stream); } @@ -57,7 +59,7 @@ namespace ZeroLevel.HNSW yield return (pair.Item1, _vectors[pair.Item1], pair.Item2); } } - /* + public IEnumerable<(int, TItem, float)> Search(TItem vector, int k, SearchContext context) { if (context == null) @@ -76,6 +78,7 @@ namespace ZeroLevel.HNSW } } + /* public IEnumerable<(int, TItem, float)> Search(int k, SearchContext context) { if (context == null) @@ -261,7 +264,7 @@ namespace ZeroLevel.HNSW _lockGraph.ExitReadLock(); } } - /* + private IEnumerable<(int, float)> KNearest(TItem q, int k, SearchContext context) { _lockGraph.EnterReadLock(); @@ -271,6 +274,7 @@ namespace ZeroLevel.HNSW { return Enumerable.Empty<(int, float)>(); } + int id; float value; var distance = new Func(candidate => _options.Distance(q, _vectors[candidate])); @@ -309,7 +313,7 @@ namespace ZeroLevel.HNSW _lockGraph.ExitReadLock(); } } - */ + /* private IEnumerable<(int, float)> KNearest(int k, SearchContext context) diff --git a/ZeroLevel.HNSW/Utils/CosineDistance.cs b/ZeroLevel.HNSW/Utils/CosineDistance.cs index 3b7317c..a94d44e 100644 --- a/ZeroLevel.HNSW/Utils/CosineDistance.cs +++ b/ZeroLevel.HNSW/Utils/CosineDistance.cs @@ -27,141 +27,7 @@ namespace ZeroLevel.HNSW /// Left vector. /// Right vector. /// Cosine distance between u and v. 
- public static float NonOptimized(float[] u, float[] v) - { - if (u.Length != v.Length) - { - throw new ArgumentException("Vectors have non-matching dimensions"); - } - - float dot = 0.0f; - float nru = 0.0f; - float nrv = 0.0f; - for (int i = 0; i < u.Length; ++i) - { - dot += u[i] * v[i]; - nru += u[i] * u[i]; - nrv += v[i] * v[i]; - } - - var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv)); - return 1 - similarity; - } - - public static float NonOptimized(byte[] u, byte[] v) - { - if (u.Length != v.Length) - { - throw new ArgumentException("Vectors have non-matching dimensions"); - } - - float dot = 0.0f; - float nru = 0.0f; - float nrv = 0.0f; - for (int i = 0; i < u.Length; ++i) - { - dot += (float)(u[i] * v[i]); - nru += (float)(u[i] * u[i]); - nrv += (float)(v[i] * v[i]); - } - - var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv)); - return 1 - similarity; - } - - public static float NonOptimized(int[] u, int[] v) - { - if (u.Length != v.Length) - { - throw new ArgumentException("Vectors have non-matching dimensions"); - } - - float dot = 0.0f; - float nru = 0.0f; - float nrv = 0.0f; - byte[] bu; - byte[] bv; - - for (int i = 0; i < u.Length; ++i) - { - bu = BitConverter.GetBytes(u[i]); - bv = BitConverter.GetBytes(v[i]); - - dot += (float)(bu[0] * bv[0]); - nru += (float)(bu[0] * bu[0]); - nrv += (float)(bv[0] * bv[0]); - - dot += (float)(bu[1] * bv[1]); - nru += (float)(bu[1] * bu[1]); - nrv += (float)(bv[1] * bv[1]); - - dot += (float)(bu[2] * bv[2]); - nru += (float)(bu[2] * bu[2]); - nrv += (float)(bv[2] * bv[2]); - - dot += (float)(bu[3] * bv[3]); - nru += (float)(bu[3] * bu[3]); - nrv += (float)(bv[3] * bv[3]); - } - - var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv)); - return 1 - similarity; - } - - public static float NonOptimized(long[] u, long[] v) - { - if (u.Length != v.Length) - { - throw new ArgumentException("Vectors have non-matching dimensions"); - } - - float dot = 0.0f; - float nru = 0.0f; - float 
nrv = 0.0f; - byte[] bu; - byte[] bv; - - for (int i = 0; i < u.Length; ++i) - { - bu = BitConverter.GetBytes(u[i]); - bv = BitConverter.GetBytes(v[i]); - - dot += (float)(bu[0] * bv[0]); - nru += (float)(bu[0] * bu[0]); - nrv += (float)(bv[0] * bv[0]); - - dot += (float)(bu[1] * bv[1]); - nru += (float)(bu[1] * bu[1]); - nrv += (float)(bv[1] * bv[1]); - - dot += (float)(bu[2] * bv[2]); - nru += (float)(bu[2] * bu[2]); - nrv += (float)(bv[2] * bv[2]); - - dot += (float)(bu[3] * bv[3]); - nru += (float)(bu[3] * bu[3]); - nrv += (float)(bv[3] * bv[3]); - - dot += (float)(bu[4] * bv[4]); - nru += (float)(bu[4] * bu[4]); - nrv += (float)(bv[4] * bv[4]); - - dot += (float)(bu[5] * bv[5]); - nru += (float)(bu[5] * bu[5]); - nrv += (float)(bv[5] * bv[5]); - - dot += (float)(bu[6] * bv[6]); - nru += (float)(bu[6] * bu[6]); - nrv += (float)(bv[6] * bv[6]); - - dot += (float)(bu[7] * bv[7]); - nru += (float)(bu[7] * bu[7]); - nrv += (float)(bv[7] * bv[7]); - } - - var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv)); - return 1 - similarity; - } + /// /// Calculates cosine distance with assumption that u and v are unit vectors. diff --git a/ZeroLevel.HNSW/Utils/EuclidDistance.cs b/ZeroLevel.HNSW/Utils/EuclidDistance.cs deleted file mode 100644 index fe79ee3..0000000 --- a/ZeroLevel.HNSW/Utils/EuclidDistance.cs +++ /dev/null @@ -1,83 +0,0 @@ -using System; - -namespace ZeroLevel.HNSW -{ - public static class Metrics - { - /// - /// The taxicab metric is also known as rectilinear distance, - /// L1 distance or L1 norm, city block distance, Manhattan distance, - /// or Manhattan length, with the corresponding variations in the name of the geometry. - /// It represents the distance between points in a city road grid. - /// It examines the absolute differences between the coordinates of a pair of objects. 
- /// - public static float L1Manhattan(float[] v1, float[] v2) - { - float res = 0; - for (int i = 0; i < v1.Length; i++) - { - float t = v1[i] - v2[i]; - res += t * t; - } - return (res); - } - - /// - /// Euclidean distance is the most common use of distance. - /// Euclidean distance, or simply 'distance', - /// examines the root of square differences between the coordinates of a pair of objects. - /// This is most generally known as the Pythagorean theorem. - /// - public static float L2Euclidean(float[] v1, float[] v2) - { - float res = 0; - for (int i = 0; i < v1.Length; i++) - { - float t = v1[i] - v2[i]; - res += t * t; - } - return (float)Math.Sqrt(res); - } - - /// - /// The general metric for distance is the Minkowski distance. - /// When lambda is equal to 1, it becomes the city block distance (L1), - /// and when lambda is equal to 2, it becomes the Euclidean distance (L2). - /// The special case is when lambda is equal to infinity (taking a limit), - /// where it is considered as the Chebyshev distance. - /// - public static float MinkowskiDistance(float[] v1, float[] v2, int order) - { - int count = v1.Length; - double sum = 0.0; - for (int i = 0; i < count; i++) - { - sum = sum + Math.Pow(Math.Abs(v1[i] - v2[i]), order); - } - return (float)Math.Pow(sum, (1 / order)); - } - - /// - /// Chebyshev distance is also called the Maximum value distance, - /// defined on a vector space where the distance between two vectors is - /// the greatest of their differences along any coordinate dimension. - /// In other words, it examines the absolute magnitude of the differences - /// between the coordinates of a pair of objects. 
-        ///
-        public static double ChebyshevDistance(float[] v1, float[] v2)
-        {
-            int count = v1.Length;
-            float max = float.MinValue;
-            float c;
-            for (int i = 0; i < count; i++)
-            {
-                c = Math.Abs(v1[i] - v2[i]);
-                if (c > max)
-                {
-                    max = c;
-                }
-            }
-            return max;
-        }
-    }
-}
diff --git a/ZeroLevel.HNSW/Utils/Metrics.cs b/ZeroLevel.HNSW/Utils/Metrics.cs
new file mode 100644
index 0000000..94d895b
--- /dev/null
+++ b/ZeroLevel.HNSW/Utils/Metrics.cs
@@ -0,0 +1,367 @@
+using System;
+using System.Linq;
+
+namespace ZeroLevel.HNSW
+{
+    public static class Metrics
+    {
+        ///
+        /// The taxicab metric is also known as rectilinear distance,
+        /// L1 distance or L1 norm, city block distance, Manhattan distance,
+        /// or Manhattan length, with the corresponding variations in the name of the geometry.
+        /// It represents the distance between points in a city road grid.
+        /// It examines the absolute differences between the coordinates of a pair of objects.
+        ///
+        public static float L1Manhattan(float[] v1, float[] v2)
+        {
+            float res = 0;
+            for (int i = 0; i < v1.Length; i++)
+            {
+                float t = v1[i] - v2[i];
+                // L1 sums |t|; summing t * t would produce the squared Euclidean distance instead.
+                res += Math.Abs(t);
+            }
+            return res;
+        }
+
+        /// <summary>L1 (Manhattan) distance between two byte vectors: the sum of |v1[i] - v2[i]| over v1.Length elements.</summary>
+        public static float L1Manhattan(byte[] v1, byte[] v2)
+        {
+            float res = 0;
+            for (int i = 0; i < v1.Length; i++)
+            {
+                // byte - byte is evaluated as int, so the difference can be negative before Abs.
+                float t = v1[i] - v2[i];
+                res += Math.Abs(t);
+            }
+            return res;
+        }
+
+        /// <summary>L1 (Manhattan) distance between two int vectors: the sum of |v1[i] - v2[i]| over v1.Length elements.</summary>
+        public static float L1Manhattan(int[] v1, int[] v2)
+        {
+            float res = 0;
+            for (int i = 0; i < v1.Length; i++)
+            {
+                float t = v1[i] - v2[i];
+                res += Math.Abs(t);
+            }
+            return res;
+        }
+
+        /// <summary>L1 (Manhattan) distance between two long vectors: the sum of |v1[i] - v2[i]| over v1.Length elements.</summary>
+        public static float L1Manhattan(long[] v1, long[] v2)
+        {
+            float res = 0;
+            for (int i = 0; i < v1.Length; i++)
+            {
+                float t = v1[i] - v2[i];
+                res += Math.Abs(t);
+            }
+            return res;
+        }
+
+        ///
+        /// Euclidean distance is the most common use of distance.
+        /// Euclidean distance, or simply 'distance',
+        /// examines the root of square differences between the coordinates of a pair of objects.
+        /// This is most generally known as the Pythagorean theorem.
+        ///
+        public static float L2Euclidean(float[] v1, float[] v2)
+        {
+            var sumOfSquares = 0f;
+            var index = 0;
+            while (index < v1.Length)
+            {
+                float delta = v1[index] - v2[index];
+                sumOfSquares += delta * delta;
+                index++;
+            }
+
+            return (float)Math.Sqrt(sumOfSquares);
+        }
+
+        /// <summary>Euclidean (L2) distance between two byte vectors, iterating over v1.Length elements.</summary>
+        public static float L2Euclidean(byte[] v1, byte[] v2)
+        {
+            var sumOfSquares = 0f;
+            var index = 0;
+            while (index < v1.Length)
+            {
+                // byte operands are promoted to int before the subtraction.
+                float delta = v1[index] - v2[index];
+                sumOfSquares += delta * delta;
+                index++;
+            }
+
+            return (float)Math.Sqrt(sumOfSquares);
+        }
+
+        /// <summary>Euclidean (L2) distance between two int vectors, iterating over v1.Length elements.</summary>
+        public static float L2Euclidean(int[] v1, int[] v2)
+        {
+            var sumOfSquares = 0f;
+            var index = 0;
+            while (index < v1.Length)
+            {
+                float delta = v1[index] - v2[index];
+                sumOfSquares += delta * delta;
+                index++;
+            }
+
+            return (float)Math.Sqrt(sumOfSquares);
+        }
+
+        /// <summary>Euclidean (L2) distance between two long vectors, iterating over v1.Length elements.</summary>
+        public static float L2Euclidean(long[] v1, long[] v2)
+        {
+            var sumOfSquares = 0f;
+            var index = 0;
+            while (index < v1.Length)
+            {
+                float delta = v1[index] - v2[index];
+                sumOfSquares += delta * delta;
+                index++;
+            }
+
+            return (float)Math.Sqrt(sumOfSquares);
+        }
+
+        ///
+        /// The general metric for distance is the Minkowski distance.
+        /// When lambda is equal to 1, it becomes the city block distance (L1),
+        /// and when lambda is equal to 2, it becomes the Euclidean distance (L2).
+        /// The special case is when lambda is equal to infinity (taking a limit),
+        /// where it is considered as the Chebyshev distance.
+        ///
+        public static float MinkowskiDistance(float[] v1, float[] v2, int order)
+        {
+            if (order < 1)
+            {
+                throw new ArgumentOutOfRangeException(nameof(order), "Minkowski order must be at least 1");
+            }
+
+            int count = v1.Length;
+            double sum = 0.0;
+            for (int i = 0; i < count; i++)
+            {
+                sum += Math.Pow(Math.Abs(v1[i] - v2[i]), order);
+            }
+            // 1.0 forces floating-point division; the integer form 1 / order is 0 for every order > 1,
+            // which made the result Math.Pow(sum, 0) == 1 regardless of the input.
+            return (float)Math.Pow(sum, 1.0 / order);
+        }
+
+        /// <summary>Minkowski distance of the given order between two byte vectors, iterating over v1.Length elements.</summary>
+        /// <exception cref="ArgumentOutOfRangeException">order is less than 1.</exception>
+        public static float MinkowskiDistance(byte[] v1, byte[] v2, int order)
+        {
+            if (order < 1)
+            {
+                throw new ArgumentOutOfRangeException(nameof(order), "Minkowski order must be at least 1");
+            }
+
+            int count = v1.Length;
+            double sum = 0.0;
+            for (int i = 0; i < count; i++)
+            {
+                sum += Math.Pow(Math.Abs(v1[i] - v2[i]), order);
+            }
+            // Floating-point exponent: 1 / order as integers truncates to 0 for order > 1.
+            return (float)Math.Pow(sum, 1.0 / order);
+        }
+
+        /// <summary>Minkowski distance of the given order between two int vectors, iterating over v1.Length elements.</summary>
+        /// <exception cref="ArgumentOutOfRangeException">order is less than 1.</exception>
+        public static float MinkowskiDistance(int[] v1, int[] v2, int order)
+        {
+            if (order < 1)
+            {
+                throw new ArgumentOutOfRangeException(nameof(order), "Minkowski order must be at least 1");
+            }
+
+            int count = v1.Length;
+            double sum = 0.0;
+            for (int i = 0; i < count; i++)
+            {
+                sum += Math.Pow(Math.Abs(v1[i] - v2[i]), order);
+            }
+            // Floating-point exponent: 1 / order as integers truncates to 0 for order > 1.
+            return (float)Math.Pow(sum, 1.0 / order);
+        }
+
+        /// <summary>Minkowski distance of the given order between two long vectors, iterating over v1.Length elements.</summary>
+        /// <exception cref="ArgumentOutOfRangeException">order is less than 1.</exception>
+        public static float MinkowskiDistance(long[] v1, long[] v2, int order)
+        {
+            if (order < 1)
+            {
+                throw new ArgumentOutOfRangeException(nameof(order), "Minkowski order must be at least 1");
+            }
+
+            int count = v1.Length;
+            double sum = 0.0;
+            for (int i = 0; i < count; i++)
+            {
+                sum += Math.Pow(Math.Abs(v1[i] - v2[i]), order);
+            }
+            // Floating-point exponent: 1 / order as integers truncates to 0 for order > 1.
+            return (float)Math.Pow(sum, 1.0 / order);
+        }
+
+        ///
+        /// Chebyshev distance is also called the Maximum value distance,
+        /// defined on a vector space where the distance between two vectors is
+        /// the greatest of their differences along any coordinate dimension.
+        /// In other words, it examines the absolute magnitude of the differences
+        /// between the coordinates of a pair of objects.
+        ///
+        public static double ChebyshevDistance(float[] v1, float[] v2)
+        {
+            int count = v1.Length;
+            // Absolute differences are never negative, so 0 is a safe seed; seeding with
+            // float.MinValue made empty input return a huge negative "distance".
+            float max = 0f;
+            float c;
+            for (int i = 0; i < count; i++)
+            {
+                c = Math.Abs(v1[i] - v2[i]);
+                if (c > max)
+                {
+                    max = c;
+                }
+            }
+            return max;
+        }
+
+        /// <summary>Chebyshev distance between two byte vectors: the largest |v1[i] - v2[i]| over v1.Length elements, 0 for empty input.</summary>
+        public static double ChebyshevDistance(byte[] v1, byte[] v2)
+        {
+            int count = v1.Length;
+            float max = 0f;
+            float c;
+            for (int i = 0; i < count; i++)
+            {
+                c = Math.Abs(v1[i] - v2[i]);
+                if (c > max)
+                {
+                    max = c;
+                }
+            }
+            return max;
+        }
+
+        /// <summary>Chebyshev distance between two int vectors: the largest |v1[i] - v2[i]| over v1.Length elements, 0 for empty input.</summary>
+        public static double ChebyshevDistance(int[] v1, int[] v2)
+        {
+            int count = v1.Length;
+            float max = 0f;
+            float c;
+            for (int i = 0; i < count; i++)
+            {
+                c = Math.Abs(v1[i] - v2[i]);
+                if (c > max)
+                {
+                    max = c;
+                }
+            }
+            return max;
+        }
+
+        /// <summary>Chebyshev distance between two long vectors: the largest |v1[i] - v2[i]| over v1.Length elements, 0 for empty input.</summary>
+        public static double ChebyshevDistance(long[] v1, long[] v2)
+        {
+            int count = v1.Length;
+            float max = 0f;
+            float c;
+            for (int i = 0; i < count; i++)
+            {
+                c = Math.Abs(v1[i] - v2[i]);
+                if (c > max)
+                {
+                    max = c;
+                }
+            }
+            return max;
+        }
+
+        /// <summary>
+        /// Cosine distance between two float vectors, computed as 1 - dot(u, v) / (|u| * |v|).
+        /// The result is NaN when either vector has zero norm (0 / 0 in float arithmetic).
+        /// </summary>
+        /// <exception cref="ArgumentException">u and v have different lengths.</exception>
+        public static float Cosine(float[] u, float[] v)
+        {
+            if (u.Length != v.Length)
+            {
+                throw new ArgumentException("Vectors have non-matching dimensions");
+            }
+
+            float dot = 0.0f;
+            float nru = 0.0f;
+            float nrv = 0.0f;
+            for (int i = 0; i < u.Length; ++i)
+            {
+                dot += u[i] * v[i];
+                nru += u[i] * u[i];
+                nrv += v[i] * v[i];
+            }
+
+            var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv));
+            return 1 - similarity;
+        }
+
+        /// <summary>Cosine distance (1 - cosine similarity) between two byte vectors, each byte being one component.</summary>
+        /// <exception cref="ArgumentException">u and v have different lengths.</exception>
+        public static float Cosine(byte[] u, byte[] v)
+        {
+            if (u.Length != v.Length)
+            {
+                throw new ArgumentException("Vectors have non-matching dimensions");
+            }
+
+            float dot = 0.0f;
+            float nru = 0.0f;
+            float nrv = 0.0f;
+            for (int i = 0; i < u.Length; ++i)
+            {
+                dot += (float)(u[i] * v[i]);
+                nru += (float)(u[i] * u[i]);
+                nrv += (float)(v[i] * v[i]);
+            }
+
+            var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv));
+            return 1 - similarity;
+        }
+
+        /// <summary>
+        /// Cosine distance (1 - cosine similarity) where every int contributes its four bytes
+        /// as four separate unsigned components, not its numeric value.
+        /// </summary>
+        /// <exception cref="ArgumentException">u and v have different lengths.</exception>
+        public static float Cosine(int[] u, int[] v)
+        {
+            // NOTE(review): presumably the arrays hold byte-quantized vectors packed four per int - confirm against the caller.
+            if (u.Length != v.Length)
+            {
+                throw new ArgumentException("Vectors have non-matching dimensions");
+            }
+
+            float dot = 0.0f;
+            float nru = 0.0f;
+            float nrv = 0.0f;
+            for (int i = 0; i < u.Length; ++i)
+            {
+                // Shift/mask pulls out each byte (least significant first) without the two
+                // temporary arrays BitConverter.GetBytes would allocate per element.
+                for (int shift = 0; shift < 32; shift += 8)
+                {
+                    int bu = (u[i] >> shift) & 0xFF;
+                    int bv = (v[i] >> shift) & 0xFF;
+
+                    dot += (float)(bu * bv);
+                    nru += (float)(bu * bu);
+                    nrv += (float)(bv * bv);
+                }
+            }
+
+            var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv));
+            return 1 - similarity;
+        }
+
+        /// <summary>
+        /// Cosine distance (1 - cosine similarity) where every long contributes its eight bytes
+        /// as eight separate unsigned components, not its numeric value.
+        /// </summary>
+        /// <exception cref="ArgumentException">u and v have different lengths.</exception>
+        public static float Cosine(long[] u, long[] v)
+        {
+            // NOTE(review): presumably the arrays hold byte-quantized vectors packed eight per long - confirm against the caller.
+            if (u.Length != v.Length)
+            {
+                throw new ArgumentException("Vectors have non-matching dimensions");
+            }
+
+            float dot = 0.0f;
+            float nru = 0.0f;
+            float nrv = 0.0f;
+            for (int i = 0; i < u.Length; ++i)
+            {
+                // Shift/mask pulls out each byte (least significant first) without the two
+                // temporary arrays BitConverter.GetBytes would allocate per element.
+                for (int shift = 0; shift < 64; shift += 8)
+                {
+                    int bu = (int)((u[i] >> shift) & 0xFF);
+                    int bv = (int)((v[i] >> shift) & 0xFF);
+
+                    dot += (float)(bu * bv);
+                    nru += (float)(bu * bu);
+                    nrv += (float)(bv * bv);
+                }
+            }
+
+            var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv));
+            return 1 - similarity;
+        }
+    }
+}