// Provenance: this file was received as a git patch. The patch preamble is kept below
// verbatim (line breaks restored on a best-effort basis; original indentation and blank
// context lines were not recoverable).
//
//   From 2595c8ce09ece14da6b5bc406496e5c9cebb65c4 Mon Sep 17 00:00:00 2001
//   From: unknown
//   Date: Fri, 10 Dec 2021 06:12:32 +0300
//   Subject: [PATCH] HNSW. New distance metrics L1, L2, Minkowski, Chebyshev
//   ---
//    TestHNSW/HNSWDemo/Program.cs           |  6 +-
//    ZeroLevel.HNSW/Utils/EuclidDistance.cs | 83 ++++++++++++++++++++++++++
//    2 files changed, 88 insertions(+), 1 deletion(-)
//    create mode 100644 ZeroLevel.HNSW/Utils/EuclidDistance.cs
//
//   diff --git a/TestHNSW/HNSWDemo/Program.cs b/TestHNSW/HNSWDemo/Program.cs
//   index 1f40801..009a04b 100644
//   --- a/TestHNSW/HNSWDemo/Program.cs
//   +++ b/TestHNSW/HNSWDemo/Program.cs
//   @@ -380,7 +380,11 @@ namespace HNSWDemo
//    int K = 200;
//    var vectors = RandomVectors(dimensionality, testCount);
//   -var context = new SearchContext().SetActiveNodes(map.ConvertFeaturesToIds(samples.Where(p => p.Item2.Age > 20 && p.Item2.Age < 50 && p.Item2.Gender == Gender.Feemale).Select(p => p.Item2.Number)));
//   +var context = new SearchContext()
//   +.SetActiveNodes(map
//   +.ConvertFeaturesToIds(samples
//   +.Where(p => p.Item2.Age > 20 && p.Item2.Age < 50 && p.Item2.Gender == Gender.Feemale)
//   +.Select(p => p.Item2.Number)));
//    var hits = 0;
//    var miss = 0;
//
//   diff --git a/ZeroLevel.HNSW/Utils/EuclidDistance.cs b/ZeroLevel.HNSW/Utils/EuclidDistance.cs
//   new file mode 100644
//   index 0000000..1fa0211
//   --- /dev/null
//   +++ b/ZeroLevel.HNSW/Utils/EuclidDistance.cs
//   @@ -0,0 +1,83 @@
//
// The content of ZeroLevel.HNSW/Utils/EuclidDistance.cs follows, with corrections applied
// on top of the version carried by the patch.

using System;

namespace ZeroLevel.HNSW.Utils
{
    /// <summary>
    /// Distance metrics between two float vectors of equal length.
    /// </summary>
    public static class Metrics
    {
        /// <summary>
        /// The taxicab metric is also known as rectilinear distance,
        /// L1 distance or L1 norm, city block distance, Manhattan distance,
        /// or Manhattan length, with the corresponding variations in the name of the geometry.
        /// It represents the distance between points in a city road grid.
        /// It is the sum of the absolute differences between the coordinates of a pair of objects.
        /// </summary>
        /// <param name="v1">First vector.</param>
        /// <param name="v2">Second vector; must have the same length as <paramref name="v1"/>.</param>
        /// <returns>Sum of |v1[i] - v2[i]| over all coordinates; 0 for empty vectors.</returns>
        /// <exception cref="ArgumentNullException">Either vector is null.</exception>
        /// <exception cref="ArgumentException">The vectors differ in length.</exception>
        public static float L1Manhattan(float[] v1, float[] v2)
        {
            EnsureComparable(v1, v2);

            float total = 0;
            for (int i = 0; i < v1.Length; i++)
            {
                // Absolute difference, not the square: squaring would yield squared L2.
                total += Math.Abs(v1[i] - v2[i]);
            }
            return total;
        }

        /// <summary>
        /// Euclidean distance is the most commonly used distance.
        /// It is the square root of the sum of squared differences between
        /// the coordinates of a pair of objects (the Pythagorean theorem).
        /// </summary>
        /// <param name="v1">First vector.</param>
        /// <param name="v2">Second vector; must have the same length as <paramref name="v1"/>.</param>
        /// <returns>Square root of the sum of (v1[i] - v2[i])^2; 0 for empty vectors.</returns>
        /// <exception cref="ArgumentNullException">Either vector is null.</exception>
        /// <exception cref="ArgumentException">The vectors differ in length.</exception>
        public static float L2Euclidean(float[] v1, float[] v2)
        {
            EnsureComparable(v1, v2);

            float sumOfSquares = 0;
            for (int i = 0; i < v1.Length; i++)
            {
                float delta = v1[i] - v2[i];
                sumOfSquares += delta * delta;
            }
            return (float)Math.Sqrt(sumOfSquares);
        }

        /// <summary>
        /// The general metric for distance is the Minkowski distance.
        /// When the order is equal to 1, it becomes the city block distance (L1),
        /// and when the order is equal to 2, it becomes the Euclidean distance (L2).
        /// The special case is when the order tends to infinity (taking a limit),
        /// where it is considered as the Chebyshev distance.
        /// </summary>
        /// <param name="v1">First vector.</param>
        /// <param name="v2">Second vector; must have the same length as <paramref name="v1"/>.</param>
        /// <param name="order">Order (exponent) of the metric; must be at least 1.</param>
        /// <returns>(sum of |v1[i] - v2[i]|^order)^(1/order); 0 for empty vectors.</returns>
        /// <exception cref="ArgumentNullException">Either vector is null.</exception>
        /// <exception cref="ArgumentException">The vectors differ in length.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="order"/> is less than 1.</exception>
        public static float MinkowskiDistance(float[] v1, float[] v2, int order)
        {
            EnsureComparable(v1, v2);
            if (order < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(order), order, "Order must be at least 1.");
            }

            double sum = 0.0;
            for (int i = 0; i < v1.Length; i++)
            {
                sum += Math.Pow(Math.Abs(v1[i] - v2[i]), order);
            }

            // 1.0 forces floating-point division; the integer form 1 / order is 0 for order > 1.
            return (float)Math.Pow(sum, 1.0 / order);
        }

        /// <summary>
        /// Chebyshev distance is also called the maximum value distance,
        /// defined on a vector space where the distance between two vectors is
        /// the greatest of their absolute differences along any coordinate dimension.
        /// </summary>
        /// <param name="v1">First vector.</param>
        /// <param name="v2">Second vector; must have the same length as <paramref name="v1"/>.</param>
        /// <returns>Largest |v1[i] - v2[i]| over all coordinates; 0 for empty vectors.</returns>
        /// <exception cref="ArgumentNullException">Either vector is null.</exception>
        /// <exception cref="ArgumentException">The vectors differ in length.</exception>
        public static double ChebyshevDistance(float[] v1, float[] v2)
        {
            EnsureComparable(v1, v2);

            // Absolute differences are never negative, so 0 is a safe floor and keeps the
            // result a valid distance when the vectors are empty.
            float max = 0f;
            for (int i = 0; i < v1.Length; i++)
            {
                float diff = Math.Abs(v1[i] - v2[i]);
                if (diff > max)
                {
                    max = diff;
                }
            }
            return max;
        }

        /// <summary>
        /// Verifies that both vectors are present and have the same number of coordinates.
        /// </summary>
        private static void EnsureComparable(float[] v1, float[] v2)
        {
            if (v1 == null)
            {
                throw new ArgumentNullException(nameof(v1));
            }
            if (v2 == null)
            {
                throw new ArgumentNullException(nameof(v2));
            }
            if (v1.Length != v2.Length)
            {
                throw new ArgumentException("Vectors must have the same length.", nameof(v2));
            }
        }
    }
}