Append LAL graph
pull/1/head
unknown 3 years ago
parent 6dc71702f5
commit b55ae7d814

@ -1,6 +1,7 @@
using HNSWDemo.Tests; using HNSWDemo.Tests;
using System; using System;
using ZeroLevel.Services.Web; using System.IO;
using ZeroLevel.HNSW;
namespace HNSWDemo namespace HNSWDemo
{ {
@ -8,12 +9,29 @@ namespace HNSWDemo
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
var uri = new Uri("https://hack33d.ru/bpla/upload.php?path=128111&get=0J/QuNC70LjQv9C10L3QutC+INCS0LvQsNC00LjQvNC40YAg0JzQuNGF0LDQudC70L7QstC40Yc7MDQuMDkuMTk1NCAoNjYg0LvQtdGCKTvQnNC+0YHQutC+0LLRgdC60LDRjzsxMjgxMTE7TEFfUkVaVVM7RkxZXzAy");
var parts = UrlUtility.ParseQueryString(uri.Query);
new AutoClusteringMNISTTest().Run(); new LALTest().Run();
//new HistogramTest().Run(); // new AutoClusteringMNISTTest().Run();
// new AccuracityTest().Run();
Console.WriteLine("Completed"); Console.WriteLine("Completed");
Console.ReadKey(); Console.ReadKey();
} }
/// <summary>
/// Extracts the integer that follows the second underscore of an "_M…_&lt;number&gt;" suffix
/// in a file name (the extension is ignored), e.g. "graph_M16_200.bin" gives 200.
/// </summary>
/// <param name="file">File name or path to inspect.</param>
/// <returns>
/// The parsed number, or -1 when the name has no "_M" marker past position 0,
/// no underscore after the marker, or a tail that is not a valid integer.
/// </returns>
static int GetC(string file)
{
    const int NotFound = -1;

    var name = Path.GetFileNameWithoutExtension(file);
    if (string.IsNullOrEmpty(name))
    {
        return NotFound;
    }

    // Ordinal search: the marker is a technical token, not linguistic text.
    var markerIndex = name.IndexOf("_M", StringComparison.Ordinal);
    if (markerIndex <= 0)
    {
        return NotFound;
    }

    // Skip the two marker characters and look for the underscore that precedes the number.
    var separatorIndex = name.IndexOf('_', markerIndex + 2);
    if (separatorIndex <= 0)
    {
        return NotFound;
    }

    var tail = name.Substring(separatorIndex + 1);

    // TryParse keeps the "-1 means not found" contract instead of throwing on a malformed tail.
    return int.TryParse(
        tail,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out var value)
        ? value
        : NotFound;
}
} }
} }

@ -0,0 +1,9 @@
{
"profiles": {
"HNSWDemo": {
"commandName": "Project",
"hotReloadEnabled": false,
"nativeDebugging": false
}
}
}

@ -11,7 +11,7 @@ namespace HNSWDemo.Tests
: ITest : ITest
{ {
private static int K = 200; private static int K = 200;
private static int count = 3000; private static int count = 10000;
private static int testCount = 500; private static int testCount = 500;
private static int dimensionality = 128; private static int dimensionality = 128;

@ -1,5 +1,6 @@
using System; using System;
using System.Drawing; using System.Drawing;
using System.IO;
using System.Linq; using System.Linq;
using ZeroLevel.HNSW; using ZeroLevel.HNSW;
@ -10,12 +11,23 @@ namespace HNSWDemo.Tests
{ {
private static int Count = 3000; private static int Count = 3000;
private static int Dimensionality = 128; private static int Dimensionality = 128;
private static int Width = 3000; private static int Width = 2440;
private static int Height = 3000; private static int Height = 1920;
public void Run() public void Run()
{ {
var vectors = VectorUtils.RandomVectors(Dimensionality, Count); Create(Dimensionality, @"D:\hist");
// Process.Start("explorer", $"D:\\hist{Dimensionality.ToString("D3")}.jpg");
/* for (int i = 12; i < 512; i++)
{
Create(i, @"D:\hist");
}*/
}
private void Create(int dim, string output)
{
var vectors = VectorUtils.RandomVectors(dim, Count);
var world = SmallWorld.CreateWorld<float[]>(NSWOptions<float[]>.Create(8, 16, 200, 200, Metrics.L2Euclidean)); var world = SmallWorld.CreateWorld<float[]>(NSWOptions<float[]>.Create(8, 16, 200, 200, Metrics.L2Euclidean));
world.AddItems(vectors); world.AddItems(vectors);
@ -29,7 +41,7 @@ namespace HNSWDemo.Tests
var max = histogram.Bounds[threshold]; var max = histogram.Bounds[threshold];
var R = (max + min) / 2; var R = (max + min) / 2;
DrawHistogram(histogram, @"D:\hist.jpg"); DrawHistogram(histogram, Path.Combine(output, $"hist{dim.ToString("D3")}.jpg"));
} }
static void DrawHistogram(Histogram histogram, string filename) static void DrawHistogram(Histogram histogram, string filename)

@ -0,0 +1,101 @@
using HNSWDemo.Model;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using ZeroLevel.HNSW;
namespace HNSWDemo.Tests
{
    /// <summary>
    /// Demo scenario: random persons are split into buckets by the hash of their number,
    /// one small world and one id map are built per bucket, every world is converted to a
    /// LAL graph through a temporary file, and a link-based search is run from random entries.
    /// </summary>
    internal class LALTest
        : ITest
    {
        private const int count = 5000;
        private const int testCount = 100;
        private const int dimensionality = 128;

        /// <summary>
        /// Builds the bucketed graphs and prints the chosen entries and the search result to the console.
        /// </summary>
        public void Run()
        {
            var moda = 3;
            var persons = Person.GenerateRandom(dimensionality, count);
            var samples = new Dictionary<int, List<(float[], Person)>>();
            var options = NSWOptions<float[]>.Create(6, 8, 100, 100, Metrics.Cosine);

            // Distribute persons over buckets with the same function that is given to HNSWMappers below.
            foreach (var p in persons)
            {
                var c = (int)Math.Abs(p.Item2.Number.GetHashCode() % moda);
                if (!samples.TryGetValue(c, out var bucket))
                {
                    bucket = new List<(float[], Person)>();
                    samples.Add(c, bucket);
                }
                bucket.Add(p);
            }

            var worlds = new SplittedLALGraph();
            var mappers = new HNSWMappers<long>(l => (int)Math.Abs(l.GetHashCode() % moda));

            var worlds_dict = new Dictionary<int, SmallWorld<float[]>>();
            var maps_dict = new Dictionary<int, HNSWMap<long>>();

            // Bucket keys are unique, so every bucket gets exactly one world and one map.
            foreach (var sample in samples)
            {
                var world = new SmallWorld<float[]>(options);
                var map = new HNSWMap<long>();
                worlds_dict.Add(sample.Key, world);
                maps_dict.Add(sample.Key, map);

                var ids = world.AddItems(sample.Value.Select(item => item.Item1));
                for (int i = 0; i < ids.Length; i++)
                {
                    map.Append(sample.Value[i].Item2.Number, ids[i]);
                }
            }

            var name = Guid.NewGuid().ToString();
            foreach (var sample in samples)
            {
                var c = sample.Key;
                var w = worlds_dict[c];
                var m = maps_dict[c];

                try
                {
                    using (var s = File.Create(name))
                    {
                        w.Serialize(s);
                    }
                    using (var s = File.OpenRead(name))
                    {
                        var l = LALGraph.FromHNSWGraph<float[]>(s);
                        worlds.Append(l, c);
                    }
                }
                finally
                {
                    // Remove the temporary file even when serialization or conversion fails.
                    File.Delete(name);
                }
                mappers.Append(m, c);
            }

            var entries = new long[10];
            for (int i = 0; i < entries.Length; i++)
            {
                // NOTE(review): if Next treats the upper bound as exclusive (as System.Random.Next does),
                // the last person can never be chosen - confirm against DefaultRandomGenerator.
                entries[i] = persons[DefaultRandomGenerator.Instance.Next(0, persons.Count - 1)].Item2.Number;
            }

            var contexts = mappers.CreateContext(null, entries);
            var result = worlds.KNearest(10, contexts);

            Console.WriteLine("Entries:");
            foreach (var n in entries)
            {
                Console.WriteLine($"\t{n}");
            }

            Console.WriteLine("Extensions:");
            foreach (var n in mappers.ConvertIdsToFeatures(result))
            {
                Console.WriteLine($"\t[{n}]");
            }
        }
    }
}

@ -7,6 +7,15 @@ namespace Consumer
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
IConfiguration conf = Configuration.Create();
conf.Append("ServiceName", "Test consumer");
conf.Append("ServiceKey", "test.consumer");
conf.Append("ServiceType", "Destination");
conf.Append("ServiceGroup", "Test");
conf.Append("Version", "1.0.0.1");
conf.Append("discovery", "127.0.0.1:5012");
Configuration.Save(conf);
Bootstrap.Startup<ConsumerService>(args) Bootstrap.Startup<ConsumerService>(args)
.EnableConsoleLog(LogLevel.FullStandart) .EnableConsoleLog(LogLevel.FullStandart)
.UseDiscovery() .UseDiscovery()

@ -7,6 +7,15 @@ namespace Processor
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
IConfiguration conf = Configuration.Create();
conf.Append("ServiceName", "Test processor");
conf.Append("ServiceKey", "test.processor");
conf.Append("ServiceType", "Core");
conf.Append("ServiceGroup", "Test");
conf.Append("Version", "1.0.0.1");
conf.Append("discovery", "127.0.0.1:5012");
Configuration.Save(conf);
Bootstrap.Startup<ProcessorService>(args) Bootstrap.Startup<ProcessorService>(args)
.EnableConsoleLog(LogLevel.FullStandart) .EnableConsoleLog(LogLevel.FullStandart)
.UseDiscovery() .UseDiscovery()

@ -6,6 +6,15 @@ namespace Source
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
IConfiguration conf = Configuration.Create();
conf.Append("ServiceName", "Test source");
conf.Append("ServiceKey", "test.source");
conf.Append("ServiceType", "Sources");
conf.Append("ServiceGroup", "Test");
conf.Append("Version", "1.0.0.1");
conf.Append("discovery", "127.0.0.1:5012");
Configuration.Save(conf);
Bootstrap.Startup<SourceService>(args) Bootstrap.Startup<SourceService>(args)
.EnableConsoleLog(ZeroLevel.Logging.LogLevel.FullStandart) .EnableConsoleLog(ZeroLevel.Logging.LogLevel.FullStandart)
.UseDiscovery() .UseDiscovery()

@ -6,6 +6,15 @@ namespace Watcher
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
IConfiguration conf = Configuration.Create();
conf.Append("ServiceName", "Watcher");
conf.Append("ServiceKey", "test.watcher");
conf.Append("ServiceType", "System");
conf.Append("ServiceGroup", "Test");
conf.Append("Version", "1.0.0.1");
conf.Append("discovery", "127.0.0.1:5012");
Configuration.Save(conf);
Bootstrap.Startup<WatcherService>(args) Bootstrap.Startup<WatcherService>(args)
.UseDiscovery() .UseDiscovery()
.Run() .Run()

@ -1,4 +1,5 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.IO;
using ZeroLevel.Services.Serialization; using ZeroLevel.Services.Serialization;
namespace ZeroLevel.HNSW namespace ZeroLevel.HNSW
@ -12,6 +13,7 @@ namespace ZeroLevel.HNSW
private Dictionary<TFeature, int> _map; private Dictionary<TFeature, int> _map;
private Dictionary<int, TFeature> _reverse_map; private Dictionary<int, TFeature> _reverse_map;
public int this[TFeature feature] => _map.GetValueOrDefault(feature);
public HNSWMap(int capacity = -1) public HNSWMap(int capacity = -1)
{ {
if (capacity > 0) if (capacity > 0)
@ -27,6 +29,14 @@ namespace ZeroLevel.HNSW
} }
} }
/// <summary>
/// Creates a map by deserializing its content from <paramref name="stream"/>.
/// </summary>
/// <param name="stream">Stream handed to a <c>MemoryStreamReader</c> for reading the map data.</param>
public HNSWMap(Stream stream)
{
    using (MemoryStreamReader source = new MemoryStreamReader(stream))
    {
        Deserialize(source);
    }
}
public void Append(TFeature feature, int vectorId) public void Append(TFeature feature, int vectorId)
{ {
_map[feature] = vectorId; _map[feature] = vectorId;

@ -0,0 +1,75 @@
using System;
using System.Collections.Generic;
namespace ZeroLevel.HNSW
{
    /// <summary>
    /// Set of feature-to-vector-id maps, one per bucket, where the bucket of a feature
    /// is chosen by a caller-supplied function.
    /// </summary>
    /// <typeparam name="TFeature">Type of the external feature/key mapped to vector ids.</typeparam>
    public class HNSWMappers<TFeature>
    {
        private readonly IDictionary<int, HNSWMap<TFeature>> _mappers = new Dictionary<int, HNSWMap<TFeature>>();
        private readonly Func<TFeature, int> _bucketFunction;

        /// <summary>
        /// Creates an empty set of mappers.
        /// </summary>
        /// <param name="bucketFunction">Function that returns the bucket number for a feature.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bucketFunction"/> is null.</exception>
        public HNSWMappers(Func<TFeature, int> bucketFunction)
        {
            _bucketFunction = bucketFunction ?? throw new ArgumentNullException(nameof(bucketFunction));
        }

        /// <summary>
        /// Registers the map for bucket <paramref name="c"/>.
        /// </summary>
        /// <param name="map">Map of the bucket.</param>
        /// <param name="c">Bucket number; must not be registered yet.</param>
        /// <exception cref="ArgumentNullException"><paramref name="map"/> is null.</exception>
        /// <exception cref="ArgumentException">A map for <paramref name="c"/> was already appended.</exception>
        public void Append(HNSWMap<TFeature> map, int c)
        {
            if (map == null)
            {
                throw new ArgumentNullException(nameof(map));
            }
            _mappers.Add(c, map);
        }

        /// <summary>
        /// Converts vector ids to features by asking every registered map in turn.
        /// </summary>
        /// <param name="ids">Vector ids to convert; null yields an empty sequence.</param>
        /// <returns>Features returned by each map for the given ids, map by map.</returns>
        public IEnumerable<TFeature> ConvertIdsToFeatures(IEnumerable<int> ids)
        {
            if (ids == null)
            {
                yield break;
            }

            // Materialize once: the ids are handed to every map, so a lazy source would be re-run per map.
            var materialized = ids as ICollection<int> ?? new List<int>(ids);

            // NOTE(review): the same ids are passed to every bucket's map; if id spaces of the buckets
            // overlap, one id may produce a feature from several maps - confirm this is intended.
            foreach (var map in _mappers)
            {
                foreach (var feature in map.Value.ConvertIdsToFeatures(materialized))
                {
                    yield return feature;
                }
            }
        }

        /// <summary>
        /// Builds one search context per registered bucket from feature-level node lists.
        /// </summary>
        /// <param name="activeNodes">Features whose vector ids become the active nodes; may be null.</param>
        /// <param name="entryPoints">Features whose vector ids become the entry points; may be null.</param>
        /// <returns>
        /// Contexts keyed by bucket number. A bucket that received no active nodes or no entry points
        /// gets null passed to the corresponding setter.
        /// </returns>
        public IDictionary<int, SearchContext> CreateContext(IEnumerable<TFeature> activeNodes, IEnumerable<TFeature> entryPoints)
        {
            var actives = GroupIdsByBucket(activeNodes);
            var entries = GroupIdsByBucket(entryPoints);

            var result = new Dictionary<int, SearchContext>();
            foreach (var pair in _mappers)
            {
                // TryGetValue leaves the list null when the bucket received nothing.
                actives.TryGetValue(pair.Key, out var active);
                entries.TryGetValue(pair.Key, out var entry);
                result.Add(pair.Key, new SearchContext().SetActiveNodes(active).SetEntryPointsNodes(entry));
            }
            return result;
        }

        /// <summary>
        /// Translates features to vector ids and groups the ids by bucket number.
        /// Features whose bucket has no registered map are skipped.
        /// </summary>
        private Dictionary<int, List<int>> GroupIdsByBucket(IEnumerable<TFeature> features)
        {
            var grouped = new Dictionary<int, List<int>>();
            if (features == null)
            {
                return grouped;
            }

            foreach (var feature in features)
            {
                var bucket = _bucketFunction(feature);
                if (!_mappers.TryGetValue(bucket, out var mapper))
                {
                    continue;
                }
                if (!grouped.TryGetValue(bucket, out var ids))
                {
                    ids = new List<int>();
                    grouped.Add(bucket, ids);
                }
                // NOTE(review): the map indexer presumably yields a default id for unknown features,
                // which would be indistinguishable from a real id - verify against HNSWMap.
                ids.Add(mapper[feature]);
            }
            return grouped;
        }
    }
}

@ -0,0 +1,105 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using ZeroLevel.Services.Serialization;
namespace ZeroLevel.HNSW
{
    /// <summary>
    /// Graph that holds only link lists (node id to neighbour ids) and answers queries
    /// by walking those links from the entry points of a search context.
    /// </summary>
    public class LALGraph
    {
        private readonly LALLinks _links = new LALLinks();

        private LALGraph() { }

        /// <summary>
        /// Creates a graph from a stream previously produced by <see cref="Serialize"/>.
        /// </summary>
        public static LALGraph FromLALGraph(Stream stream)
        {
            var graph = new LALGraph();
            graph.Deserialize(stream);
            return graph;
        }

        /// <summary>
        /// Creates a graph from a serialized HNSW graph, keeping only its first stored layer of links.
        /// </summary>
        /// <typeparam name="TItem">Vector type stored in the HNSW stream; needed to skip over the vectors.</typeparam>
        public static LALGraph FromHNSWGraph<TItem>(Stream stream)
        {
            var graph = new LALGraph();
            graph.DeserializeFromHNSW<TItem>(stream);
            return graph;
        }

        /// <summary>
        /// Collects up to <paramref name="k"/> distinct active nodes reachable from the context's
        /// entry points, breadth-first, expanding only through nodes that were accepted.
        /// </summary>
        /// <param name="k">Maximum number of nodes to return; values below 1 give an empty result.</param>
        /// <param name="context">Search context supplying entry points and the active-node filter.</param>
        /// <returns>
        /// Accepted node ids in discovery order. Entry points themselves are never returned.
        /// Empty when the context or its entry points are null.
        /// </returns>
        public IEnumerable<int> KNearest(int k, SearchContext context)
        {
            var found = new List<int>();
            var entryPoints = context?.EntryPoints;
            if (k <= 0 || entryPoints == null)
            {
                return found;
            }

            // Every node is examined at most once: this prevents duplicates in the result and
            // guarantees termination when fewer than k active nodes are reachable.
            var visited = new HashSet<int>();
            var frontier = new Queue<int>();
            foreach (var ep in entryPoints)
            {
                if (visited.Add(ep))
                {
                    frontier.Enqueue(ep);
                }
            }

            while (frontier.Count > 0 && found.Count < k)
            {
                var current = frontier.Dequeue();
                var neighbours = _links.FindNeighbors(current);
                for (int i = 0; i < neighbours.Length && found.Count < k; ++i)
                {
                    var candidate = neighbours[i];
                    if (!visited.Add(candidate))
                    {
                        continue;
                    }
                    if (context.IsActiveNode(candidate))
                    {
                        found.Add(candidate);
                        // Accepted nodes become the next expansion points.
                        frontier.Enqueue(candidate);
                    }
                }
            }
            return found;
        }

        /// <summary>
        /// Replaces the links of this graph with the ones read from <paramref name="stream"/>.
        /// </summary>
        public void Deserialize(Stream stream)
        {
            using (var reader = new MemoryStreamReader(stream))
            {
                _links.Deserialize(reader); // deserialize only base layer and skip another
            }
        }

        /// <summary>
        /// Reads a serialized HNSW graph: the header values and vectors are read and discarded,
        /// then the links that follow are loaded into this graph.
        /// </summary>
        /// <typeparam name="TItem">Vector type stored in the stream.</typeparam>
        public void DeserializeFromHNSW<TItem>(Stream stream)
        {
            using (var reader = new MemoryStreamReader(stream))
            {
                reader.ReadInt32(); // EntryPoint
                reader.ReadInt32(); // MaxLayer

                int count = reader.ReadInt32(); // Vectors count
                for (int i = 0; i < count; i++)
                {
                    reader.ReadCompatible<TItem>(); // Vector
                }

                reader.ReadInt32(); // countLayers
                _links.Deserialize(reader); // deserialize only base layer and skip another
            }
        }

        /// <summary>
        /// Writes the links of this graph to <paramref name="stream"/>.
        /// </summary>
        public void Serialize(Stream stream)
        {
            using (var writer = new MemoryStreamWriter(stream))
            {
                _links.Serialize(writer);
            }
        }
    }
}

@ -0,0 +1,79 @@
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using ZeroLevel.Services.Serialization;
namespace ZeroLevel.HNSW
{
    /// <summary>
    /// Link storage for a LAL graph: maps a node id to the array of its neighbour ids.
    /// </summary>
    internal class LALLinks
    {
        private ConcurrentDictionary<int, int[]> _set = new ConcurrentDictionary<int, int[]>();

        /// <summary>Direct view of the underlying id-to-neighbours dictionary.</summary>
        internal IDictionary<int, int[]> Links => _set;

        private readonly int[] _empty = new int[0];

        /// <summary>Number of nodes that have a link list.</summary>
        internal int Count => _set.Count;

        public LALLinks()
        {
        }

        /// <summary>
        /// Returns the links of <paramref name="id"/> as (id, neighbour) pairs; empty when the id is unknown.
        /// </summary>
        internal IEnumerable<(int, int)> FindLinksForId(int id)
        {
            // Single atomic lookup instead of ContainsKey followed by the indexer.
            if (_set.TryGetValue(id, out var targets))
            {
                return targets.Select(v => (id, v));
            }
            return Enumerable.Empty<(int, int)>();
        }

        /// <summary>
        /// Returns the neighbour ids of <paramref name="id"/>, or an empty array when the id is unknown.
        /// </summary>
        internal int[] FindNeighbors(int id)
        {
            return _set.TryGetValue(id, out var neighbours) ? neighbours : _empty;
        }

        /// <summary>
        /// Enumerates every stored link as an (id, neighbour) pair.
        /// </summary>
        internal IEnumerable<(int, int)> Items()
        {
            return _set
                .SelectMany(pair => pair.Value
                    .Select(v => (pair.Key, v)));
        }

        /// <summary>
        /// Clears and releases the storage. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            _set?.Clear();
            _set = null;
        }

        /// <summary>
        /// Writes the node count, then for every node its id followed by its neighbour collection.
        /// </summary>
        public void Serialize(IBinaryWriter writer)
        {
            writer.WriteInt32(_set.Count);
            foreach (var record in _set)
            {
                writer.WriteInt32(record.Key);
                writer.WriteCollection(record.Value);
            }
        }

        /// <summary>
        /// Replaces the current content with links read from <paramref name="reader"/>:
        /// a node count, then per node its id, a neighbour count and that many neighbour ids.
        /// </summary>
        public void Deserialize(IBinaryReader reader)
        {
            var count = reader.ReadInt32();

            // Build into a local dictionary so a failed read does not leave this object without storage.
            var loaded = new ConcurrentDictionary<int, int[]>(1, count);
            for (int i = 0; i < count; i++)
            {
                var id = reader.ReadInt32();
                var links_count = reader.ReadInt32();
                var links = new int[links_count];
                for (int l = 0; l < links_count; l++)
                {
                    links[l] = reader.ReadInt32();
                }
                loaded[id] = links;
            }

            var previous = _set;
            _set = loaded;
            // The previous storage is emptied, as before, once the new content is in place.
            previous?.Clear();
        }
    }
}

@ -0,0 +1,29 @@
using System.Collections.Generic;
namespace ZeroLevel.HNSW
{
    /// <summary>
    /// Collection of LAL graphs keyed by bucket number; a search is run on every graph
    /// that has a context with entry points and the partial results are concatenated.
    /// </summary>
    public class SplittedLALGraph
    {
        private readonly IDictionary<int, LALGraph> _graphs = new Dictionary<int, LALGraph>();

        /// <summary>
        /// Registers the graph for bucket <paramref name="c"/>.
        /// </summary>
        /// <param name="graph">Graph of the bucket.</param>
        /// <param name="c">Bucket number; must not be registered yet.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="graph"/> is null.</exception>
        /// <exception cref="System.ArgumentException">A graph for <paramref name="c"/> was already appended.</exception>
        public void Append(LALGraph graph, int c)
        {
            if (graph == null)
            {
                throw new System.ArgumentNullException(nameof(graph));
            }
            _graphs.Add(c, graph);
        }

        /// <summary>
        /// Runs the search on every registered graph and concatenates the results.
        /// </summary>
        /// <param name="k">Total number of requested nodes; each graph is asked for 1 + k / graphCount.</param>
        /// <param name="contexts">Search contexts keyed by bucket number.</param>
        /// <returns>
        /// Node ids from all searched graphs in graph order; may hold more than <paramref name="k"/> items
        /// because every graph is asked for its own share. Empty when no graphs are registered or
        /// <paramref name="contexts"/> is null. Graphs without a context, or whose context has no
        /// entry points, are skipped.
        /// </returns>
        public IEnumerable<int> KNearest(int k, IDictionary<int, SearchContext> contexts)
        {
            var result = new List<int>();

            // Without graphs the per-graph share below would divide by zero.
            if (_graphs.Count == 0 || contexts == null)
            {
                return result;
            }

            var partial_k = 1 + (k / _graphs.Count);
            foreach (var graph in _graphs)
            {
                if (contexts.TryGetValue(graph.Key, out var context) && context?.EntryPoints != null)
                {
                    result.AddRange(graph.Value.KNearest(partial_k, context));
                }
            }
            return result;
        }
    }
}

@ -70,24 +70,15 @@ namespace ZeroLevel.HNSW
_set.Clear(); _set.Clear();
_set = null; _set = null;
} }
private const int HALF_LONG_BITS = 32;
public void Serialize(IBinaryWriter writer) public void Serialize(IBinaryWriter writer)
{ {
writer.WriteBoolean(false); // true - set with weights writer.WriteInt32(_set.Count);
var count = _set.Sum(pair => pair.Value.Count);
writer.WriteInt32(count);
foreach (var record in _set) foreach (var record in _set)
{ {
var id = record.Key; writer.WriteInt32(record.Key);
foreach (var r in record.Value) writer.WriteCollection(record.Value);
{
var key = (((long)(id)) << HALF_LONG_BITS) + r;
writer.WriteLong(key);
} }
} }
}
public void Deserialize(IBinaryReader reader) public void Deserialize(IBinaryReader reader)
{ {
if (reader.ReadBoolean() != false) if (reader.ReadBoolean() != false)
@ -100,16 +91,13 @@ namespace ZeroLevel.HNSW
_set = new ConcurrentDictionary<int, HashSet<int>>(); _set = new ConcurrentDictionary<int, HashSet<int>>();
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
{ {
var key = reader.ReadLong(); var id = reader.ReadInt32();
var links_count = reader.ReadInt32();
var id1 = (int)(key >> HALF_LONG_BITS); _set[id] = new HashSet<int>(links_count);
var id2 = (int)(key - (((long)id1) << HALF_LONG_BITS)); for (var l = 0; l < links_count; l++)
if (!_set.ContainsKey(id1))
{ {
_set[id1] = new HashSet<int>(); _set[id].Add(reader.ReadInt32());
} }
_set[id1].Add(id2);
} }
} }
} }

@ -4,7 +4,9 @@ namespace ZeroLevel.HNSW
{ {
public static class SmallWorld public static class SmallWorld
{ {
public static SmallWorld<TItem> CreateWorld<TItem>(NSWOptions<TItem> options) => new SmallWorld<TItem>(options); public static SmallWorld<TItem> CreateWorld<TItem>(NSWOptions<TItem> options)
public static SmallWorld<TItem> CreateWorldFrom<TItem>(NSWOptions<TItem> options, Stream stream) => new SmallWorld<TItem>(options, stream); => new SmallWorld<TItem>(options);
public static SmallWorld<TItem> CreateWorldFrom<TItem>(NSWOptions<TItem> options, Stream stream)
=> new SmallWorld<TItem>(options, stream);
} }
} }

@ -3,8 +3,41 @@
<PropertyGroup> <PropertyGroup>
<TargetFramework>net6.0</TargetFramework> <TargetFramework>net6.0</TargetFramework>
<Platforms>AnyCPU;x64</Platforms> <Platforms>AnyCPU;x64</Platforms>
<PlatformTarget>x64</PlatformTarget>
<DebugType>full</DebugType>
<Version>1.0.0.1</Version>
<Company>ogoun</Company>
<Authors>Ogoun</Authors>
<Copyright>Copyright Ogoun 2022</Copyright>
<PackageProjectUrl>https://github.com/ogoun/Zero/wiki</PackageProjectUrl>
<PackageIcon>zero.png</PackageIcon>
<RepositoryUrl>https://github.com/ogoun/Zero</RepositoryUrl>
<RepositoryType>git</RepositoryType>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<Optimize>False</Optimize>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Optimize>False</Optimize>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<Optimize>True</Optimize>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<None Include="..\zero.png">
<Pack>True</Pack>
<PackagePath>\</PackagePath>
</None>
</ItemGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="System.Numerics.Vectors" Version="4.5.0" /> <PackageReference Include="System.Numerics.Vectors" Version="4.5.0" />
</ItemGroup> </ItemGroup>

@ -13,12 +13,12 @@
<PackageLicenseUrl></PackageLicenseUrl> <PackageLicenseUrl></PackageLicenseUrl>
<PackageIconUrl></PackageIconUrl> <PackageIconUrl></PackageIconUrl>
<RepositoryUrl>https://github.com/ogoun/Zero</RepositoryUrl> <RepositoryUrl>https://github.com/ogoun/Zero</RepositoryUrl>
<RepositoryType>GitHub</RepositoryType> <RepositoryType>git</RepositoryType>
<Version>3.3.5.7</Version> <Version>3.3.5.7</Version>
<FileVersion>3.3.5.7</FileVersion> <FileVersion>3.3.5.7</FileVersion>
<Platforms>AnyCPU;x64;x86</Platforms> <Platforms>AnyCPU;x64;x86</Platforms>
<PackageIcon>zero.png</PackageIcon> <PackageIcon>zero.png</PackageIcon>
<DebugType>none</DebugType> <DebugType>full</DebugType>
<ErrorReport>none</ErrorReport> <ErrorReport>none</ErrorReport>
<ApplicationIcon>zero.ico</ApplicationIcon> <ApplicationIcon>zero.ico</ApplicationIcon>
</PropertyGroup> </PropertyGroup>

Loading…
Cancel
Save

Powered by TurnKey Linux.