You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Zero/ZeroLevel.HNSW/Layer.cs

246 lines
9.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

using System;
using System.Collections.Generic;
using System.Linq;
namespace ZeroLevel.HNSW
{
/// <summary>
/// NSW graph
/// </summary>
internal sealed class Layer<TItem>
{
// Graph options; this class reads the per-node link limit (M) and the distance function (Distance) from it.
private readonly NSWOptions<TItem> _options;
// Vector storage; indexed by node id and queried for its Count.
private readonly VectorSet<TItem> _vectors;
// Link storage for this layer: links are added, relinked and looked up by node id through it.
private CompactBiDirectionalLinksSet _links = new CompactBiDirectionalLinksSet();
/// <summary>
/// Number of nodes at this layer, reported as the size of the link set
/// shifted right by one bit (i.e. halved).
/// </summary>
public int Count
{
    get { return _links.Count >> 1; }
}
/// <summary>
/// Creates a layer bound to the given options and vector storage.
/// </summary>
/// <param name="options">Graph options stored for later use by the layer.</param>
/// <param name="vectors">Vector storage stored for later use by the layer.</param>
public Layer(NSWOptions<TItem> options, VectorSet<TItem> vectors)
{
    this._vectors = vectors;
    this._options = options;
}
/// <summary>
/// Connects the new node <paramref name="q"/> to the already present node <paramref name="p"/>.
/// When p has fewer than M links the pair is linked directly; otherwise the longest
/// link of p is handed to <c>Relink</c> together with q.
/// </summary>
/// <param name="q">Id of the node being inserted.</param>
/// <param name="p">Id of the found node that q should be attached to.</param>
/// <param name="qpDistance">Distance between q and p.</param>
public void AddBidirectionallConnectionts(int q, int p, float qpDistance)
{
    // Links currently registered for the found node p.
    var linksOfP = _links.FindLinksForId(p).ToArray();

    // p still has room (|eConn| < M): attach the new node directly.
    if (linksOfP.Length < _options.M)
    {
        _links.Add(q, p, qpDistance);
        return;
    }

    // p is saturated: locate its link with the largest distance
    // (the first one wins when several share the maximum).
    var longest = linksOfP[0];
    foreach (var link in linksOfP.Skip(1))
    {
        if (link.Item3 > longest.Item3)
        {
            longest = link;
        }
    }

    // Relink around the longest connection, inserting the new node between its ends.
    // NOTE(review): exact Relink semantics live in CompactBiDirectionalLinksSet - confirm there.
    var firstEnd = longest.Item1;
    var secondEnd = longest.Item2;
    var secondEndToQ = _options.Distance(_vectors[secondEnd], _vectors[q]);
    _links.Relink(firstEnd, secondEnd, q, qpDistance, secondEndToQ);
}
#region Implementation of https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
/// <summary>
/// Algorithm 2 (SEARCH-LAYER): starting from <paramref name="entryPointId"/>, repeatedly
/// expands the closest pending candidate along this layer's links and collects the
/// nearest nodes found into <paramref name="W"/>.
/// </summary>
/// <param name="entryPointId">Id of the node the search starts from.</param>
/// <param name="targetCosts">Returns the distance from the query element to the node with the given id.</param>
/// <param name="W">
/// Result set (node id -> distance to the query), filled in place. It must not already
/// contain <paramref name="entryPointId"/>, because the entry point is inserted with Add.
/// </param>
/// <param name="ef">Size limit for <paramref name="W"/>: the farthest entry is dropped whenever an insertion pushes W past it.</param>
public void RunKnnAtLayer(int entryPointId, Func<int, float> targetCosts, IDictionary<int, float> W, int ef)
{
    /*
     * v ← ep // set of visited elements
     * C ← ep // set of candidates
     * W ← ep // dynamic list of found nearest neighbors
     * while │C│ > 0
     *   c ← extract nearest element from C to q
     *   f ← get furthest element from W to q
     *   if distance(c, q) > distance(f, q)
     *     break // all elements in W are evaluated
     *   for each e ∈ neighbourhood(c) at layer lc // update C and W
     *     if e ∉ v
     *       v ← v ⋃ e
     *       f ← get furthest element from W to q
     *       if distance(e, q) < distance(f, q) or │W│ < ef
     *         C ← C ⋃ e
     *         W ← W ⋃ e
     *         if │W│ > ef
     *           remove furthest element from W to q
     * return W
     */
    var v = new VisitedBitSet(_vectors.Count, _options.M);
    // v ← ep // set of visited elements
    v.Add(entryPointId);
    // C ← ep // set of candidates
    var C = new Dictionary<int, float>();
    C.Add(entryPointId, targetCosts(entryPointId));
    // W ← ep // dynamic list of found nearest neighbors
    W.Add(entryPointId, C[entryPointId]);

    // c ← extract nearest element from C to q
    (int, float) popCandidate()
    {
        var pair = C.OrderBy(x => x.Value).First();
        C.Remove(pair.Key);
        return (pair.Key, pair.Value);
    }

    // f ← get furthest element from W to q (W always holds at least the entry point here)
    (int, float) fartherFromResult()
    {
        var pair = W.OrderByDescending(x => x.Value).First();
        return (pair.Key, pair.Value);
    }

    // remove furthest element from W to q
    void fartherPopFromResult()
    {
        var pair = W.OrderByDescending(x => x.Value).First();
        W.Remove(pair.Key);
    }

    // run bfs
    while (C.Count > 0)
    {
        // get next candidate to check and expand
        var toExpand = popCandidate();
        var farthestResult = fartherFromResult();
        if (toExpand.Item2 > farthestResult.Item2)
        {
            // the closest candidate is farther than farthest result
            break;
        }

        // expand candidate
        var neighboursIds = GetNeighbors(toExpand.Item1).ToArray();
        for (int i = 0; i < neighboursIds.Length; ++i)
        {
            int neighbourId = neighboursIds[i];
            if (!v.Contains(neighbourId))
            {
                // enqueue perspective neighbours to expansion list
                farthestResult = fartherFromResult();
                var neighbourDistance = targetCosts(neighbourId);
                if (W.Count < ef || neighbourDistance < farthestResult.Item2)
                {
                    C.Add(neighbourId, neighbourDistance);
                    W.Add(neighbourId, neighbourDistance);
                    if (W.Count > ef)
                    {
                        fartherPopFromResult();
                    }
                }
                v.Add(neighbourId);
            }
        }
    }
    C.Clear();
    v.Clear();
}
/// <summary>
/// Algorithm 3 (SELECT-NEIGHBORS-SIMPLE): returns a copy of <paramref name="candidates"/>
/// reduced to the <paramref name="M"/> entries with the smallest distance values.
/// </summary>
/// <param name="distance">Not used by this implementation.</param>
/// <param name="candidates">Candidate elements (node id -> distance to the query); not modified.</param>
/// <param name="M">Number of neighbours to return; zero or a negative value yields an empty result.</param>
/// <returns>A new dictionary holding at most M nearest candidates.</returns>
public IDictionary<int, float> SELECT_NEIGHBORS_SIMPLE(Func<int, float> distance, IDictionary<int, float> candidates, int M)
{
    var W = new Dictionary<int, float>(candidates);
    var surplus = W.Count - Math.Max(M, 0);
    if (surplus > 0)
    {
        // Sort once and evict the farthest entries in one go instead of re-sorting the
        // whole set for every single eviction. The sort is stable, so ties are evicted
        // in the same order as one-at-a-time removal would pick them.
        // ToList materialises the keys before W is mutated.
        var evicted = W.OrderByDescending(e => e.Value)
                       .Take(surplus)
                       .Select(e => e.Key)
                       .ToList();
        foreach (var id in evicted)
        {
            W.Remove(id);
        }
    }
    // return M nearest elements from C to q
    return W;
}
/// <summary>
/// Algorithm 4 (SELECT-NEIGHBORS-HEURISTIC): picks up to <paramref name="M"/> neighbours
/// from the candidates, optionally widening the candidate set with the candidates'
/// own neighbours and optionally topping the result up with discarded candidates.
/// </summary>
/// <param name="distance">Returns the distance from the base element to the node with the given id.</param>
/// <param name="candidates">Candidate elements (node id -> distance to the base element); not modified.</param>
/// <param name="M">Maximum number of elements to return.</param>
/// <param name="extendCandidates">flag indicating whether or not to extend candidate list</param>
/// <param name="keepPrunedConnections">flag indicating whether or not to add discarded elements</param>
/// <returns>Output: up to M elements selected by the heuristic</returns>
public IDictionary<int, float> SELECT_NEIGHBORS_HEURISTIC(Func<int, float> distance, IDictionary<int, float> candidates, int M, bool extendCandidates, bool keepPrunedConnections)
{
    // R ← ∅
    var R = new Dictionary<int, float>();
    // W ← C // working queue for the candidates
    var W = new Dictionary<int, float>(candidates);
    // if extendCandidates // extend candidates by their neighbors
    if (extendCandidates)
    {
        var extendBuffer = new HashSet<int>();
        // for each e ∈ C
        foreach (var candidate in W)
        {
            // for each e_adj ∈ neighbourhood(e) at layer lc
            foreach (var e_adj in GetNeighbors(candidate.Key))
            {
                // if e_adj ∉ W: a neighbour that is already a candidate must be skipped,
                // otherwise W.Add below throws on the duplicate key.
                if (!W.ContainsKey(e_adj))
                {
                    extendBuffer.Add(e_adj);
                }
            }
        }
        // W ← W ⋃ e_adj (done after the scan so W is not modified while enumerated)
        foreach (var id in extendBuffer)
        {
            W.Add(id, distance(id));
        }
    }
    // Wd ← ∅ // queue for the discarded candidates
    var Wd = new Dictionary<int, float>();

    // Removes and returns the entry with the smallest distance from the given queue.
    (int, float) popNearest(Dictionary<int, float> queue)
    {
        var pair = queue.OrderBy(x => x.Value).First();
        queue.Remove(pair.Key);
        return (pair.Key, pair.Value);
    }

    // Largest distance currently stored in R; only called when R is not empty.
    float farthestResultDistance()
    {
        return R.OrderByDescending(x => x.Value).First().Value;
    }

    // while │W│ > 0 and │R│ < M
    while (W.Count > 0 && R.Count < M)
    {
        // e ← extract nearest element from W to q
        var (e, ed) = popNearest(W);
        // if e is closer to q compared to any element from R
        // NOTE(review): this compares e's distance to q with the farthest distance to q
        // stored in R; the paper compares it with the distance between e and each element
        // of R. The original criterion is kept here - confirm which one is intended.
        if (R.Count == 0 || ed < farthestResultDistance())
        {
            // R ← R ⋃ e
            R.Add(e, ed);
        }
        else
        {
            // Wd ← Wd ⋃ e
            Wd.Add(e, ed);
        }
    }
    // if keepPrunedConnections // add some of the discarded connections from Wd
    if (keepPrunedConnections)
    {
        // while │Wd│ > 0 and │R│ < M
        while (Wd.Count > 0 && R.Count < M)
        {
            // R ← R ⋃ extract nearest element from Wd to q
            var (id, d) = popNearest(Wd);
            R.Add(id, d);
        }
    }
    // return R
    return R;
}
#endregion
/// <summary>
/// Looks up the links stored for <paramref name="id"/> and projects each one to its second item.
/// </summary>
private IEnumerable<int> GetNeighbors(int id)
{
    return from link in _links.FindLinksForId(id)
           select link.Item2;
}
}
}

Powered by TurnKey Linux.