using System;
using System.Collections.Generic;
using System.Linq;
using ZeroLevel.Services.Serialization;
namespace ZeroLevel.HNSW.Services.OPT
{
/// <summary>
/// NSW graph
/// </summary>
internal sealed class OptLayer
: IBinarySerializable
{
// Graph options; this class reads M, Distance, ExpandBestSelection and KeepPrunedConnections from it.
private readonly NSWOptions _options;
// Vector storage; indexed by node id and queried for its Count in this class.
private readonly VectorSet _vectors;
// Link storage of this layer; all link queries and mutations go through it.
private readonly CompactBiDirectionalLinksSet _links;
/// <summary>
/// Link collection exposed by the underlying link set.
/// </summary>
internal SortedList Links => _links.Links;
/// <summary>
/// True when the underlying link set reports at least one link.
/// </summary>
internal bool HasLinks => (_links.Count > 0);
/// <summary>
/// Creates a layer with an empty link set.
/// </summary>
/// <param name="options">HNSW graph options</param>
/// <param name="vectors">General vector set</param>
internal OptLayer(NSWOptions options, VectorSet vectors)
{
    (_options, _vectors) = (options, vectors);
    _links = new CompactBiDirectionalLinksSet();
}
/// <summary>
/// Connects the new node <paramref name="q"/> to the existing node <paramref name="p"/>.
/// When <paramref name="p"/> has reached its link limit, or its only link is a self-link,
/// one existing link is replaced via a relink through <paramref name="q"/>; otherwise a
/// plain q-p link is added. A self-link is always replaced first, then the link with the
/// greatest stored distance.
/// </summary>
/// <param name="q">New node</param>
/// <param name="p">The node with which the connection will be made</param>
/// <param name="qpDistance">Distance value stored for the q-p link</param>
/// <param name="isMapLayer">When true the link limit is M * 2, otherwise M</param>
internal void AddBidirectionallConnections(int q, int p, float qpDistance, bool isMapLayer)
{
    // Links currently registered for the found node.
    var nearest = _links.FindLinksForId(p).ToArray();
    var maxLinks = isMapLayer ? _options.M * 2 : _options.M;

    // Index of the existing link to relink through q, or -1 when a plain link is added.
    int replaceIndex = -1;

    // if │eConn│ > Mmax // shrink connections of e
    // The Length > 0 guard keeps nearest[0] safe when the limit is 0 and no links exist yet.
    if (nearest.Length > 0 && nearest.Length >= maxLinks)
    {
        // Look for the link with the greatest distance.
        replaceIndex = 0;
        float distance = nearest[0].Item3;
        // The scan starts at 0 so that a self-link stored in the first slot is detected too.
        for (int ni = 0; ni < nearest.Length; ni++)
        {
            // A leftover link of a node to itself is removed first.
            if (nearest[ni].Item1 == nearest[ni].Item2)
            {
                replaceIndex = ni;
                break;
            }
            if (nearest[ni].Item3 > distance)
            {
                replaceIndex = ni;
                distance = nearest[ni].Item3;
            }
        }
    }
    else if (nearest.Length == 1 && nearest[0].Item1 == nearest[0].Item2)
    {
        // The only link is a self-link: replace it instead of adding next to it.
        replaceIndex = 0;
    }

    if (replaceIndex >= 0)
    {
        // Relink by inserting the new node between the two ends of the chosen link.
        var id1 = nearest[replaceIndex].Item1;
        var id2 = nearest[replaceIndex].Item2;
        _links.Relink(id1, id2, q, qpDistance, _options.Distance(_vectors[id2], _vectors[q]));
    }
    else
    {
        // Add a link from the new node to the found one.
        _links.Add(q, p, qpDistance);
    }
}
/// <summary>
/// Adds node <paramref name="q"/> with a single link to itself at distance 0.
/// </summary>
/// <param name="q">Id of the node to add</param>
internal void Append(int q) => _links.Add(q, q, 0);
#region Implementation of https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
/// <summary>
/// Algorithm 2: best-first search of the layer starting from a single entry point.
/// The found neighbors are accumulated in <paramref name="W"/>, which is trimmed to at
/// most <paramref name="ef"/> elements after every insertion.
/// </summary>
/// <param name="entryPointId">enter point ep</param>
/// <param name="targetCosts">returns the cost of a node id with respect to the query element</param>
/// <param name="W">heap that receives the closest neighbors found</param>
/// <param name="ef">number of closest neighbors to keep</param>
internal void KNearestAtLayer(int entryPointId, Func targetCosts, BinaryHeap W, int ef)
{
    // visited set, seeded with the entry point
    var visited = new VisitedBitSet(_vectors.Count, _options.M);
    visited.Add(entryPointId);

    var entryCost = targetCosts(entryPointId);

    // candidate queue and result heap both start with the entry point
    var candidates = new BinaryHeap();
    candidates.Push(entryPointId, entryCost);
    W.Push(entryPointId, entryCost);

    while (candidates.Count > 0)
    {
        // take the closest unexpanded candidate
        var current = candidates.PopNearest();
        var worstKept = W.Farthest;
        if (current.Item2 > worstKept.Item2)
        {
            // nothing left in the queue can improve the result
            break;
        }

        foreach (int neighbourId in GetNeighbors(current.Item1).ToArray())
        {
            if (visited.Contains(neighbourId))
            {
                continue;
            }
            visited.Add(neighbourId);

            worstKept = W.Farthest;
            var neighbourCost = targetCosts(neighbourId);
            bool improves = W.Count < ef || neighbourCost < worstKept.Item2;
            if (!improves)
            {
                continue;
            }

            // the neighbour is both a result and a node to expand later
            candidates.Push(neighbourId, neighbourCost);
            W.Push(neighbourId, neighbourCost);
            if (W.Count > ef)
            {
                W.PopFarthest();
            }
        }
    }

    candidates.Clear();
    visited.Clear();
}
/// <summary>
/// Algorithm 2 with node filtering: best-first search from a single entry point where
/// only nodes accepted by <paramref name="context"/> may enter <paramref name="W"/>.
/// </summary>
/// <param name="entryPointId">enter point ep</param>
/// <param name="targetCosts">returns the cost of a node id with respect to the query element</param>
/// <param name="W">heap that receives the closest active neighbors found</param>
/// <param name="ef">number of closest neighbors to keep</param>
/// <param name="context">search context; its IsActiveNode decides which nodes may become results</param>
internal void KNearestAtLayer(int entryPointId, Func targetCosts, BinaryHeap W, int ef, SearchContext context)
{
    // visited set, seeded with the entry point
    var visited = new VisitedBitSet(_vectors.Count, _options.M);
    visited.Add(entryPointId);

    // the entry point is always a candidate, but a result only when it is active
    var candidates = new BinaryHeap();
    var entryCost = targetCosts(entryPointId);
    candidates.Push(entryPointId, entryCost);
    if (context.IsActiveNode(entryPointId))
    {
        W.Push(entryPointId, entryCost);
    }

    while (candidates.Count > 0)
    {
        // take the closest unexpanded candidate
        var current = candidates.PopNearest();
        if (W.Count > 0 && current.Item2 > W.Farthest.Item2)
        {
            // the closest candidate is already farther than the farthest result
            break;
        }

        foreach (int neighbourId in GetNeighbors(current.Item1).ToArray())
        {
            if (visited.Contains(neighbourId))
            {
                continue;
            }
            visited.Add(neighbourId);

            var neighbourCost = targetCosts(neighbourId);
            if (context.IsActiveNode(neighbourId))
            {
                bool improves = W.Count < ef || (W.Count > 0 && neighbourCost < W.Farthest.Item2);
                if (improves)
                {
                    W.Push(neighbourId, neighbourCost);
                    if (W.Count > ef)
                    {
                        W.PopFarthest();
                    }
                }
            }

            // NOTE(review): neighbours are queued for expansion only while W is not full,
            // so exploration stops once ef results are collected - confirm this is intended.
            if (W.Count < ef)
            {
                candidates.Push(neighbourId, neighbourCost);
            }
        }
    }

    candidates.Clear();
    visited.Clear();
}
/// <summary>
/// Algorithm 2, modified for LookAlike: the search starts from the neighbors of every
/// entry point of <paramref name="context"/> and uses the distances stored on the links
/// instead of a query cost function. Only nodes accepted by the context's IsActiveNode
/// may enter <paramref name="W"/>.
/// </summary>
/// <param name="W">heap that receives the found neighbors; trimmed to at most <paramref name="ef"/> elements</param>
/// <param name="ef">number of neighbors to keep</param>
/// <param name="context">search context providing EntryPoints and IsActiveNode</param>
internal void KNearestAtLayer(BinaryHeap W, int ef, SearchContext context)
{
    // v: set of visited elements
    var v = new VisitedBitSet(_vectors.Count, _options.M);
    // C: set of candidates
    var C = new BinaryHeap();

    // Seed the candidates with the direct neighbors of every entry point.
    foreach (var ep in context.EntryPoints)
    {
        foreach (int neighbourId in GetNeighbors(ep).ToArray())
        {
            // Queue the neighbor itself (not the entry point) under the stored link distance.
            C.Push(neighbourId, _links.Distance(ep, neighbourId));
        }
        v.Add(ep);
    }

    // Phase 1: move the closest active candidates into W.
    while (C.Count > 0)
    {
        var candidate = C.PopNearest();
        if (W.Count > 0 && candidate.Item2 > W.Farthest.Item2)
        {
            // the closest candidate is farther than the farthest result
            break;
        }
        if (v.Contains(candidate.Item1))
        {
            // an entry point (e.g. reached through a self-link) or an id already accepted
            continue;
        }
        if (context.IsActiveNode(candidate.Item1))
        {
            if (W.Count < ef || W.Count == 0 || candidate.Item2 < W.Farthest.Item2)
            {
                W.Push(candidate.Item1, candidate.Item2);
                // mark accepted ids so that phase 2 cannot add them to W a second time
                v.Add(candidate.Item1);
                if (W.Count > ef)
                {
                    W.PopFarthest();
                }
            }
        }
    }

    // W can exceed ef only if the caller passed it in over-filled: trim and stop.
    if (W.Count > ef)
    {
        while (W.Count > ef)
        {
            W.PopFarthest();
        }
        C.Clear();
        v.Clear();
        return;
    }

    // Phase 2: expand from everything found so far (plus any candidates left in C).
    foreach (var c in W)
    {
        C.Push(c.Item1, c.Item2);
    }
    while (C.Count > 0)
    {
        var toExpand = C.PopNearest();
        foreach (int neighbourId in GetNeighbors(toExpand.Item1).ToArray())
        {
            if (v.Contains(neighbourId))
            {
                continue;
            }
            v.Add(neighbourId);

            // the distance is taken from the link between the expanded node and its neighbor
            var neighbourDistance = _links.Distance(toExpand.Item1, neighbourId);
            if (context.IsActiveNode(neighbourId))
            {
                if (W.Count < ef || (W.Count > 0 && neighbourDistance < W.Farthest.Item2))
                {
                    W.Push(neighbourId, neighbourDistance);
                    if (W.Count > ef)
                    {
                        W.PopFarthest();
                    }
                }
            }
            // keep expanding only while W still has room
            if (W.Count < ef)
            {
                C.Push(neighbourId, neighbourDistance);
            }
        }
    }

    C.Clear();
    v.Clear();
}
/// <summary>
/// Algorithm 3: keeps the <paramref name="M"/> nearest elements of <paramref name="W"/>
/// by repeatedly dropping the farthest one.
/// </summary>
/// <param name="W">candidate elements; trimmed in place</param>
/// <param name="M">number of elements to keep</param>
/// <returns>The same heap instance, holding at most M elements</returns>
internal BinaryHeap SELECT_NEIGHBORS_SIMPLE(BinaryHeap W, int M)
{
    while (W.Count > M)
    {
        W.PopFarthest();
    }
    return W;
}
/// <summary>
/// Algorithm 4: selects up to <paramref name="M"/> elements from <paramref name="W"/>.
/// When the ExpandBestSelection option is set, W is first extended with the neighbors of
/// its elements. When the KeepPrunedConnections option is set, discarded candidates are
/// used to fill the result up to M.
/// </summary>
/// <param name="distance">returns the distance of a node id to the base element</param>
/// <param name="W">candidate elements; consumed (and possibly extended) by this method</param>
/// <param name="M">maximum number of elements to return</param>
/// <returns>Output: M elements selected by the heuristic</returns>
internal BinaryHeap SELECT_NEIGHBORS_HEURISTIC(Func distance, BinaryHeap W, int M)
{
    // R ← ∅
    var R = new BinaryHeap();
    // W ← C // working queue for the candidates
    // if extendCandidates // extend candidates by their neighbors
    if (_options.ExpandBestSelection)
    {
        // Snapshot the ids in W first, so W is not modified while it is enumerated.
        var candidateIds = new List<int>();
        foreach (var e in W)
        {
            candidateIds.Add(e.Item1);
        }
        var known = new HashSet<int>(candidateIds);
        var extension = new List<int>();
        // for each e ∈ C
        foreach (var id in candidateIds)
        {
            // for each e_adj ∈ neighbourhood(e) at layer lc
            foreach (var e_adj in GetNeighbors(id))
            {
                // if eadj ∉ W  (Add returns false for ids already in W or already collected)
                if (known.Add(e_adj))
                {
                    extension.Add(e_adj);
                }
            }
        }
        // W ← W ⋃ eadj
        foreach (var id in extension)
        {
            W.Push(id, distance(id));
        }
    }
    // Wd ← ∅ // queue for the discarded candidates
    var Wd = new BinaryHeap();
    // while │W│ > 0 and │R│< M
    while (W.Count > 0 && R.Count < M)
    {
        // e ← extract nearest element from W to q
        var (e, ed) = W.PopNearest();
        // Peek at the farthest selected element; popping it here would remove it from R
        // on every iteration and R could never hold more than one element.
        // NOTE(review): the paper compares e against its distances to the members of R;
        // this code compares against the farthest member's distance to q - confirm intent.
        if (R.Count == 0 || ed < R.Farthest.Item2)
        {
            // R ← R ⋃ e
            R.Push(e, ed);
        }
        else
        {
            // Wd ← Wd ⋃ e
            Wd.Push(e, ed);
        }
    }
    // if keepPrunedConnections // add some of the discarded connections from Wd
    if (_options.KeepPrunedConnections)
    {
        // while │Wd│> 0 and │R│< M
        while (Wd.Count > 0 && R.Count < M)
        {
            // R ← R ⋃ extract nearest element from Wd to q
            var nearest = Wd.PopNearest();
            R.Push(nearest.Item1, nearest.Item2);
        }
    }
    // return R
    return R;
}
#endregion
/// <summary>
/// Projects the links registered for <paramref name="id"/> to the second id of each link.
/// </summary>
private IEnumerable GetNeighbors(int id)
{
    return _links.FindLinksForId(id).Select(link => link.Item2);
}
/// <summary>
/// Writes the layer by delegating to the link set's serialization.
/// </summary>
public void Serialize(IBinaryWriter writer) => _links.Serialize(writer);
/// <summary>
/// Reads the layer by delegating to the link set's deserialization.
/// </summary>
public void Deserialize(IBinaryReader reader) => _links.Deserialize(reader);
/// <summary>
/// Returns the histogram calculated by the link set for the given <paramref name="mode"/>.
/// </summary>
internal Histogram GetHistogram(HistogramMode mode)
{
    return _links.CalculateHistogram(mode);
}
}
}