You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Zero/ZeroLevel.HNSW/Services/ReadOnly/ReadOnlyLayer.cs

317 lines
13 KiB

3 years ago
using System;
using System.Collections.Generic;
using System.Linq;
using ZeroLevel.Services.Serialization;
3 years ago
namespace ZeroLevel.HNSW
{
/// <summary>
/// NSW graph
/// </summary>
internal sealed class ReadOnlyLayer<TItem>
: IBinarySerializable
3 years ago
{
private readonly NSWReadOnlyOption<TItem> _options;
private readonly ReadOnlyVectorSet<TItem> _vectors;
private readonly ReadOnlyCompactBiDirectionalLinksSet _links;
/// <summary>
/// HNSW layer
/// </summary>
/// <param name="options">HNSW graph options</param>
/// <param name="vectors">General vector set</param>
internal ReadOnlyLayer(NSWReadOnlyOption<TItem> options, ReadOnlyVectorSet<TItem> vectors)
3 years ago
{
_options = options;
_vectors = vectors;
_links = new ReadOnlyCompactBiDirectionalLinksSet();
3 years ago
}
3 years ago
#region Implementation of https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
/// <summary>
/// Algorithm 2
/// </summary>
/// <param name="q">query element</param>
/// <param name="ep">enter points ep</param>
/// <returns>Output: ef closest neighbors to q</returns>
internal void KNearestAtLayer(int entryPointId, Func<int, float> targetCosts, IDictionary<int, float> W, int ef)
3 years ago
{
/*
* v ep // set of visited elements
* C ep // set of candidates
* W ep // dynamic list of found nearest neighbors
* while C > 0
* c extract nearest element from C to q
* f get furthest element from W to q
* if distance(c, q) > distance(f, q)
* break // all elements in W are evaluated
* for each e neighbourhood(c) at layer lc // update C and W
* if e v
* v v e
* f get furthest element from W to q
* if distance(e, q) < distance(f, q) or W < ef
* C C e
* W W e
* if W > ef
* remove furthest element from W to q
* return W
*/
var v = new VisitedBitSet(_vectors.Count, 1);
3 years ago
// v ← ep // set of visited elements
v.Add(entryPointId);
3 years ago
// C ← ep // set of candidates
var C = new Dictionary<int, float>();
C.Add(entryPointId, targetCosts(entryPointId));
3 years ago
// W ← ep // dynamic list of found nearest neighbors
W.Add(entryPointId, C[entryPointId]);
var popCandidate = new Func<(int, float)>(() => { var pair = C.OrderBy(e => e.Value).First(); C.Remove(pair.Key); return (pair.Key, pair.Value); });
var fartherFromResult = new Func<(int, float)>(() => { var pair = W.OrderByDescending(e => e.Value).First(); return (pair.Key, pair.Value); });
var fartherPopFromResult = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); });
// run bfs
3 years ago
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = popCandidate();
var farthestResult = fartherFromResult();
if (toExpand.Item2 > farthestResult.Item2)
3 years ago
{
// the closest candidate is farther than farthest result
3 years ago
break;
}
// expand candidate
var neighboursIds = GetNeighbors(toExpand.Item1).ToArray();
for (int i = 0; i < neighboursIds.Length; ++i)
3 years ago
{
int neighbourId = neighboursIds[i];
if (!v.Contains(neighbourId))
3 years ago
{
// enqueue perspective neighbours to expansion list
farthestResult = fartherFromResult();
var neighbourDistance = targetCosts(neighbourId);
if (W.Count < ef || neighbourDistance < farthestResult.Item2)
3 years ago
{
C.Add(neighbourId, neighbourDistance);
W.Add(neighbourId, neighbourDistance);
3 years ago
if (W.Count > ef)
{
fartherPopFromResult();
3 years ago
}
}
v.Add(neighbourId);
3 years ago
}
}
}
C.Clear();
v.Clear();
3 years ago
}
/// <summary>
/// Algorithm 2
/// </summary>
/// <param name="q">query element</param>
/// <param name="ep">enter points ep</param>
/// <returns>Output: ef closest neighbors to q</returns>
internal void KNearestAtLayer(int entryPointId, Func<int, float> targetCosts, IDictionary<int, float> W, int ef, HashSet<int> activeNodes)
{
/*
* v ep // set of visited elements
* C ep // set of candidates
* W ep // dynamic list of found nearest neighbors
* while C > 0
* c extract nearest element from C to q
* f get furthest element from W to q
* if distance(c, q) > distance(f, q)
* break // all elements in W are evaluated
* for each e neighbourhood(c) at layer lc // update C and W
* if e v
* v v e
* f get furthest element from W to q
* if distance(e, q) < distance(f, q) or W < ef
* C C e
* W W e
* if W > ef
* remove furthest element from W to q
* return W
*/
var v = new VisitedBitSet(_vectors.Count, 1);
// v ← ep // set of visited elements
v.Add(entryPointId);
// C ← ep // set of candidates
var C = new Dictionary<int, float>();
C.Add(entryPointId, targetCosts(entryPointId));
// W ← ep // dynamic list of found nearest neighbors
if (activeNodes.Contains(entryPointId))
{
W.Add(entryPointId, C[entryPointId]);
}
var popCandidate = new Func<(int, float)>(() => { var pair = C.OrderBy(e => e.Value).First(); C.Remove(pair.Key); return (pair.Key, pair.Value); });
var farthestDistance = new Func<float>(() => { var pair = W.OrderByDescending(e => e.Value).First(); return pair.Value; });
var fartherPopFromResult = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); });
// run bfs
while (C.Count > 0)
{
// get next candidate to check and expand
var toExpand = popCandidate();
if (W.Count > 0)
{
if (toExpand.Item2 > farthestDistance())
{
// the closest candidate is farther than farthest result
break;
}
}
// expand candidate
var neighboursIds = GetNeighbors(toExpand.Item1).ToArray();
for (int i = 0; i < neighboursIds.Length; ++i)
{
int neighbourId = neighboursIds[i];
if (!v.Contains(neighbourId))
{
// enqueue perspective neighbours to expansion list
var neighbourDistance = targetCosts(neighbourId);
if (activeNodes.Contains(neighbourId))
{
if (W.Count < ef || (W.Count > 0 && neighbourDistance < farthestDistance()))
{
W.Add(neighbourId, neighbourDistance);
if (W.Count > ef)
{
fartherPopFromResult();
}
}
}
if (W.Count < ef)
{
C.Add(neighbourId, neighbourDistance);
}
v.Add(neighbourId);
}
}
}
C.Clear();
v.Clear();
}
3 years ago
/// <summary>
/// Algorithm 3
/// </summary>
internal IDictionary<int, float> SELECT_NEIGHBORS_SIMPLE(Func<int, float> distance, IDictionary<int, float> candidates, int M)
3 years ago
{
var bestN = M;
var W = new Dictionary<int, float>(candidates);
if (W.Count > bestN)
3 years ago
{
var popFarther = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); });
while (W.Count > bestN)
{
popFarther();
}
3 years ago
}
// return M nearest elements from C to q
return W;
3 years ago
}
3 years ago
/// <summary>
/// Algorithm 4
/// </summary>
/// <param name="q">base element</param>
/// <param name="C">candidate elements</param>
/// <param name="extendCandidates">flag indicating whether or not to extend candidate list</param>
/// <param name="keepPrunedConnections">flag indicating whether or not to add discarded elements</param>
/// <returns>Output: M elements selected by the heuristic</returns>
internal IDictionary<int, float> SELECT_NEIGHBORS_HEURISTIC(Func<int, float> distance, IDictionary<int, float> candidates, int M)
3 years ago
{
// R ← ∅
var R = new Dictionary<int, float>();
// W ← C // working queue for the candidates
var W = new Dictionary<int, float>(candidates);
3 years ago
// if extendCandidates // extend candidates by their neighbors
if (_options.ExpandBestSelection)
3 years ago
{
var extendBuffer = new HashSet<int>();
3 years ago
// for each e ∈ C
foreach (var e in W)
3 years ago
{
var neighbors = GetNeighbors(e.Key);
3 years ago
// for each e_adj ∈ neighbourhood(e) at layer lc
foreach (var e_adj in neighbors)
3 years ago
{
// if eadj ∉ W
if (extendBuffer.Contains(e_adj) == false)
3 years ago
{
extendBuffer.Add(e_adj);
3 years ago
}
}
}
// W ← W eadj
foreach (var id in extendBuffer)
{
W[id] = distance(id);
}
3 years ago
}
3 years ago
// Wd ← ∅ // queue for the discarded candidates
var Wd = new Dictionary<int, float>();
var popCandidate = new Func<(int, float)>(() => { var pair = W.OrderBy(e => e.Value).First(); W.Remove(pair.Key); return (pair.Key, pair.Value); });
var fartherFromResult = new Func<(int, float)>(() => { if (R.Count == 0) return (-1, 0f); var pair = R.OrderByDescending(e => e.Value).First(); return (pair.Key, pair.Value); });
var popNearestDiscarded = new Func<(int, float)>(() => { var pair = Wd.OrderBy(e => e.Value).First(); Wd.Remove(pair.Key); return (pair.Key, pair.Value); });
3 years ago
// while │W│ > 0 and │R│< M
while (W.Count > 0 && R.Count < M)
3 years ago
{
// e ← extract nearest element from W to q
var (e, ed) = popCandidate();
var (fe, fd) = fartherFromResult();
3 years ago
// if e is closer to q compared to any element from R
if (R.Count == 0 ||
ed < fd)
3 years ago
{
// R ← R e
R.Add(e, ed);
}
else
3 years ago
{
// Wd ← Wd e
Wd.Add(e, ed);
}
}
// if keepPrunedConnections // add some of the discarded // connections from Wd
if (_options.KeepPrunedConnections)
{
// while │Wd│> 0 and │R│< M
while (Wd.Count > 0 && R.Count < M)
3 years ago
{
// R ← R extract nearest element from Wd to q
var nearest = popNearestDiscarded();
R[nearest.Item1] = nearest.Item2;
3 years ago
}
}
// return R
return R;
}
#endregion
private IEnumerable<int> GetNeighbors(int id) => _links.FindLinksForId(id);
public void Serialize(IBinaryWriter writer)
{
_links.Serialize(writer);
}
public void Deserialize(IBinaryReader reader)
{
_links.Deserialize(reader);
}
3 years ago
}
}

Powered by TurnKey Linux.