using System; using System.Collections.Generic; using System.Linq; namespace ZeroLevel.HNSW { /// /// NSW graph /// internal sealed class Layer { private readonly NSWOptions _options; private readonly VectorSet _vectors; private CompactBiDirectionalLinksSet _links = new CompactBiDirectionalLinksSet(); /// /// Count nodes at layer /// public int CountLinks => (_links.Count); public Layer(NSWOptions options, VectorSet vectors) { _options = options; _vectors = vectors; } public void AddBidirectionallConnectionts(int q, int p, float qpDistance, bool isMapLayer) { // поиск в ширину ближайших узлов к найденному var nearest = _links.FindLinksForId(p).ToArray(); // если у найденного узла максимальное количество связей // if │eConn│ > Mmax // shrink connections of e if (nearest.Length >= (isMapLayer ? _options.M * 2 : _options.M)) { // ищем связь с самой большой дистанцией float distance = nearest[0].Item3; int index = 0; for (int ni = 1; ni < nearest.Length; ni++) { if (nearest[ni].Item3 > distance) { index = ni; distance = nearest[ni].Item3; } } // делаем перелинковку вставляя новый узел между найденными var id1 = nearest[index].Item1; var id2 = nearest[index].Item2; _links.Relink(id1, id2, q, qpDistance, _options.Distance(_vectors[id2], _vectors[q])); } else { // добавляем связь нового узла к найденному _links.Add(q, p, qpDistance); } } public void Append(int q) { _links.Add(q, q, 0); } #region Implementation of https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf /// /// Algorithm 2 /// /// query element /// enter points ep /// Output: ef closest neighbors to q public void RunKnnAtLayer(int entryPointId, Func targetCosts, IDictionary W, int ef) { /* * v ← ep // set of visited elements * C ← ep // set of candidates * W ← ep // dynamic list of found nearest neighbors * while │C│ > 0 * c ← extract nearest element from C to q * f ← get furthest element from W to q * if distance(c, q) > distance(f, q) * break // all elements in W are evaluated * for each e ∈ neighbourhood(c) at layer lc // update C and W * if e ∉ v * v ← v ⋃ e * f ← get furthest element from W to q * if distance(e, q) < distance(f, q) or │W│ < ef * C ← C ⋃ e * W ← W ⋃ e * if │W│ > ef * remove furthest element from W to q * return W */ var v = new VisitedBitSet(_vectors.Count, _options.M); // v ← ep // set of visited elements v.Add(entryPointId); // C ← ep // set of candidates var C = new Dictionary(); C.Add(entryPointId, targetCosts(entryPointId)); // W ← ep // dynamic list of found nearest neighbors W.Add(entryPointId, C[entryPointId]); var popCandidate = new Func<(int, float)>(() => { var pair = C.OrderBy(e => e.Value).First(); C.Remove(pair.Key); return (pair.Key, pair.Value); }); var fartherFromResult = new Func<(int, float)>(() => { var pair = W.OrderByDescending(e => e.Value).First(); return (pair.Key, pair.Value); }); var fartherPopFromResult = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); }); // run bfs while (C.Count > 0) { // get next candidate to check and expand var toExpand = popCandidate(); var farthestResult = fartherFromResult(); if (toExpand.Item2 > farthestResult.Item2) { // the closest candidate is farther than farthest result break; } // expand candidate var neighboursIds = GetNeighbors(toExpand.Item1).ToArray(); for (int i = 0; i < neighboursIds.Length; ++i) { int neighbourId = neighboursIds[i]; if (!v.Contains(neighbourId)) { // enqueue perspective neighbours to expansion list farthestResult = fartherFromResult(); var neighbourDistance = targetCosts(neighbourId); if (W.Count < ef || neighbourDistance < farthestResult.Item2) { C.Add(neighbourId, neighbourDistance); W.Add(neighbourId, neighbourDistance); if (W.Count > ef) { fartherPopFromResult(); } } v.Add(neighbourId); } } } C.Clear(); v.Clear(); } /// /// Algorithm 3 /// public IDictionary SELECT_NEIGHBORS_SIMPLE(Func distance, IDictionary candidates, int M) { var bestN = M; var W = new Dictionary(candidates); if (W.Count > bestN) { var popFarther = new Action(() => { var pair = W.OrderByDescending(e => e.Value).First(); W.Remove(pair.Key); }); while (W.Count > bestN) { popFarther(); } } // return M nearest elements from C to q return W; } /// /// Algorithm 4 /// /// base element /// candidate elements /// flag indicating whether or not to extend candidate list /// flag indicating whether or not to add discarded elements /// Output: M elements selected by the heuristic public IDictionary SELECT_NEIGHBORS_HEURISTIC(Func distance, IDictionary candidates, int M) { // R ← ∅ var R = new Dictionary(); // W ← C // working queue for the candidates var W = new Dictionary(candidates); // if extendCandidates // extend candidates by their neighbors if (_options.ExpandBestSelection) { var extendBuffer = new HashSet(); // for each e ∈ C foreach (var e in W) { var neighbors = GetNeighbors(e.Key); // for each e_adj ∈ neighbourhood(e) at layer lc foreach (var e_adj in neighbors) { // if eadj ∉ W if (extendBuffer.Contains(e_adj) == false) { extendBuffer.Add(e_adj); } } } // W ← W ⋃ eadj foreach (var id in extendBuffer) { W[id] = distance(id); } } // Wd ← ∅ // queue for the discarded candidates var Wd = new Dictionary(); var popCandidate = new Func<(int, float)>(() => { var pair = W.OrderBy(e => e.Value).First(); W.Remove(pair.Key); return (pair.Key, pair.Value); }); var fartherFromResult = new Func<(int, float)>(() => { if (R.Count == 0) return (-1, 0f); var pair = R.OrderByDescending(e => e.Value).First(); return (pair.Key, pair.Value); }); var popNearestDiscarded = new Func<(int, float)>(() => { var pair = Wd.OrderBy(e => e.Value).First(); Wd.Remove(pair.Key); return (pair.Key, pair.Value); }); // while │W│ > 0 and │R│< M while (W.Count > 0 && R.Count < M) { // e ← extract nearest element from W to q var (e, ed) = popCandidate(); var (fe, fd) = fartherFromResult(); // if e is closer to q compared to any element from R if (R.Count == 0 || ed < fd) { // R ← R ⋃ e R.Add(e, ed); } else { // Wd ← Wd ⋃ e Wd.Add(e, ed); } } // if keepPrunedConnections // add some of the discarded // connections from Wd if (_options.KeepPrunedConnections) { // while │Wd│> 0 and │R│< M while (Wd.Count > 0 && R.Count < M) { // R ← R ⋃ extract nearest element from Wd to q var nearest = popNearestDiscarded(); R[nearest.Item1] = nearest.Item2; } } // return R return R; } #endregion private IEnumerable GetNeighbors(int id) => _links.FindLinksForId(id).Select(d => d.Item2); } }