using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using ZeroLevel.Services.HashFunctions;
using ZeroLevel.Services.PartitionStorage;

namespace ZeroLevel.Sleopok.Engine.Services.Storage
{
    /// <summary>
    /// Token index storage. For each field it keeps, per token, a compressed
    /// list of documents and answers scored lookups over that list.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the generic type arguments used in this file (string key,
    /// string input, byte[] value, <c>StoreMetadata</c> meta) are inferred from how
    /// the values are written and read here - confirm against the declarations in
    /// <c>ZeroLevel.Services.PartitionStorage</c>.
    /// </remarks>
    public sealed class DataStorage
    {
        #region Private

        /// <summary>
        /// Accumulates the score of one document while the query tokens are
        /// processed in order.
        /// </summary>
        private class PositionDocScore
        {
            private float _score = 0.0f;
            private int _lastPosition = -1;
            private int _count = 0;

            /// <summary>
            /// Returns the final score of the document.
            /// </summary>
            /// <param name="total">Number of tokens in the query.</param>
            /// <param name="exactMatch">
            /// When true the result is 1 if the document was hit by exactly
            /// <paramref name="total"/> tokens and 0 otherwise.
            /// </param>
            public float GetScore(int total, bool exactMatch)
            {
                if (exactMatch)
                {
                    return (_count == total) ? 1.0f : 0f;
                }
                return (_score / (float)total) * _count;
            }

            /// <summary>
            /// Registers a hit of the query token at <paramref name="position"/>.
            /// </summary>
            public void Increase(int position)
            {
                // A second hit for the same position would give diff == 0 below
                // (1.0f / 0 => Infinity) and would count one token twice, which
                // breaks the exact-match check. The merge function in the
                // constructor sorts but does not de-duplicate, so presumably a
                // stored list can repeat a document - TODO confirm.
                if (position == _lastPosition)
                {
                    return;
                }

                if (position == 0)
                {
                    _score = 1.0f;
                }
                else
                {
                    // The closer this hit is to the previous one, the larger the bonus.
                    var diff = position - _lastPosition;
                    _score += 1.0f + 1.0f / diff;
                }
                _lastPosition = position;
                _count++;
            }
        }

        /// <summary>
        /// <see cref="IPartitionDataWriter"/> implementation that forwards to a
        /// store partition builder.
        /// </summary>
        private class DateSourceWriter : IPartitionDataWriter
        {
            private readonly IStorePartitionBuilder<string, string, byte[]> _builder;

            public DateSourceWriter(IStorePartitionBuilder<string, string, byte[]> builder)
            {
                _builder = builder;
            }

            /// <summary>
            /// Completes adding, compresses the partition and rebuilds its index.
            /// </summary>
            public async Task Complete()
            {
                _builder.CompleteAdding();
                _builder.Compress();
                await _builder.RebuildIndex();
            }

            /// <summary>
            /// Stores <paramref name="document"/> under the key <paramref name="host"/>.
            /// </summary>
            public async Task Write(string host, string document)
            {
                await _builder.Store(host, document);
            }

            public long GetTotalRecords() => _builder.TotalRecords;

            public void Dispose()
            {
                _builder.Dispose();
            }
        }

        #endregion

        // Modulus applied to the token hash to pick a file partition.
        private const int TokenHashBuckets = 47;

        // Value passed to IndexOptions.StepValue.
        private const int IndexStep = 32;

        private readonly IStore<string, string, byte[], StoreMetadata> _store;

        /// <summary>
        /// Creates the storage over <paramref name="rootFolder"/>.
        /// </summary>
        /// <param name="rootFolder">Root folder passed to the store options.</param>
        public DataStorage(string rootFolder)
        {
            // The readers report a failed read through DeserializeResult
            // instead of letting the exception propagate.
            var serializers = new StoreSerializers<string, string, byte[]>(
                async (w, n) => await w.WriteStringAsync(n),
                async (w, n) => await w.WriteStringAsync(n),
                async (w, n) => await w.WriteBytesAsync(n),
                async (r) =>
                {
                    try { return new DeserializeResult<string>(true, await r.ReadStringAsync()); }
                    catch { return new DeserializeResult<string>(false, string.Empty); }
                },
                async (r) =>
                {
                    try { return new DeserializeResult<string>(true, await r.ReadStringAsync()); }
                    catch { return new DeserializeResult<string>(false, string.Empty); }
                },
                async (r) =>
                {
                    try { return new DeserializeResult<byte[]>(true, await r.ReadBytesAsync()); }
                    catch { return new DeserializeResult<byte[]>(false, Array.Empty<byte>()); }
                });

            var options = new StoreOptions<string, string, byte[], StoreMetadata>
            {
                Index = new IndexOptions
                {
                    Enabled = true,
                    StepType = IndexStepType.Step,
                    StepValue = IndexStep,
                    EnableIndexInMemoryCachee = false
                },
                RootFolder = rootFolder,
                // File partition name is derived from the hash of the lower-cased token.
                FilePartition = new StoreFilePartition<string, StoreMetadata>(
                    "Token hash",
                    (token, _) => Math.Abs(StringHash.DotNetFullHash(token.ToLowerInvariant()) % TokenHashBuckets).ToString()),
                // Documents collected for one token are sorted and stored compressed.
                MergeFunction = list =>
                {
                    return Compressor.Compress(list.OrderBy(c => c).ToArray());
                },
                Partitions = new List<StoreCatalogPartition<StoreMetadata>>
                {
                    new StoreCatalogPartition<StoreMetadata>("Field", m => m.Field)
                },
                // NOTE(review): this overload compares with the current culture while
                // tokens are normalised with ToLowerInvariant elsewhere. Left as is,
                // because a different comparer could change the key ordering of data
                // that is already stored - confirm before changing.
                KeyComparer = (left, right) => string.Compare(left, right, true),
                ThreadSafeWriting = true
            };
            _store = new Store<string, string, byte[], StoreMetadata>(options, serializers);
        }

        /// <summary>
        /// Creates a writer for the partition of the given field.
        /// </summary>
        /// <param name="field">Field name used as partition metadata.</param>
        public IPartitionDataWriter GetWriter(string field)
        {
            return new DateSourceWriter(_store.CreateBuilder(new StoreMetadata { Field = field }));
        }

        /// <summary>
        /// Searches for documents.
        /// </summary>
        /// <param name="field">Field to search in.</param>
        /// <param name="tokens">Search query tokens.</param>
        /// <param name="boost">Rank multiplier applied to every result.</param>
        /// <param name="exactMatch">true to return a non-zero rank only for exact matches.</param>
        /// <returns>Document identifiers with their ranking coefficient.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
        public async Task<Dictionary<string, float>> GetDocuments(string field, string[] tokens, float boost, bool exactMatch)
        {
            if (tokens == null)
            {
                throw new ArgumentNullException(nameof(tokens));
            }

            var documents = new Dictionary<string, PositionDocScore>();
            var accessor = _store.CreateAccessor(new StoreMetadata { Field = field });
            if (accessor != null)
            {
                using (accessor)
                {
                    // The index of the token in the query is its position for scoring;
                    // it advances even when the token is not found.
                    for (int position = 0; position < tokens.Length; position++)
                    {
                        var found = await accessor.Find(tokens[position].ToLowerInvariant());
                        if (!found.Success)
                        {
                            continue;
                        }
                        foreach (var doc in Compressor.DecompressToDocuments(found.Value))
                        {
                            if (!documents.TryGetValue(doc, out var docScore))
                            {
                                docScore = new PositionDocScore();
                                documents.Add(doc, docScore);
                            }
                            docScore.Increase(position);
                        }
                    }
                }
            }
            return documents.ToDictionary(d => d.Key, d => boost * d.Value.GetScore(tokens.Length, exactMatch));
        }

        /// <summary>
        /// Reads every key of the field together with its decompressed documents.
        /// </summary>
        /// <param name="field">Field name used as partition metadata.</param>
        /// <returns>
        /// Map from key to document list; empty when no accessor could be created.
        /// </returns>
        public async Task<Dictionary<string, List<string>>> GetAllDocuments(string field)
        {
            var documents = new Dictionary<string, List<string>>();
            var accessor = _store.CreateAccessor(new StoreMetadata { Field = field });
            if (accessor != null)
            {
                using (accessor)
                {
                    await foreach (var data in accessor.Iterate())
                    {
                        data.Deconstruct(out string key, out byte[] val);
                        var docs = Compressor.DecompressToDocuments(val);
                        // The same key can be yielded more than once; merge its lists.
                        if (documents.TryGetValue(key, out var documentsIds))
                        {
                            documentsIds.AddRange(docs);
                        }
                        else
                        {
                            documents[key] = new List<string>(docs);
                        }
                    }
                }
            }
            return documents;
        }

        /// <summary>
        /// Writes the content of a field to <paramref name="stream"/> as text: one line
        /// with the key followed by one line with its documents separated by spaces.
        /// </summary>
        /// <param name="key">Field name used as partition metadata.</param>
        /// <param name="stream">
        /// Target stream. The writer created over it is disposed at the end, which
        /// also closes the stream.
        /// </param>
        public async Task Dump(string key, Stream stream)
        {
            // Asynchronous writes and disposal, so that nothing is flushed to the
            // stream synchronously from this async method.
            await using (TextWriter writer = new StreamWriter(stream))
            {
                await foreach (var i in _store.Bypass(new StoreMetadata { Field = key }))
                {
                    await writer.WriteLineAsync(i.Key);
                    await writer.WriteLineAsync(string.Join(' ', Compressor.DecompressToDocuments(i.Value)));
                }
            }
        }

        /// <summary>
        /// Returns the number of data files of the field partition.
        /// </summary>
        /// <param name="field">Field name used as partition metadata.</param>
        /// <returns>The data file count, or 0 when no accessor could be created.</returns>
        public int HasData(string field)
        {
            var partition = _store.CreateAccessor(new StoreMetadata { Field = field });
            if (partition != null)
            {
                using (partition)
                {
                    return partition.CountDataFiles();
                }
            }
            return 0;
        }
    }
}