You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Zero/ZeroLevel.Sleopok.Engine/Services/Indexes/IndexBuilder.cs

80 lines
2.6 KiB

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using ZeroLevel.Implementation.Semantic.Helpers;
using ZeroLevel.Sleopok.Engine.Models;
using ZeroLevel.Sleopok.Engine.Services.Storage;
namespace ZeroLevel.Sleopok.Engine.Services.Indexes
{
    /// <summary>
    /// Writes documents of type <typeparamref name="T"/> into per-field indexes.
    /// One <see cref="IPartitionDataWriter"/> is obtained from the storage for every
    /// field described by the <see cref="IndexInfo{T}"/>; each non-array field value is
    /// split into lower-cased words and every word is written together with the
    /// document identifier.
    /// </summary>
    /// <typeparam name="T">Type of the indexed documents.</typeparam>
    internal sealed class IndexBuilder<T>
        : IIndexBuilder<T>
    {
        private readonly DataStorage _storage;
        private readonly IndexInfo<T> _indexInfo;

        // Field name -> writer obtained from the storage for that field.
        private readonly Dictionary<string, IPartitionDataWriter> _indexers = new Dictionary<string, IPartitionDataWriter>();

        // Set once Complete() has started; makes Complete()/Dispose() idempotent
        // and lets Write() reject input that could never be flushed.
        private bool _completed;

        /// <summary>
        /// Creates the builder and requests one writer per indexed field.
        /// </summary>
        /// <param name="storage">Storage that supplies the per-field writers.</param>
        /// <param name="indexInfo">Description of the document's id and fields.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="storage"/> or <paramref name="indexInfo"/> is null.
        /// </exception>
        public IndexBuilder(DataStorage storage, IndexInfo<T> indexInfo)
        {
            _storage = storage ?? throw new ArgumentNullException(nameof(storage));
            _indexInfo = indexInfo ?? throw new ArgumentNullException(nameof(indexInfo));

            foreach (var field in _indexInfo.Fields)
            {
                _indexers[field.Name] = _storage.GetWriter(field.Name);
            }
        }

        /// <summary>
        /// Completes and disposes every field writer. Safe to call more than once:
        /// only the first call does any work.
        /// </summary>
        /// <remarks>
        /// Every writer is disposed even when completing one of them fails. A single
        /// failure is rethrown as-is; several failures are combined into an
        /// <see cref="AggregateException"/>.
        /// </remarks>
        public async Task Complete()
        {
            if (_completed)
            {
                return;
            }
            _completed = true;

            var failures = new List<Exception>();
            foreach (var writer in _indexers.Values)
            {
                try
                {
                    await writer.Complete();
                }
                catch (Exception ex)
                {
                    // Remember the failure but keep going so the remaining writers
                    // are still completed and released.
                    failures.Add(ex);
                }
                finally
                {
                    writer.Dispose();
                }
            }

            if (failures.Count == 1)
            {
                // Rethrow the original exception without losing its stack trace.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(failures[0]).Throw();
            }
            if (failures.Count > 1)
            {
                throw new AggregateException(failures);
            }
        }

        /// <summary>
        /// Splits a field value into lower-cased words.
        /// </summary>
        /// <param name="value">Raw field text.</param>
        /// <returns>
        /// The words produced by <c>TextAnalizer.ExtractWords</c>, lower-cased with the
        /// invariant culture; an empty sequence for null, empty or whitespace-only input.
        /// </returns>
        private static IEnumerable<string> Preprocess(string value)
        {
            if (string.IsNullOrWhiteSpace(value))
            {
                return Enumerable.Empty<string>();
            }
            return TextAnalizer.ExtractWords(value).Select(w => w.ToLowerInvariant());
        }

        /// <summary>
        /// Indexes a batch of documents: for every non-array field, each word of the
        /// field's string representation is written to that field's writer together
        /// with the document id.
        /// </summary>
        /// <param name="batch">Documents to index.</param>
        /// <exception cref="ArgumentNullException"><paramref name="batch"/> is null.</exception>
        /// <exception cref="ObjectDisposedException">
        /// The builder has already been completed or disposed.
        /// </exception>
        public async Task Write(IEnumerable<T> batch)
        {
            if (batch == null)
            {
                throw new ArgumentNullException(nameof(batch));
            }
            if (_completed)
            {
                // The writers are already disposed; anything written now would be lost.
                throw new ObjectDisposedException(nameof(IndexBuilder<T>));
            }

            foreach (var doc in batch)
            {
                var docId = _indexInfo.GetId(doc);
                foreach (var field in _indexInfo.Fields)
                {
                    if (field.FieldType == SleoFieldType.Array)
                    {
                        // TODO (optimization): if the field already stores an array of
                        // elements, treat each element as a token. Array fields are
                        // currently not indexed.
                        continue;
                    }

                    // Look the writer up once per field instead of once per token.
                    var writer = _indexers[field.Name];
                    var value = field.Getter(doc!)?.ToString() ?? string.Empty;

                    // Preprocess yields nothing for blank values, so no extra check is needed.
                    foreach (var token in Preprocess(value))
                    {
                        await writer.Write(token, docId);
                    }
                }
            }
        }

        /// <summary>
        /// Synchronously completes the builder (see <see cref="Complete"/>). Does nothing
        /// if the builder was already completed.
        /// </summary>
        public void Dispose()
        {
            // Dispose is synchronous by contract, so the wait is unavoidable here;
            // GetAwaiter().GetResult() surfaces the original exception instead of
            // wrapping it in an AggregateException as Task.Wait() would.
            Complete().GetAwaiter().GetResult();
        }
    }
}

Powered by TurnKey Linux.