pull/1/head
a.bozhenov 5 years ago
parent e2e6d4748b
commit c16d456c6a

@ -1,4 +1,5 @@
using System; using System;
using System.Net;
using ZeroLevel; using ZeroLevel;
namespace TestApp namespace TestApp

@ -0,0 +1,34 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Xunit;
using ZeroLevel.Services.Semantic;
namespace ZeroLevel.UnitTests
{
/// <summary>
/// Unit tests for the semantic services.
/// </summary>
public class SemanticTests
{
    /// <summary>
    /// Verifies that <c>WordTokenizer.Tokenize</c> splits a sentence containing
    /// punctuation, parentheses and mixed Cyrillic/Latin words into the expected
    /// lower-cased terms, in order.
    /// </summary>
    [Fact]
    public void WordTokenizerTest()
    {
        // Arrange
        var line = "Хорошее понимание проекций, отражений и векторных операций (как в истинном значении скалярного (dot) и векторного (cross) произведений векторов) обычно приходит с растущим чувством беспокойства при использованием тригонометрии. ";
        var expected = new string[]
        {
            "хорошее", "понимание", "проекций", "отражений", "и",
            "векторных", "операций", "как", "в", "истинном",
            "значении", "скалярного", "dot", "и", "векторного", "cross", "произведений",
            "векторов", "обычно", "приходит", "с", "растущим", "чувством", "беспокойства",
            "при", "использованием", "тригонометрии"
        };

        // Act
        var terms = WordTokenizer.Tokenize(line).ToArray();

        // Assert
        // A single collection assertion checks both length and element-wise (ordinal)
        // equality, and on failure reports the differing values instead of just "False".
        Assert.Equal(expected, terms);
    }
}
}

@ -0,0 +1,36 @@
using System.Collections.Generic;
using ZeroLevel.Services.Semantic;
using ZeroLevel.Services.Serialization;
namespace ZeroLevel.Services.Semantic
{
/// <summary>
/// Vocabulary for a single language, backed by a <see cref="Trie"/> that maps
/// words to numeric keys. Words are Unicode-normalized (default form, NFC)
/// both when they are stored and when they are looked up.
/// </summary>
public class LanguageDictionary
    : IBinarySerializable
{
    private Trie _words = new Trie();

    /// <summary>
    /// Gets the key the trie reports for <paramref name="word"/>, or 0 when the
    /// trie returns no key.
    /// </summary>
    /// <param name="word">Word to look up; normalized the same way as in <see cref="Append"/>.</param>
    // The lookup must normalize exactly like Append does, otherwise a word appended
    // in a decomposed form could never be found again by the same string.
    // A null word is passed through untouched so the trie decides how to treat it.
    public uint this[string word] => _words.Key(word?.Normalize()) ?? 0;

    /// <summary>
    /// Returns the word the trie associates with <paramref name="key"/>.
    /// </summary>
    public string Word(uint key) => _words.Word(key);

    /// <summary>
    /// Keys exposed by the underlying trie.
    /// </summary>
    public IEnumerable<uint> Keys => _words.Keys;

    /// <summary>
    /// Adds the Unicode-normalized form of <paramref name="word"/> to the trie.
    /// </summary>
    /// <param name="word">Word to add; must not be null.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="word"/> is null.</exception>
    public void Append(string word)
    {
        if (word == null)
        {
            throw new System.ArgumentNullException(nameof(word));
        }
        _words.Append(word.Normalize());
    }

    /// <summary>
    /// Replaces the current trie with one read from <paramref name="reader"/>.
    /// </summary>
    public void Deserialize(IBinaryReader reader)
    {
        this._words = reader.Read<Trie>();
    }

    /// <summary>
    /// Writes the trie to <paramref name="writer"/>.
    /// </summary>
    public void Serialize(IBinaryWriter writer)
    {
        writer.Write(this._words);
    }

    /// <summary>
    /// Forwards the reverse-index switch to the underlying trie.
    /// </summary>
    /// <param name="enabled">Value passed to <c>Trie.ToggleReverseIndex</c>.</param>
    public void ToggleReverseIndex(bool enabled)
    {
        _words.ToggleReverseIndex(enabled);
    }
}
}

@ -12,7 +12,7 @@ namespace ZeroLevel.Services.Semantic
internal class TrieNode internal class TrieNode
: IBinarySerializable : IBinarySerializable
{ {
public char? Key; // settet only with rebuild index public char? Key; // setted only with rebuild index
public uint? Value; public uint? Value;
public TrieNode Parent; public TrieNode Parent;
public ConcurrentDictionary<char, TrieNode> Children; public ConcurrentDictionary<char, TrieNode> Children;

@ -0,0 +1,47 @@
using System;
using System.Collections.Generic;
using ZeroLevel.Services.Pools;
namespace ZeroLevel.Services.Semantic
{
/// <summary>
/// Splits text into lower-cased word tokens.
/// </summary>
public static class WordTokenizer
{
    /// <summary>
    /// Lazily enumerates the words of <paramref name="text"/>.
    /// A word starts at a letter and continues while characters are letters or
    /// digits; every other character acts as a separator. Digits that appear
    /// before the first letter of a word are skipped. Each word is returned
    /// lower-cased using the invariant culture.
    /// </summary>
    /// <param name="text">Text to tokenize; must not be null.</param>
    /// <returns>The words in order of appearance; empty when there are none.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public static IEnumerable<string> Tokenize(string text)
    {
        // Validate eagerly: inside an iterator the check would be deferred
        // until the first MoveNext call.
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }
        return TokenizeIterator(text);
    }

    // Scans the text once, tracking where the current word started and slicing
    // it out of the source string. No scratch buffer is involved, so word
    // length is unbounded and nothing is held across yields.
    private static IEnumerable<string> TokenizeIterator(string text)
    {
        int start = -1; // index of the first char of the current word, -1 when outside a word
        for (int i = 0; i < text.Length; i++)
        {
            char c = text[i];
            if (start < 0)
            {
                // Outside a word: only a letter may open one.
                if (char.IsLetter(c))
                {
                    start = i;
                }
            }
            else if (!char.IsLetterOrDigit(c))
            {
                // Separator closes the current word.
                yield return text.Substring(start, i - start).ToLowerInvariant();
                start = -1;
            }
        }
        // Flush a word that runs to the end of the text.
        if (start >= 0)
        {
            yield return text.Substring(start).ToLowerInvariant();
        }
    }
}
}

@ -0,0 +1,64 @@
using System.Collections.Concurrent;
using System.Threading;
using ZeroLevel.Services.Serialization;
namespace ZeroLevel.Services.Semantic
{
/// <summary>
/// Collection of <see cref="LanguageDictionary"/> instances keyed by a language
/// identifier string. Dictionaries are created on first access.
/// </summary>
public class WordsDictionary
    : IBinarySerializable
{
    private ConcurrentDictionary<string, LanguageDictionary> _dicts = new ConcurrentDictionary<string, LanguageDictionary>();

    /// <summary>
    /// Gets the dictionary registered for <paramref name="lang"/>, creating and
    /// registering an empty one when none exists yet.
    /// </summary>
    /// <param name="lang">Language identifier used as the dictionary key.</param>
    public LanguageDictionary this[string lang]
    {
        get
        {
            // GetOrAdd performs the check-and-insert as one operation on a single
            // dictionary instance, so no extra lock is needed and the lookup cannot
            // miss if Deserialize swaps the field between the check and the read.
            // Under contention the factory may run more than once, but only one
            // instance is ever stored and returned.
            return _dicts.GetOrAdd(lang, key => new LanguageDictionary());
        }
    }

    /// <summary>
    /// Forwards the reverse-index switch to every contained language dictionary.
    /// </summary>
    /// <param name="enabled">Value passed to each dictionary's <c>ToggleReverseIndex</c>.</param>
    public void ToggleReverseIndex(bool enabled)
    {
        foreach (var pair in _dicts)
        {
            pair.Value.ToggleReverseIndex(enabled);
        }
    }

    /// <summary>
    /// Replaces the current content with entries read from <paramref name="reader"/>:
    /// an Int32 count followed by that many (string key, dictionary) pairs.
    /// When a key occurs more than once, the first occurrence is kept.
    /// </summary>
    public void Deserialize(IBinaryReader reader)
    {
        int count = reader.ReadInt32();
        // Fill a local instance and publish it once, so readers never observe
        // a partially loaded collection.
        var loaded = new ConcurrentDictionary<string, LanguageDictionary>();
        for (int i = 0; i < count; i++)
        {
            string key = reader.ReadString();
            loaded.TryAdd(key, reader.Read<LanguageDictionary>());
        }
        this._dicts = loaded;
    }

    /// <summary>
    /// Writes the entry count followed by each (key, dictionary) pair to
    /// <paramref name="writer"/>.
    /// </summary>
    public void Serialize(IBinaryWriter writer)
    {
        // Work from a snapshot so the written count always matches the number of
        // entries that follow, even if dictionaries are added concurrently.
        var snapshot = _dicts.ToArray();
        writer.WriteInt32(snapshot.Length);
        foreach (var pair in snapshot)
        {
            writer.WriteString(pair.Key);
            writer.Write<LanguageDictionary>(pair.Value);
        }
    }
}
}
Loading…
Cancel
Save

Powered by TurnKey Linux.