diff --git a/ZeroLevel.UnitTests/TrieTests.cs b/ZeroLevel.UnitTests/TrieTests.cs
new file mode 100644
index 0000000..b191a91
--- /dev/null
+++ b/ZeroLevel.UnitTests/TrieTests.cs
@@ -0,0 +1,98 @@
+using Xunit;
+using ZeroLevel.Services.Semantic;
+using ZeroLevel.Services.Serialization;
+
+namespace ZeroLevel.UnitTests
+{
+    public class TrieTests
+    {
+        // Words in insertion order: the key expected for Words[i] is i + 1.
+        private static readonly string[] Words =
+        {
+            "коллекция",
+            "коллектор",
+            "колл-центр",
+            "коллектив",
+            "коллегия",
+            "метро",
+            "метрополитен",
+            "метрополит"
+        };
+
+        // Builds a tree holding every entry of Words, appended in array order.
+        private static Trie BuildTree()
+        {
+            var tree = new Trie();
+            foreach (var word in Words)
+            {
+                tree.Append(word);
+            }
+            return tree;
+        }
+
+        // Checks keys and membership of the sample words plus a few strings that were never appended.
+        private static void AssertSampleContent(Trie tree)
+        {
+            for (int i = 0; i < Words.Length; i++)
+            {
+                Assert.True(tree.Key(Words[i]) == (uint)(i + 1));
+                Assert.True(tree.Contains(Words[i]));
+            }
+
+            Assert.True(tree.Key("колл") == null);
+            Assert.True(tree.Key("центр") == null);
+
+            Assert.False(tree.Contains("колл"));
+            Assert.False(tree.Contains("коллег"));
+        }
+
+        [Fact]
+        public void MainTest()
+        {
+            // Arrange & Act
+            var tree = BuildTree();
+
+            // Assert
+            AssertSampleContent(tree);
+        }
+
+        [Fact]
+        public void SerializationTest()
+        {
+            // Arrange
+            var tree_original = BuildTree();
+
+            // Act
+            var data = MessageSerializer.Serialize(tree_original);
+            var tree = MessageSerializer.Deserialize<Trie>(data);
+
+            // Assert
+            AssertSampleContent(tree);
+        }
+
+        [Fact]
+        public void LastCharacterAndShortWordsTest()
+        {
+            // Arrange
+            var tree = new Trie();
+
+            // Act
+            tree.Append("a");
+            tree.Append("ab");
+            tree.Append("ac");
+            tree.Append("ab");
+
+            // Assert: words differing only in the last character get distinct keys
+            Assert.True(tree.Key("a") == 1);
+            Assert.True(tree.Key("ab") == 2);
+            Assert.True(tree.Key("ac") == 3);
+            Assert.False(tree.Contains("ad"));
+
+            // Assert: null and empty input is treated as absent
+            Assert.True(tree.Key(null) == null);
+            Assert.True(tree.Key("") == null);
+            Assert.False(tree.Contains(null));
+            Assert.False(tree.Contains(""));
+        }
+    }
+}
diff --git a/ZeroLevel/Services/Semantic/Trie.cs b/ZeroLevel/Services/Semantic/Trie.cs
new file mode 100644
index 0000000..c6b9d45
--- /dev/null
+++ b/ZeroLevel/Services/Semantic/Trie.cs
@@ -0,0 +1,190 @@
+using System.Collections.Generic;
+using ZeroLevel.Services.Serialization;
+
+namespace ZeroLevel.Services.Semantic
+{
+    /// <summary>
+    /// Prefix tree that gives every distinct appended word a sequential key, starting from 1
+    /// </summary>
+    public class Trie
+        : IBinarySerializable
+    {
+        /// <summary>
+        /// One character of a stored word; the path from a root to a node spells a prefix
+        /// </summary>
+        private class TrieNode
+            : IBinarySerializable
+        {
+            public char Key;
+            public uint? Value;
+            public List<TrieNode> Children;
+
+            public void Deserialize(IBinaryReader reader)
+            {
+                this.Key = reader.ReadChar();
+                if (reader.ReadBoolean())
+                {
+                    this.Value = reader.ReadUInt32();
+                }
+                else
+                {
+                    this.Value = null;
+                }
+                this.Children = reader.ReadCollection<TrieNode>();
+            }
+
+            public void Serialize(IBinaryWriter writer)
+            {
+                writer.WriteChar(this.Key);
+                // A flag precedes the optional value so that Deserialize knows whether to read it
+                if (this.Value.HasValue)
+                {
+                    writer.WriteBoolean(true);
+                    writer.WriteUInt32(this.Value.Value);
+                }
+                else
+                {
+                    writer.WriteBoolean(false);
+                }
+                if (this.Children == null)
+                {
+                    // NOTE(review): presumably mirrors how WriteCollection encodes an empty collection - confirm
+                    writer.WriteInt32(0);
+                }
+                else
+                {
+                    writer.WriteCollection(this.Children);
+                }
+            }
+
+            /// <summary>
+            /// Returns the node with the given key, or null when there is none (or no list at all)
+            /// </summary>
+            internal static TrieNode Find(List<TrieNode> nodes, char key)
+            {
+                if (nodes == null)
+                {
+                    return null;
+                }
+                for (int i = 0; i < nodes.Count; i++)
+                {
+                    if (nodes[i].Key == key)
+                    {
+                        return nodes[i];
+                    }
+                }
+                return null;
+            }
+
+            /// <summary>
+            /// Stores the rest of the word below this node, creating missing nodes on the way
+            /// </summary>
+            /// <param name="word">Word being added</param>
+            /// <param name="word_index">Key counter; incremented only when the word is new</param>
+            /// <param name="index">Count of leading characters already matched, this node included</param>
+            internal void Append(string word, ref uint word_index, int index)
+            {
+                var node = this;
+                for (int i = index; i < word.Length; i++)
+                {
+                    if (node.Children == null)
+                    {
+                        node.Children = new List<TrieNode>();
+                    }
+                    var next = Find(node.Children, word[i]);
+                    if (next == null)
+                    {
+                        next = new TrieNode { Key = word[i] };
+                        node.Children.Add(next);
+                    }
+                    node = next;
+                }
+                // The value lives on the node of the LAST character, so words that differ
+                // only in their final character end on different nodes
+                if (!node.Value.HasValue)
+                {
+                    node.Value = ++word_index;
+                }
+            }
+
+            /// <summary>
+            /// Looks up the key of the word without modifying the tree
+            /// </summary>
+            /// <param name="word">Word to look up</param>
+            /// <param name="index">Count of leading characters already matched, this node included</param>
+            /// <returns>Key of the word, or null when the word was never appended</returns>
+            internal uint? GetKey(string word, int index)
+            {
+                var node = this;
+                for (int i = index; i < word.Length; i++)
+                {
+                    node = Find(node.Children, word[i]);
+                    if (node == null)
+                    {
+                        return null;
+                    }
+                }
+                return node.Value;
+            }
+        }
+
+        private List<TrieNode> _roots;
+        private uint _word_index = 0;
+
+        public Trie()
+        {
+            _roots = new List<TrieNode>();
+        }
+
+        /// <summary>
+        /// Adds the word to the tree; null, empty and already stored words are ignored
+        /// </summary>
+        public void Append(string word)
+        {
+            if (string.IsNullOrEmpty(word))
+            {
+                return;
+            }
+            var root = TrieNode.Find(_roots, word[0]);
+            if (root == null)
+            {
+                root = new TrieNode { Key = word[0] };
+                _roots.Add(root);
+            }
+            root.Append(word, ref _word_index, 1);
+        }
+
+        /// <summary>
+        /// Returns the key assigned to the word, or null when the word is null, empty or unknown
+        /// </summary>
+        public uint? Key(string word)
+        {
+            if (string.IsNullOrEmpty(word))
+            {
+                return null;
+            }
+            return TrieNode.Find(_roots, word[0])?.GetKey(word, 1);
+        }
+
+        /// <summary>
+        /// Tells whether the exact word (not merely a prefix of a stored word) was appended
+        /// </summary>
+        public bool Contains(string word)
+        {
+            return Key(word).HasValue;
+        }
+
+        public void Serialize(IBinaryWriter writer)
+        {
+            writer.WriteUInt32(this._word_index);
+            writer.WriteCollection(this._roots);
+        }
+
+        public void Deserialize(IBinaryReader reader)
+        {
+            this._word_index = reader.ReadUInt32();
+            // Keep _roots non-null even if the reader yields null for an empty collection
+            this._roots = reader.ReadCollection<TrieNode>() ?? new List<TrieNode>();
+        }
+    }
+}
diff --git a/ZeroLevel/Services/Serialization/IBinaryReader.cs b/ZeroLevel/Services/Serialization/IBinaryReader.cs
index ca2cf76..8235d38 100644
--- a/ZeroLevel/Services/Serialization/IBinaryReader.cs
+++ b/ZeroLevel/Services/Serialization/IBinaryReader.cs
@@ -8,6 +8,8 @@ namespace ZeroLevel.Services.Serialization
     {
         bool ReadBoolean();
 
+        char ReadChar();
+
         byte ReadByte();
 
         byte[] ReadBytes();
diff --git a/ZeroLevel/Services/Serialization/IBinaryWriter.cs b/ZeroLevel/Services/Serialization/IBinaryWriter.cs
index 714d228..8d2609c 100644
--- a/ZeroLevel/Services/Serialization/IBinaryWriter.cs
+++ b/ZeroLevel/Services/Serialization/IBinaryWriter.cs
@@ -10,6 +10,8 @@ namespace ZeroLevel.Services.Serialization
     {
         void WriteBoolean(bool val);
 
+        void WriteChar(char val);
+
         void WriteByte(byte val);
 
         void WriteBytes(byte[] val);
diff --git a/ZeroLevel/Services/Serialization/MemoryStreamReader.cs b/ZeroLevel/Services/Serialization/MemoryStreamReader.cs
index c2e2f33..65de261 100644
--- a/ZeroLevel/Services/Serialization/MemoryStreamReader.cs
+++ b/ZeroLevel/Services/Serialization/MemoryStreamReader.cs
@@ -55,6 +55,18 @@ namespace ZeroLevel.Services.Serialization
             return (byte)_stream.ReadByte();
         }
 
+        /// <summary>
+        /// Reading char (2 bytes)
+        /// </summary>
+        public char ReadChar()
+        {
+            // NOTE(review): 2 bytes are read below, so 2 are required here (was 1) - confirm CheckOutOfRange semantics
+            if (CheckOutOfRange(_stream, 2))
+                throw new OutOfMemoryException("Array index out of bounds");
+            var buffer = ReadBuffer(2);
+            return BitConverter.ToChar(buffer, 0);
+        }
+
         /// <summary>
         /// Reading bytes
         /// </summary>
diff --git a/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs b/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs
index 6809d59..cc78595 100644
--- a/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs
+++ b/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs
@@ -45,6 +45,14 @@ namespace ZeroLevel.Services.Serialization
             _stream.WriteByte(val);
         }
 
+        /// <summary>
+        /// Write char (2 bytes)
+        /// </summary>
+        public void WriteChar(char val)
+        {
+            _stream.Write(BitConverter.GetBytes(val), 0, 2);
+        }
+
         /// <summary>
         /// Write array bytes
         /// </summary>
diff --git a/ZeroLevel/ZeroLevel.csproj b/ZeroLevel/ZeroLevel.csproj
index 5f09a63..4cc8295 100644
--- a/ZeroLevel/ZeroLevel.csproj
+++ b/ZeroLevel/ZeroLevel.csproj
@@ -5,16 +5,17 @@
 Infrastructure layer library
 ogoun
 ogoun
- 3.0.0.5
- Fixes
+ 3.0.0.6
+ Added char serialization
+Added prefix tree (Trie)
 https://github.com/ogoun/Zero/wiki
 Copyright Ogoun 2019
 https://opensource.org/licenses/MIT
 https://raw.githubusercontent.com/ogoun/Zero/master/zero.png
 https://github.com/ogoun/Zero
 GitHub
- 3.0.5
- 3.0.0.5
+ 3.0.6
+ 3.0.0.6