From c9b9eadad68a251c3902a455982030112019a144 Mon Sep 17 00:00:00 2001 From: Ogoun Date: Sat, 18 Jan 2020 06:21:22 +0300 Subject: [PATCH] Fixes --- Lemmatization/Lemmatization.csproj | 16 + Lemmatization/Model/Sentence.cs | 7 + Lemmatization/Model/Token.cs | 8 + Lemmatization/Model/TokenType.cs | 35 ++ Lemmatization/Services/Adapters/LemmaLexer.cs | 21 + Lemmatization/Services/SpecTextReader.cs | 206 +++++++++ TFIDFbee/TFIDFbee.sln | 36 +- TFIDFbee/TFIDFbee/Program.cs | 362 ++++------------ TFIDFbee/TFIDFbee/Reader/IDocumentReader.cs | 11 + TFIDFbee/TFIDFbee/Reader/JsonByLineReader.cs | 130 ++++++ .../TFIDFbee/Reader/StateMachineReader.cs | 85 ++++ TFIDFbee/TFIDFbee/RecordParser.cs | 108 +++++ TFIDFbee/TFIDFbee/TFIDFbee.csproj | 9 +- .../Services/Semantic/Helpers/BagOfWords.cs | 14 +- .../Services/Serialization/IBinaryReader.cs | 58 +-- .../Services/Serialization/IBinaryWriter.cs | 50 ++- .../Serialization/MemoryStreamReader.cs | 391 +++++++++++++++--- .../Serialization/MemoryStreamWriter.cs | 299 ++++++++++++-- .../Serialization/MessageSerializer.cs | 246 ++++++++--- .../Serialization/PrimitiveTypeSerializer.cs | 198 +++++++-- 20 files changed, 1779 insertions(+), 511 deletions(-) create mode 100644 Lemmatization/Lemmatization.csproj create mode 100644 Lemmatization/Model/Sentence.cs create mode 100644 Lemmatization/Model/Token.cs create mode 100644 Lemmatization/Model/TokenType.cs create mode 100644 Lemmatization/Services/Adapters/LemmaLexer.cs create mode 100644 Lemmatization/Services/SpecTextReader.cs create mode 100644 TFIDFbee/TFIDFbee/Reader/IDocumentReader.cs create mode 100644 TFIDFbee/TFIDFbee/Reader/JsonByLineReader.cs create mode 100644 TFIDFbee/TFIDFbee/Reader/StateMachineReader.cs create mode 100644 TFIDFbee/TFIDFbee/RecordParser.cs diff --git a/Lemmatization/Lemmatization.csproj b/Lemmatization/Lemmatization.csproj new file mode 100644 index 0000000..9e12723 --- /dev/null +++ b/Lemmatization/Lemmatization.csproj @@ -0,0 +1,16 @@ + + + + netstandard2.0 + + + + + + + + + + + + diff --git a/Lemmatization/Model/Sentence.cs b/Lemmatization/Model/Sentence.cs new file mode 100644 index 0000000..bff2738 --- /dev/null +++ b/Lemmatization/Model/Sentence.cs @@ -0,0 +1,7 @@ +namespace Lemmatization +{ + public class Sentence + { + public Token[] Tokens; + } +} diff --git a/Lemmatization/Model/Token.cs b/Lemmatization/Model/Token.cs new file mode 100644 index 0000000..42a72f2 --- /dev/null +++ b/Lemmatization/Model/Token.cs @@ -0,0 +1,8 @@ +namespace Lemmatization +{ + public class Token + { + public TokenType Type; + public string Value; + } +} diff --git a/Lemmatization/Model/TokenType.cs b/Lemmatization/Model/TokenType.cs new file mode 100644 index 0000000..69740e6 --- /dev/null +++ b/Lemmatization/Model/TokenType.cs @@ -0,0 +1,35 @@ +namespace Lemmatization +{ + public enum TokenType + { + Unknown, + /// + /// Знак пукнтуации + /// + Punctuation, + /// + /// Аббревиатура + /// + Аbbreviation, + /// + /// Слово + /// + Word, + /// + /// Идентификатор (может содержать не только буквы) + /// + Identity, + /// + /// Число + /// + Number, + /// + /// Номер телефона + /// + PhoneNumber, + /// + /// Ссылка + /// + Link + } +} diff --git a/Lemmatization/Services/Adapters/LemmaLexer.cs b/Lemmatization/Services/Adapters/LemmaLexer.cs new file mode 100644 index 0000000..6bb9939 --- /dev/null +++ b/Lemmatization/Services/Adapters/LemmaLexer.cs @@ -0,0 +1,21 @@ +using LemmaSharp; +using ZeroLevel.Services.Semantic; + +namespace Lemmatization +{ + public class LemmaLexer + : ILexer + { + private readonly ILemmatizer _lemmatizer; + + public LemmaLexer() + { + _lemmatizer = new LemmatizerPrebuiltFull(LanguagePrebuilt.Russian); + } + + public string Lex(string word) + { + return _lemmatizer.Lemmatize(word.Trim().ToLowerInvariant()); + } + } +} diff --git a/Lemmatization/Services/SpecTextReader.cs b/Lemmatization/Services/SpecTextReader.cs new file mode 100644 index 0000000..9e1ef97 --- /dev/null +++ b/Lemmatization/Services/SpecTextReader.cs @@ -0,0 +1,206 @@ +using System; +using System.Collections.Generic; + +namespace Lemmatization +{ + public class SpecTextReader + { + private int _position; + private readonly string _template; + + public bool EOF => _position >= _template?.Length; + public bool StartPosition => _position == 0; + public bool LastPosition => _position == _template?.Length - 1; + public char Current => EOF ? char.MinValue : _template[_position]; + public char Next => EOF || LastPosition ? char.MinValue : _template[_position + 1]; + public char Preview => StartPosition ? char.MinValue : _template[_position - 1]; + + public SpecTextReader(string template) + { + _template = template; + _position = 0; + } + + public bool Move(int offset = 1) + { + if (EOF) return false; + if (LastPosition) { _position = _template.Length; return false; } + _position += offset; + if (_position >= _template.Length) + { + _position = _template.Length; + } + return true; + } + + public int SkipSpaces() + { + int count = 0; + while (EOF == false && char.IsWhiteSpace(Current)) { Move(); count++; } + return count; + } + + public void SkipBreaks() + { + while (EOF == false && char.IsWhiteSpace(Current)) Move(); + } + + public bool MoveBack() + { + _position = _position - 1; + if (_position < 0) + { + _position = 0; + return false; + } + return true; + } + + public int FindOffsetTo(char symbol) + { + if (_position == -1 || EOF || LastPosition) return -1; + var search_position = _position; + var sym = _template[search_position]; + while (search_position < _template.Length && false == sym.Equals(symbol)) + { + search_position++; + sym = _template[search_position]; + } + return sym.Equals(symbol) ? search_position - _position : -1; + } + + public bool Test(char sym, int offset = 0) + { + var index = _position + offset; + if (index < 0 || index >= _template.Length) return false; + return _template[index].Equals(sym); + } + + public string ReadIdentity() + { + string identity = string.Empty; + var offset = _position; + if (offset < _template.Length && char.IsLetter(_template[offset])) + { + var index = offset + 1; + while (index < _template.Length && (char.IsLetterOrDigit(_template[index]) || _template[index] == '_' || _template[index] == '-')) + index++; + identity = _template.Substring(offset, index - offset); + } + return identity.ToLowerInvariant(); + } + + public string ReadWord() + { + string identity = string.Empty; + var offset = _position; + if (offset < _template.Length && char.IsLetterOrDigit(_template[offset])) + { + var index = offset + 1; + while (index < _template.Length && char.IsLetterOrDigit(_template[index])) + index++; + identity = _template.Substring(offset, index - offset); + } + return identity; + } + + public static Token[] ParseToTokens(string line) + { + var list = new List(); + char[] buffer = new char[64]; + int count = 0; + + var add = new Action(ch => + { + buffer[count++] = ch; + if (buffer.Length == count) + { + // При нехватке места в буфере, расширяем в два раза место + var arr = new char[buffer.Length * 2]; + for (var k = 0; k < buffer.Length; k++) { arr[k] = buffer[k]; } + buffer = arr; + } + }); + + TokenType tt = TokenType.Unknown; + for (int i = 0; i < line.Length; i++) + { + if (char.IsLetter(line[i])) + { + if (tt == TokenType.Unknown) tt = TokenType.Word; + else if (tt == TokenType.Number) tt = TokenType.Identity; + add(line[i]); + } + else if (char.IsDigit(line[i])) + { + if (tt == TokenType.Unknown) tt = TokenType.Number; + else if (tt == TokenType.Word) tt = TokenType.Identity; + add(line[i]); + } + else if (char.IsWhiteSpace(line[i]) && tt != TokenType.Unknown) + { + if (count > 0) + { + list.Add(new Token { Type = tt, Value = new string(buffer, 0, count) }); + count = 0; + } + } + else + { + if (count > 0) + { + list.Add(new Token { Type = tt, Value = new string(buffer, 0, count) }); + count = 0; + } + if (char.IsWhiteSpace(line[i]) == false) + { + list.Add(new Token { Type = TokenType.Punctuation, Value = line[i].ToString() }); + } + } + } + if (count > 0) + { + list.Add(new Token { Type = tt, Value = new string(buffer, 0, count) }); + } + return list.ToArray(); + } + + public static IEnumerable ReadSentenses(string text) + { + if (false == string.IsNullOrEmpty(text)) + { + char[] buffer = new char[512]; + int count = 0; + + var add = new Action(ch => + { + buffer[count++] = ch; + if (buffer.Length == count) + { + // При нехватке места в буфере, расширяем в два раза место + var arr = new char[buffer.Length * 2]; + for (var k = 0; k < buffer.Length; k++) { arr[k] = buffer[k]; } + buffer = arr; + } + }); + + for (int i = 0; i < text.Length; i++) + { + switch (text[i]) + { + case '.': + if (count > 0) + { + yield return new Sentence { Tokens = ParseToTokens(new string(buffer, 0, count)) }; + count = 0; + } + break; + default: + add(text[i]); + break; + } + } + } + } + } +} diff --git a/TFIDFbee/TFIDFbee.sln b/TFIDFbee/TFIDFbee.sln index 2eca224..c7467e9 100644 --- a/TFIDFbee/TFIDFbee.sln +++ b/TFIDFbee/TFIDFbee.sln @@ -5,9 +5,9 @@ VisualStudioVersion = 16.0.29709.97 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TFIDFbee", "TFIDFbee\TFIDFbee.csproj", "{7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lemmatization", "..\HoKeMs\Lemmatization\Lemmatization.csproj", "{BF9F7C1E-098B-4815-BA35-8A9845C66663}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lemmatization", "..\Lemmatization\Lemmatization.csproj", "{98102DAA-F649-45FD-BBE9-F393DBF82275}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZeroLevel", "..\..\GIT\Zero\ZeroLevel\ZeroLevel.csproj", "{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZeroLevel", "..\ZeroLevel\ZeroLevel.csproj", "{6AF46F95-EA67-4258-96B1-7BBC57EB965D}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -25,22 +25,22 @@ Global {7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}.Release|Any CPU.Build.0 = Release|Any CPU {7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}.Release|x64.ActiveCfg = Release|Any CPU {7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}.Release|x64.Build.0 = Release|Any CPU - {BF9F7C1E-098B-4815-BA35-8A9845C66663}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {BF9F7C1E-098B-4815-BA35-8A9845C66663}.Debug|Any CPU.Build.0 = Debug|Any CPU - {BF9F7C1E-098B-4815-BA35-8A9845C66663}.Debug|x64.ActiveCfg = Debug|x64 - {BF9F7C1E-098B-4815-BA35-8A9845C66663}.Debug|x64.Build.0 = Debug|x64 - {BF9F7C1E-098B-4815-BA35-8A9845C66663}.Release|Any CPU.ActiveCfg = Release|Any CPU - {BF9F7C1E-098B-4815-BA35-8A9845C66663}.Release|Any CPU.Build.0 = Release|Any CPU - {BF9F7C1E-098B-4815-BA35-8A9845C66663}.Release|x64.ActiveCfg = Release|Any CPU - {BF9F7C1E-098B-4815-BA35-8A9845C66663}.Release|x64.Build.0 = Release|Any CPU - {5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Debug|Any CPU.Build.0 = Debug|Any CPU - {5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Debug|x64.ActiveCfg = Debug|x64 - {5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Debug|x64.Build.0 = Debug|x64 - {5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Release|Any CPU.ActiveCfg = Release|Any CPU - {5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Release|Any CPU.Build.0 = Release|Any CPU - {5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Release|x64.ActiveCfg = Release|x64 - {5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Release|x64.Build.0 = Release|x64 + {98102DAA-F649-45FD-BBE9-F393DBF82275}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {98102DAA-F649-45FD-BBE9-F393DBF82275}.Debug|Any CPU.Build.0 = Debug|Any CPU + {98102DAA-F649-45FD-BBE9-F393DBF82275}.Debug|x64.ActiveCfg = Debug|Any CPU + {98102DAA-F649-45FD-BBE9-F393DBF82275}.Debug|x64.Build.0 = Debug|Any CPU + {98102DAA-F649-45FD-BBE9-F393DBF82275}.Release|Any CPU.ActiveCfg = Release|Any CPU + {98102DAA-F649-45FD-BBE9-F393DBF82275}.Release|Any CPU.Build.0 = Release|Any CPU + {98102DAA-F649-45FD-BBE9-F393DBF82275}.Release|x64.ActiveCfg = Release|Any CPU + {98102DAA-F649-45FD-BBE9-F393DBF82275}.Release|x64.Build.0 = Release|Any CPU + {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Debug|x64.ActiveCfg = Debug|x64 + {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Debug|x64.Build.0 = Debug|x64 + {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Release|Any CPU.Build.0 = Release|Any CPU + {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Release|x64.ActiveCfg = Release|x64 + {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/TFIDFbee/TFIDFbee/Program.cs b/TFIDFbee/TFIDFbee/Program.cs index 61becb0..e47aa19 100644 --- a/TFIDFbee/TFIDFbee/Program.cs +++ b/TFIDFbee/TFIDFbee/Program.cs @@ -1,63 +1,103 @@ -using Accord.MachineLearning; -using Lemmatization; +using Lemmatization; using System; using System.Collections.Generic; using System.IO; using System.Linq; -using System.Text; +using TFIDFbee.Reader; using ZeroLevel; using ZeroLevel.Services.Semantic; using ZeroLevel.Services.Semantic.Helpers; +using ZeroLevel.Services.Serialization; namespace TFIDFbee { class Program { - private const string source = @"D:\Desktop\lenta-ru-data-set_19990901_20171204.json"; + private const string source = @"E:\Desktop\lenta-ru-data-set_19990901_20171204\lenta-ru-data-set_19990901_20171204.json"; private readonly static ILexProvider _lexer = new LexProvider(new LemmaLexer()); static void Main(string[] args) { + Log.AddConsoleLogger(ZeroLevel.Logging.LogLevel.FullDebug); Configuration.Save(Configuration.ReadFromApplicationConfig()); - /*var codebook = new TFIDF() + IDocumentReader reader = new StateMachineReader(source, s => ExtractLemmas(s)); + + BagOfWords codebook; + if (File.Exists("model.bin")) { - Tf = TermFrequency.Log, - Idf = InverseDocumentFrequency.Default, - UpdateDictionary = true - };*/ - var codebook = new ZeroLevel.Services.Semantic.Helpers.BagOfWords(); - foreach (var batch in ParseBatches(1000)) + Log.Info("Load model from file"); + using (var stream = new FileStream("model.bin", FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) + { + codebook = MessageSerializer.Deserialize(stream); + } + } + else { - codebook.Learn(batch); - Console.WriteLine($"Documents: {codebook.NumberOfDocuments}"); - Console.WriteLine($"Words: {codebook.NumberOfWords}"); + Log.Info("Create and train model"); + codebook = new BagOfWords(); + foreach (var batch in reader.ReadBatches(1000)) + { + codebook.Learn(batch); + Log.Info($"\r\n\tDocuments: {codebook.NumberOfDocuments}\r\n\tWords: {codebook.NumberOfWords}"); + } + using (var stream = new FileStream("model.bin", FileMode.Create, FileAccess.Write, FileShare.ReadWrite)) + { + MessageSerializer.Serialize(stream, codebook); + } } - var vectors = new List(); - foreach (var docs in ReadRawDocumentBatches(1000)) + Log.Info("Build document vectors"); + List vectors; + if (File.Exists("vectors.bin")) { - foreach (var doc in docs) + Log.Info("Load vectors from file"); + using (var stream = new FileStream("vectors.bin", FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) + { + vectors = MessageSerializer.DeserializeCompatible>(stream); + } + } + else + { + Log.Info("Create vectors"); + vectors = new List(); + foreach (var docs in reader.ReadRawDocumentBatches(1000)) + { + foreach (var doc in docs) + { + var words = _lexer.ExtractLexTokens(doc.Item2).Select(t => t.Token).Concat(_lexer.ExtractLexTokens(doc.Item1).Select(t => t.Token)).ToArray(); + vectors.Add(codebook.Transform(words)); + } + } + using (var stream = new FileStream("vectors.bin", FileMode.Create, FileAccess.Write, FileShare.ReadWrite)) { - var words = _lexer.ExtractLexTokens(doc.Item2).Select(t => t.Token)/*.Concat(_lexer.ExtractLexTokens(doc.Text).Select(t => t.Token))*/.ToArray(); - vectors.Add(codebook.Transform(words)); + MessageSerializer.SerializeCompatible>(stream, vectors); } } + Log.Info("Find similar documents"); var list = new List>(); + long total_count = (vectors.Count * vectors.Count); + long count = 0; for (int i = 0; i < vectors.Count; i++) { for (int j = i + 1; j < vectors.Count - 1; j++) { + count++; + if (count % 100000 == 0) + { + Log.Info($"Progress: {(int)(count * 100.0d / (double)total_count)} %.\tFound similars: {list.Count}."); + } if (i == j) continue; var diff = vectors[i].Measure(vectors[j]); - if (diff > double.Epsilon) + if (diff > 0.885d) { list.Add(Tuple.Create(diff, i, j)); } } } - var to_present = list.OrderBy(e => e.Item1).Take(200).ToArray(); + Log.Info("Prepare to show similar documents"); + var to_present = list.OrderBy(e => e.Item1).Take(2000).ToArray(); var to_present_map = new Dictionary>(); foreach (var e in to_present) { @@ -72,7 +112,7 @@ namespace TFIDFbee } int index = 0; - foreach (var docs in ReadRawDocumentBatches(1000)) + foreach (var docs in reader.ReadRawDocumentBatches(1000)) { foreach (var doc in docs) { @@ -84,14 +124,22 @@ namespace TFIDFbee } } + Log.Info("Show similar documents"); index = 0; - foreach (var e in to_present) + using (var output = new StreamWriter("out.txt")) { - Console.WriteLine($"#{index++}: {e.Item1}"); - Console.WriteLine(to_present_map[e.Item2].Item1); - Console.WriteLine(to_present_map[e.Item3].Item2); - Console.WriteLine("--------------------"); - Console.WriteLine(); + foreach (var e in to_present) + { + output.WriteLine($"#{index++}: {e.Item1}"); + output.WriteLine("-------------1--------------"); + output.WriteLine(to_present_map[e.Item2].Item1); + output.WriteLine(to_present_map[e.Item2].Item2); + output.WriteLine("-------------2--------------"); + output.WriteLine(to_present_map[e.Item3].Item1); + output.WriteLine(to_present_map[e.Item3].Item2); + output.WriteLine("#############################"); + output.WriteLine(); + } } Console.WriteLine("Completed"); @@ -105,263 +153,5 @@ namespace TFIDFbee .Select(t => t.Token) .Where(s => s.Any(c => char.IsLetter(c))); } - - public static IEnumerable ReadBatches(int size) - { - var list = new List(); - foreach (var batch in ReadDocumentBatches(size)) - { - yield return batch.ToArray(); - list.Clear(); - } - } - - public static IEnumerable> ReadDocumentBatches(int size) - { - string line; - var batch = new List(); - string title = null; - string text = null; - using (StreamReader reader = new StreamReader(source)) - { - while ((line = reader.ReadLine()) != null) - { - var titleIndex = line.IndexOf("\"metaTitle\":"); - if (titleIndex >= 0) - { - var start = line.IndexOf("\"", titleIndex + 12); - var end = line.LastIndexOf("\""); - if (start < end && start != -1 && end != -1) - { - title = line.Substring(start + 1, end - start - 1); - } - } - else - { - var textIndex = line.IndexOf("\"plaintext\":"); - if (textIndex >= 0 && title != null) - { - var start = line.IndexOf("\"", textIndex + 12); - var end = line.LastIndexOf("\""); - if (start < end && start != -1 && end != -1) - { - text = line.Substring(start + 1, end - start - 1); - batch.Add(ExtractLemmas(title).Concat(ExtractLemmas(text)).ToArray()); - if (batch.Count >= size) - { - yield return batch; - batch.Clear(); - GC.Collect(2); - } - title = null; - text = null; - } - } - } - } - } - if (batch.Count > 0) - { - yield return batch; - } - } - - public static IEnumerable>> ReadRawDocumentBatches(int size) - { - string line; - var batch = new List>(); - string title = null; - string text = null; - using (StreamReader reader = new StreamReader(source)) - { - while ((line = reader.ReadLine()) != null) - { - var titleIndex = line.IndexOf("\"metaTitle\":"); - if (titleIndex >= 0) - { - var start = line.IndexOf("\"", titleIndex + 12); - var end = line.LastIndexOf("\""); - if (start < end && start != -1 && end != -1) - { - title = line.Substring(start + 1, end - start - 1); - } - } - else - { - var textIndex = line.IndexOf("\"plaintext\":"); - if (textIndex >= 0 && title != null) - { - var start = line.IndexOf("\"", textIndex + 12); - var end = line.LastIndexOf("\""); - if (start < end && start != -1 && end != -1) - { - text = line.Substring(start + 1, end - start - 1); - batch.Add(Tuple.Create(title, text)); - if (batch.Count >= size) - { - yield return batch; - batch.Clear(); - GC.Collect(2); - } - title = null; - text = null; - } - } - } - } - } - if (batch.Count > 0) - { - yield return batch; - } - } - - private class RecordParser - { - private enum RPState - { - WaitKey, - ParseKey, - WaitKeyConfirm, - WaitValue, - ParseValue - } - private readonly StringBuilder _builder = new StringBuilder(); - private RPState State = RPState.WaitKey; - private char _previous = '\0'; - private string _key; - private string _value; - private readonly Action _callback; - - public RecordParser(Action callback) - { - _callback = callback; - } - - public void Append(string text) - { - foreach (var ch in text) - { - switch (State) - { - case RPState.WaitKey: - if (ch.Equals('"')) - { - State = RPState.ParseKey; - _builder.Clear(); - } - break; - case RPState.ParseKey: - if (ch.Equals('"') && _previous != '\\') - { - if (_builder.Length > 0) - { - State = RPState.WaitKeyConfirm; - } - else - { - State = RPState.WaitKey; - } - } - else - { - _builder.Append(ch); - } - break; - case RPState.WaitKeyConfirm: - if (ch.Equals(':')) - { - _key = _builder.ToString(); - State = RPState.WaitValue; - } - else if (ch == ' ' || ch == '\r' || ch == '\n') - { - // nothing - } - else - { - State = RPState.WaitKey; - } - break; - case RPState.WaitValue: - if (ch.Equals('"')) - { - State = RPState.ParseValue; - _builder.Clear(); - } - else if (ch == ' ' || ch == '\r' || ch == '\n') - { - // nothing - } - else - { - State = RPState.WaitKey; - } - break; - case RPState.ParseValue: - if (ch.Equals('"') && _previous != '\\') - { - if (_builder.Length > 0) - { - _value = _builder.ToString(); - _callback(_key, _value); - } - State = RPState.WaitKey; - } - else - { - _builder.Append(ch); - } - break; - } - _previous = ch; - } - } - } - - public static IEnumerable ParseBatches(int size) - { - var list = new List(); - foreach (var record in Parse()) - { - list.Add(record); - if (list.Count > size) - { - yield return list.ToArray(); - list.Clear(); - } - } - if (list.Count > 0) - { - yield return list.ToArray(); - } - } - - public static IEnumerable Parse() - { - var result = new string[2]; - var parser = new RecordParser((k, v) => - { - switch (k) - { - case "metaTitle": result[0] = v; break; - case "plaintext": result[1] = v; break; - } - }); - char[] buffer = new char[16536]; - int count = 0; - using (StreamReader reader = new StreamReader(source)) - { - count = reader.Read(buffer, 0, buffer.Length); - parser.Append(new string(buffer, 0, count)); - - if (!string.IsNullOrEmpty(result[0]) && !string.IsNullOrEmpty(result[1])) - { - yield return result; - result[0] = null; - result[1] = null; - } - } - } } } diff --git a/TFIDFbee/TFIDFbee/Reader/IDocumentReader.cs b/TFIDFbee/TFIDFbee/Reader/IDocumentReader.cs new file mode 100644 index 0000000..cb8f587 --- /dev/null +++ b/TFIDFbee/TFIDFbee/Reader/IDocumentReader.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; + +namespace TFIDFbee.Reader +{ + public interface IDocumentReader + { + IEnumerable ReadBatches(int size); + public IEnumerable>> ReadRawDocumentBatches(int size); + } +} diff --git a/TFIDFbee/TFIDFbee/Reader/JsonByLineReader.cs b/TFIDFbee/TFIDFbee/Reader/JsonByLineReader.cs new file mode 100644 index 0000000..de89910 --- /dev/null +++ b/TFIDFbee/TFIDFbee/Reader/JsonByLineReader.cs @@ -0,0 +1,130 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +namespace TFIDFbee.Reader +{ + public class JsonByLineReader + : IDocumentReader + { + private readonly string _file; + private readonly Func> _lexer; + + public JsonByLineReader(string file, Func> lexer) + { + _file = file; + _lexer = lexer; + } + + public IEnumerable ReadBatches(int size) + { + var list = new List(); + foreach (var batch in ReadDocumentBatches(size)) + { + yield return batch.ToArray(); + list.Clear(); + } + } + + private IEnumerable> ReadDocumentBatches(int size) + { + string line; + var batch = new List(); + string title = null; + string text = null; + using (StreamReader reader = new StreamReader(_file)) + { + while ((line = reader.ReadLine()) != null) + { + var titleIndex = line.IndexOf("\"metaTitle\":"); + if (titleIndex >= 0) + { + var start = line.IndexOf("\"", titleIndex + 12); + var end = line.LastIndexOf("\""); + if (start < end && start != -1 && end != -1) + { + title = line.Substring(start + 1, end - start - 1); + } + } + else + { + var textIndex = line.IndexOf("\"plaintext\":"); + if (textIndex >= 0 && title != null) + { + var start = line.IndexOf("\"", textIndex + 12); + var end = line.LastIndexOf("\""); + if (start < end && start != -1 && end != -1) + { + text = line.Substring(start + 1, end - start - 1); + batch.Add(_lexer(title).Concat(_lexer(text)).ToArray()); + if (batch.Count >= size) + { + yield return batch; + batch.Clear(); + GC.Collect(2); + } + title = null; + text = null; + } + } + } + } + } + if (batch.Count > 0) + { + yield return batch; + } + } + + public IEnumerable>> ReadRawDocumentBatches(int size) + { + string line; + var batch = new List>(); + string title = null; + string text = null; + using (StreamReader reader = new StreamReader(_file)) + { + while ((line = reader.ReadLine()) != null) + { + var titleIndex = line.IndexOf("\"metaTitle\":"); + if (titleIndex >= 0) + { + var start = line.IndexOf("\"", titleIndex + 12); + var end = line.LastIndexOf("\""); + if (start < end && start != -1 && end != -1) + { + title = line.Substring(start + 1, end - start - 1); + } + } + else + { + var textIndex = line.IndexOf("\"plaintext\":"); + if (textIndex >= 0 && title != null) + { + var start = line.IndexOf("\"", textIndex + 12); + var end = line.LastIndexOf("\""); + if (start < end && start != -1 && end != -1) + { + text = line.Substring(start + 1, end - start - 1); + batch.Add(Tuple.Create(title, text)); + if (batch.Count >= size) + { + yield return batch; + batch.Clear(); + GC.Collect(2); + } + title = null; + text = null; + } + } + } + } + } + if (batch.Count > 0) + { + yield return batch; + } + } + } +} diff --git a/TFIDFbee/TFIDFbee/Reader/StateMachineReader.cs b/TFIDFbee/TFIDFbee/Reader/StateMachineReader.cs new file mode 100644 index 0000000..c7385ed --- /dev/null +++ b/TFIDFbee/TFIDFbee/Reader/StateMachineReader.cs @@ -0,0 +1,85 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +namespace TFIDFbee.Reader +{ + public class StateMachineReader + : IDocumentReader + { + private readonly string _file; + private readonly Func> _lexer; + + public StateMachineReader(string file, Func> lexer) + { + _file = file; + _lexer = lexer; + } + + private IEnumerable Parse() + { + var result = new string[2]; + var parser = new RecordParser((k, v) => + { + switch (k) + { + case "metaTitle": result[0] = v; break; + case "plaintext": result[1] = v; break; + } + }); + char[] buffer = new char[16536]; + int count = 0; + using (StreamReader reader = new StreamReader(_file)) + { + while ((count = reader.Read(buffer, 0, buffer.Length)) > 0) + { + parser.Append(new string(buffer, 0, count)); + + if (!string.IsNullOrEmpty(result[0]) && !string.IsNullOrEmpty(result[1])) + { + yield return result; + result[0] = null; + result[1] = null; + } + } + } + } + + public IEnumerable ReadBatches(int size) + { + var list = new List(); + foreach (var record in Parse()) + { + list.Add((_lexer(record[0]).Concat(_lexer(record[1])).ToArray())); + if (list.Count > size) + { + yield return list.ToArray(); + list.Clear(); + } + } + if (list.Count > 0) + { + yield return list.ToArray(); + } + } + + public IEnumerable>> ReadRawDocumentBatches(int size) + { + var list = new List>(); + foreach (var record in Parse()) + { + list.Add(Tuple.Create(record[0], record[1])); + if (list.Count > size) + { + yield return list.ToArray(); + list.Clear(); + } + } + if (list.Count > 0) + { + yield return list.ToArray(); + } + } + } +} diff --git a/TFIDFbee/TFIDFbee/RecordParser.cs b/TFIDFbee/TFIDFbee/RecordParser.cs new file mode 100644 index 0000000..65d7374 --- /dev/null +++ b/TFIDFbee/TFIDFbee/RecordParser.cs @@ -0,0 +1,108 @@ +using System; +using System.Text; + +namespace TFIDFbee +{ + public class RecordParser + { + private enum RPState + { + WaitKey, + ParseKey, + WaitKeyConfirm, + WaitValue, + ParseValue + } + private readonly StringBuilder _builder = new StringBuilder(); + private RPState State = RPState.WaitKey; + private char _previous = '\0'; + private string _key; + private string _value; + private readonly Action _callback; + + public RecordParser(Action callback) + { + _callback = callback; + } + + public void Append(string text) + { + foreach (var ch in text) + { + switch (State) + { + case RPState.WaitKey: + if (ch.Equals('"')) + { + State = RPState.ParseKey; + _builder.Clear(); + } + break; + case RPState.ParseKey: + if (ch.Equals('"') && _previous != '\\') + { + if (_builder.Length > 0) + { + State = RPState.WaitKeyConfirm; + } + else + { + State = RPState.WaitKey; + } + } + else + { + _builder.Append(ch); + } + break; + case RPState.WaitKeyConfirm: + if (ch.Equals(':')) + { + _key = _builder.ToString(); + State = RPState.WaitValue; + } + else if (ch == ' ' || ch == '\r' || ch == '\n') + { + // nothing + } + else + { + State = RPState.WaitKey; + } + break; + case RPState.WaitValue: + if (ch.Equals('"')) + { + State = RPState.ParseValue; + _builder.Clear(); + } + else if (ch == ' ' || ch == '\r' || ch == '\n') + { + // nothing + } + else + { + State = RPState.WaitKey; + } + break; + case RPState.ParseValue: + if (ch.Equals('"') && _previous != '\\') + { + if (_builder.Length > 0) + { + _value = _builder.ToString(); + _callback(_key, _value); + } + State = RPState.WaitKey; + } + else + { + _builder.Append(ch); + } + break; + } + _previous = ch; + } + } + } +} diff --git a/TFIDFbee/TFIDFbee/TFIDFbee.csproj b/TFIDFbee/TFIDFbee/TFIDFbee.csproj index dc03126..dad7995 100644 --- a/TFIDFbee/TFIDFbee/TFIDFbee.csproj +++ b/TFIDFbee/TFIDFbee/TFIDFbee.csproj @@ -7,13 +7,8 @@ - - - - - - - + + diff --git a/ZeroLevel/Services/Semantic/Helpers/BagOfWords.cs b/ZeroLevel/Services/Semantic/Helpers/BagOfWords.cs index 4d1ac3c..6fd4ecc 100644 --- a/ZeroLevel/Services/Semantic/Helpers/BagOfWords.cs +++ b/ZeroLevel/Services/Semantic/Helpers/BagOfWords.cs @@ -11,14 +11,16 @@ namespace ZeroLevel.Services.Semantic.Helpers public class BagOfWords : IBinarySerializable { - private readonly ConcurrentDictionary _words = - new ConcurrentDictionary(); + private ConcurrentDictionary _words; int _words_count = -1; long _number_of_documents = 0; public long NumberOfDocuments => _number_of_documents; public int NumberOfWords => _words.Count; + public BagOfWords() => + _words = new ConcurrentDictionary(); + /// /// Набор документов, слова в документе должны быть лемматизированы/стеммированы, и быть уникальными /// @@ -85,12 +87,16 @@ namespace ZeroLevel.Services.Semantic.Helpers public void Deserialize(IBinaryReader reader) { - throw new NotImplementedException(); + this._number_of_documents = reader.ReadLong(); + this._words_count = reader.ReadInt32(); + this._words = reader.ReadDictionaryAsConcurrent(); } public void Serialize(IBinaryWriter writer) { - throw new NotImplementedException(); + writer.WriteLong(this._number_of_documents); + writer.WriteInt32(this._words_count); + writer.WriteDictionary(this._words); } } } diff --git a/ZeroLevel/Services/Serialization/IBinaryReader.cs b/ZeroLevel/Services/Serialization/IBinaryReader.cs index d1f4226..1a90a7a 100644 --- a/ZeroLevel/Services/Serialization/IBinaryReader.cs +++ b/ZeroLevel/Services/Serialization/IBinaryReader.cs @@ -48,52 +48,60 @@ namespace ZeroLevel.Services.Serialization #region Extensions - T Read() where T : IBinarySerializable; - - T ReadCompatible(); - + #region Arrays + T[] ReadArray() where T : IBinarySerializable, new(); + string[] ReadStringArray(); + IPAddress[] ReadIPArray(); + IPEndPoint[] ReadIPEndPointArray(); + Guid[] ReadGuidArray(); + DateTime[] ReadDateTimeArray(); + Int64[] ReadInt64Array(); + Int32[] ReadInt32Array(); + UInt64[] ReadUInt64Array(); + UInt32[] ReadUInt32Array(); + char[] ReadCharArray(); + short[] ReadShortArray(); + ushort[] ReadUShortArray(); + float[] ReadFloatArray(); + Double[] ReadDoubleArray(); + bool[] ReadBooleanArray(); + byte[] ReadByteArray(); + byte[][] ReadByteArrayArray(); + decimal[] ReadDecimalArray(); + TimeSpan[] ReadTimeSpanArray(); + #endregion + + #region Collections List ReadCollection() where T : IBinarySerializable, new(); - - Dictionary ReadDictionary(); - - ConcurrentDictionary ReadDictionaryAsConcurrent(); - List ReadStringCollection(); - List ReadGuidCollection(); - List ReadDateTimeCollection(); List ReadCharCollection(); List ReadInt64Collection(); - List ReadInt32Collection(); - List ReadDoubleCollection(); - List ReadDecimalCollection(); - List ReadTimeSpanCollection(); - List ReadFloatCollection(); - List ReadBooleanCollection(); - List ReadByteCollection(); - List ReadByteArrayCollection(); - List ReadIPCollection(); - List ReadIPEndPointCollection(); - List ReadUInt64Collection(); - List ReadUInt32Collection(); - List ReadShortCollection(); - List ReadUShortCollection(); + #endregion + + T Read() where T : IBinarySerializable; + + T ReadCompatible(); + + Dictionary ReadDictionary(); + ConcurrentDictionary ReadDictionaryAsConcurrent(); + #endregion Extensions Stream Stream { get; } diff --git a/ZeroLevel/Services/Serialization/IBinaryWriter.cs b/ZeroLevel/Services/Serialization/IBinaryWriter.cs index d92764f..2e405e8 100644 --- a/ZeroLevel/Services/Serialization/IBinaryWriter.cs +++ b/ZeroLevel/Services/Serialization/IBinaryWriter.cs @@ -49,49 +49,55 @@ namespace ZeroLevel.Services.Serialization #region Extensions + #region Arrays + void WriteArray(T[] array) where T : IBinarySerializable; + void WriteArray(string[] array); + void WriteArray(IPAddress[] array); + void WriteArray(IPEndPoint[] array); + void WriteArray(Guid[] array); + void WriteArray(DateTime[] array); + void WriteArray(UInt64[] array); + void WriteArray(UInt32[] array); + void WriteArray(char[] array); + void WriteArray(short[] array); + void WriteArray(ushort[] array); + void WriteArray(Int64[] array); + void WriteArray(Int32[] array); + void WriteArray(float[] array); + void WriteArray(Double[] array); + void WriteArray(bool[] array); + void WriteArray(byte[] array); + void WriteArray(byte[][] array); + void WriteArray(decimal[] array); + void WriteArray(TimeSpan[] array); + #endregion + + #region Collections void WriteCollection(IEnumerable collection) where T : IBinarySerializable; - - void WriteDictionary(Dictionary collection); - void WriteDictionary(ConcurrentDictionary collection); - - void WriteCollection(IEnumerable collection); void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); - void WriteCollection(IEnumerable collection); + #endregion + + void WriteDictionary(Dictionary collection); + void WriteDictionary(ConcurrentDictionary collection); void Write(T item) where T : IBinarySerializable; diff --git a/ZeroLevel/Services/Serialization/MemoryStreamReader.cs b/ZeroLevel/Services/Serialization/MemoryStreamReader.cs index 456a4a5..aaf6a56 100644 --- a/ZeroLevel/Services/Serialization/MemoryStreamReader.cs +++ b/ZeroLevel/Services/Serialization/MemoryStreamReader.cs @@ -224,6 +224,7 @@ namespace ZeroLevel.Services.Serialization #region Extensions + #region Collections public List ReadCollection() where T : IBinarySerializable, new() { @@ -241,57 +242,6 @@ namespace ZeroLevel.Services.Serialization return collection; } - - public Dictionary ReadDictionary() - { - int count = ReadInt32(); - var collection = new Dictionary(count); - if (count > 0) - { - TKey key; - TValue value; - for (int i = 0; i < count; i++) - { - key = ReadCompatible(); - value = ReadCompatible(); - collection.Add(key, value); - } - } - return collection; - } - - public ConcurrentDictionary ReadDictionaryAsConcurrent() - { - int count = ReadInt32(); - var collection = new ConcurrentDictionary(); - if (count > 0) - { - TKey key; - TValue value; - for (int i = 0; i < count; i++) - { - key = ReadCompatible(); - value = ReadCompatible(); - collection.TryAdd(key, value); - } - } - return collection; - } - - public T ReadCompatible() - { - return MessageSerializer.DeserializeCompatible(this); - } - - public T Read() where T : IBinarySerializable - { - byte type = ReadByte(); - if (type == 0) return default(T); - var item = (T)Activator.CreateInstance(); - item.Deserialize(this); - return item; - } - public List ReadStringCollection() { int count = ReadInt32(); @@ -417,6 +367,7 @@ namespace ZeroLevel.Services.Serialization } return collection; } + public List ReadCharCollection() { int count = ReadInt32(); @@ -430,6 +381,7 @@ namespace ZeroLevel.Services.Serialization } return collection; } + public List ReadShortCollection() { int count = ReadInt32(); @@ -555,7 +507,344 @@ namespace ZeroLevel.Services.Serialization } return collection; } + #endregion + + #region Arrays + public T[] ReadArray() + where T : IBinarySerializable, new() + { + int count = ReadInt32(); + var array = new T[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + var item = new T(); + item.Deserialize(this); + array[i] = item; + } + } + return array; + } + + public string[] ReadStringArray() + { + int count = ReadInt32(); + var array = new string[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadString(); + } + } + return array; + } + + public IPAddress[] ReadIPArray() + { + int count = ReadInt32(); + var array = new IPAddress[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadIP(); + } + } + return array; + } + + public IPEndPoint[] ReadIPEndPointArray() + { + int count = ReadInt32(); + var array = new IPEndPoint[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadIPEndpoint(); + } + } + return array; + } + + public Guid[] ReadGuidArray() + { + int count = ReadInt32(); + var array = new Guid[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadGuid(); + } + } + return array; + } + + public DateTime[] ReadDateTimeArray() + { + int count = ReadInt32(); + var array = new DateTime[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = (ReadDateTime() ?? DateTime.MinValue); + } + } + return array; + } + public Int64[] ReadInt64Array() + { + int count = ReadInt32(); + var array = new Int64[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadLong(); + } + } + return array; + } + + public Int32[] ReadInt32Array() + { + int count = ReadInt32(); + var array = new Int32[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadInt32(); + } + } + return array; + } + + public UInt64[] ReadUInt64Array() + { + int count = ReadInt32(); + var array = new UInt64[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadULong(); + } + } + return array; + } + + public UInt32[] ReadUInt32Array() + { + int count = ReadInt32(); + var array = new UInt32[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadUInt32(); + } + } + return array; + } + + public char[] ReadCharArray() + { + int count = ReadInt32(); + var array = new char[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadChar(); + } + } + return array; + } + + public short[] ReadShortArray() + { + int count = ReadInt32(); + var array = new short[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadShort(); + } + } + return array; + } + + public ushort[] ReadUShortArray() + { + int count = ReadInt32(); + var array = new ushort[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadUShort(); + } + } + return array; + } + + public float[] ReadFloatArray() + { + int count = ReadInt32(); + var array = new float[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadFloat(); + } + } + return array; + } + + public Double[] ReadDoubleArray() + { + int count = ReadInt32(); + var array = new Double[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadDouble(); + } + } + return array; + } + + public bool[] ReadBooleanArray() + { + int count = ReadInt32(); + var array = new bool[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadBoolean(); + } + } + return array; + } + + public byte[] ReadByteArray() + { + int count = ReadInt32(); + var array = new byte[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadByte(); + } + } + return array; + } + + public byte[][] ReadByteArrayArray() + { + int count = ReadInt32(); + var array = new byte[count][]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadBytes(); + } + } + return array; + } + + public decimal[] ReadDecimalArray() + { + int count = ReadInt32(); + var array = new decimal[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadDecimal(); + } + } + return array; + } + + public TimeSpan[] ReadTimeSpanArray() + { + int count = ReadInt32(); + var array = new TimeSpan[count]; + if (count > 0) + { + for (int i = 0; i < count; i++) + { + array[i] = ReadTimeSpan(); + } + } + return array; + } + #endregion + + + + public Dictionary ReadDictionary() + { + int count = ReadInt32(); + var collection = new Dictionary(count); + if (count > 0) + { + TKey key; + TValue value; + for (int i = 0; i < count; i++) + { + key = ReadCompatible(); + value = ReadCompatible(); + collection.Add(key, value); + } + } + return collection; + } + + public ConcurrentDictionary ReadDictionaryAsConcurrent() + { + int count = ReadInt32(); + var collection = new ConcurrentDictionary(); + if (count > 0) + { + TKey key; + TValue value; + for (int i = 0; i < count; i++) + { + key = ReadCompatible(); + value = ReadCompatible(); + collection.TryAdd(key, value); + } + } + return collection; + } + + public T ReadCompatible() + { + return MessageSerializer.DeserializeCompatible(this); + } + + public T Read() where T : IBinarySerializable + { + byte type = ReadByte(); + if (type == 0) return default(T); + var item = (T)Activator.CreateInstance(); + item.Deserialize(this); + return item; + } #endregion Extensions public void Dispose() diff --git a/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs b/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs index 95b211a..1884220 100644 --- a/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs +++ b/ZeroLevel/Services/Serialization/MemoryStreamWriter.cs @@ -249,20 +249,7 @@ namespace ZeroLevel.Services.Serialization #region Extension - public void Write(T item) - where T : IBinarySerializable - { - if (item != null) - { - WriteByte(1); - item.Serialize(this); - } - else - { - WriteByte(0); - } - } - + #region Collections public void WriteCollection(IEnumerable collection) where T : IBinarySerializable { @@ -371,6 +358,7 @@ namespace ZeroLevel.Services.Serialization } } } + public void WriteCollection(IEnumerable collection) { WriteInt32(collection?.Count() ?? 0); @@ -479,12 +467,6 @@ namespace ZeroLevel.Services.Serialization } } - public void WriteCompatible(T item) - { - var buffer = MessageSerializer.SerializeCompatible(item); - _stream.Write(buffer, 0, buffer.Length); - } - public void WriteCollection(IEnumerable collection) { WriteInt32(collection?.Count() ?? 0); @@ -497,21 +479,283 @@ namespace ZeroLevel.Services.Serialization } } - - public void WriteDictionary(Dictionary collection) + public void WriteCollection(IEnumerable collection) { WriteInt32(collection?.Count() ?? 0); if (collection != null) { foreach (var item in collection) { - WriteCompatible(item.Key); - WriteCompatible(item.Value); + WriteTimeSpan(item); } } } + #endregion - public void WriteDictionary(ConcurrentDictionary collection) + #region Arrays + public void WriteArray(T[] array) + where T : IBinarySerializable + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + array[i].Serialize(this); + } + } + } + + public void WriteArray(string[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteString(array[i]); + } + } + } + + public void WriteArray(IPAddress[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteIP(array[i]); + } + } + } + + public void WriteArray(IPEndPoint[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteIPEndpoint(array[i]); + } + } + } + + public void WriteArray(Guid[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteGuid(array[i]); + } + } + } + + public void WriteArray(DateTime[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteDateTime(array[i]); + } + } + } + + public void WriteArray(UInt64[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteULong(array[i]); + } + } + } + + public void WriteArray(UInt32[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteUInt32(array[i]); + } + } + } + + public void WriteArray(char[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteChar(array[i]); + } + } + } + + public void WriteArray(short[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteShort(array[i]); + } + } + } + + public void WriteArray(ushort[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteUShort(array[i]); + } + } + } + + public void WriteArray(Int64[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteLong(array[i]); + } + } + } + + public void WriteArray(Int32[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteInt32(array[i]); + } + } + } + + public void WriteArray(float[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteFloat(array[i]); + } + } + } + + public void WriteArray(Double[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteDouble(array[i]); + } + } + } + + public void WriteArray(bool[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteBoolean(array[i]); + } + } + } + + public void WriteArray(byte[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteByte(array[i]); + } + } + } + + public void WriteArray(byte[][] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteBytes(array[i]); + } + } + } + + public void WriteArray(decimal[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteDecimal(array[i]); + } + } + } + + public void WriteArray(TimeSpan[] array) + { + WriteInt32(array?.Length ?? 0); + if (array != null) + { + for (int i = 0; i < array.Length; i++) + { + WriteTimeSpan(array[i]); + } + } + } + #endregion + + public void WriteCompatible(T item) + { + var buffer = MessageSerializer.SerializeCompatible(item); + _stream.Write(buffer, 0, buffer.Length); + } + + public void Write(T item) + where T : IBinarySerializable + { + if (item != null) + { + WriteByte(1); + item.Serialize(this); + } + else + { + WriteByte(0); + } + } + + public void WriteDictionary(Dictionary collection) { WriteInt32(collection?.Count() ?? 0); if (collection != null) @@ -524,14 +768,15 @@ namespace ZeroLevel.Services.Serialization } } - public void WriteCollection(IEnumerable collection) + public void WriteDictionary(ConcurrentDictionary collection) { WriteInt32(collection?.Count() ?? 0); if (collection != null) { foreach (var item in collection) { - WriteTimeSpan(item); + WriteCompatible(item.Key); + WriteCompatible(item.Value); } } } diff --git a/ZeroLevel/Services/Serialization/MessageSerializer.cs b/ZeroLevel/Services/Serialization/MessageSerializer.cs index 8669c3f..57880ba 100644 --- a/ZeroLevel/Services/Serialization/MessageSerializer.cs +++ b/ZeroLevel/Services/Serialization/MessageSerializer.cs @@ -1,58 +1,12 @@ using System; using System.Collections.Concurrent; using System.Collections.Generic; +using System.IO; namespace ZeroLevel.Services.Serialization { public static class MessageSerializer { - public static T Deserialize(byte[] data) - where T : IBinarySerializable - { - if (data == null || data.Length == 0) return default(T); - using (var reader = new MemoryStreamReader(data)) - { - var result = Activator.CreateInstance(); - result.Deserialize(reader); - return result; - } - } - - public static object Deserialize(Type type, byte[] data) - { - if (data == null || data.Length == 0) return null; - using (var reader = new MemoryStreamReader(data)) - { - var result = (IBinarySerializable)Activator.CreateInstance(type); - result.Deserialize(reader); - return result; - } - } - - public static List DeserializeCollection(byte[] data) - where T : IBinarySerializable - { - List collection = null; - if (data != null && data.Length > 0) - { - using (var reader = new MemoryStreamReader(data)) - { - int count = reader.ReadInt32(); - collection = new List(count); - if (count > 0) - { - for (int i = 0; i < count; i++) - { - var item = Activator.CreateInstance(); - item.Deserialize(reader); - collection.Add(item); - } - } - } - } - return collection; - } - public static byte[] Serialize(T obj) where T : IBinarySerializable { @@ -119,6 +73,53 @@ namespace ZeroLevel.Services.Serialization } } + public static T Deserialize(byte[] data) + where T : IBinarySerializable + { + if (data == null || data.Length == 0) return default(T); + using (var reader = new MemoryStreamReader(data)) + { + var result = Activator.CreateInstance(); + result.Deserialize(reader); + return result; + } + } + + public static object Deserialize(Type type, byte[] data) + { + if (data == null || data.Length == 0) return null; + using (var reader = new MemoryStreamReader(data)) + { + var result = (IBinarySerializable)Activator.CreateInstance(type); + result.Deserialize(reader); + return result; + } + } + + public static List DeserializeCollection(byte[] data) + where T : IBinarySerializable + { + List collection = null; + if (data != null && data.Length > 0) + { + using (var reader = new MemoryStreamReader(data)) + { + int count = reader.ReadInt32(); + collection = new List(count); + if (count > 0) + { + for (int i = 0; i < count; i++) + { + var item = Activator.CreateInstance(); + item.Deserialize(reader); + collection.Add(item); + } + } + } + } + return collection; + } + public static T DeserializeCompatible(byte[] data) { if (data == null || data.Length == 0) return default(T); @@ -206,5 +207,156 @@ namespace ZeroLevel.Services.Serialization } } } + + #region Stream + public static void Serialize(Stream stream, T obj) + where T : IBinarySerializable + { + if (obj == null) return; + using (var writer = new MemoryStreamWriter(stream)) + { + obj.Serialize(writer); + } + } + + public static void Serialize(Stream stream, IEnumerable items) + where T : IBinarySerializable + { + if (items == null) return; + using (var writer = new MemoryStreamWriter(stream)) + { + writer.WriteCollection(items); + } + } + + public static void SerializeCompatible(Stream stream, object obj) + { + if (null == obj) + { + return; + } + var direct_seriazlizable = (obj as IBinarySerializable); + if (direct_seriazlizable != null) + { + using (var writer = new MemoryStreamWriter(stream)) + { + direct_seriazlizable.Serialize(writer); + } + } + else + { + using (var writer = new MemoryStreamWriter(stream)) + { + PrimitiveTypeSerializer.Serialize(writer, obj); + } + } + } + + public static void SerializeCompatible(Stream stream, T obj) + { + if (null == obj) + { + return; + } + var direct_seriazlizable = (obj as IBinarySerializable); + if (direct_seriazlizable != null) + { + using (var writer = new MemoryStreamWriter(stream)) + { + direct_seriazlizable.Serialize(writer); + } + } + else + { + using (var writer = new MemoryStreamWriter(stream)) + { + PrimitiveTypeSerializer.Serialize(writer, obj); + } + } + } + + public static T Deserialize(Stream stream) + where T : IBinarySerializable + { + if (stream == null) return default(T); + using (var reader = new MemoryStreamReader(stream)) + { + var result = Activator.CreateInstance(); + result.Deserialize(reader); + return result; + } + } + + public static object Deserialize(Type type, Stream stream) + { + if (stream == null) return null; + using (var reader = new MemoryStreamReader(stream)) + { + var result = (IBinarySerializable)Activator.CreateInstance(type); + result.Deserialize(reader); + return result; + } + } + + public static List DeserializeCollection(Stream stream) + where T : IBinarySerializable + { + List collection = null; + if (stream != null) + { + using (var reader = new MemoryStreamReader(stream)) + { + int count = reader.ReadInt32(); + collection = new List(count); + if (count > 0) + { + for (int i = 0; i < count; i++) + { + var item = Activator.CreateInstance(); + item.Deserialize(reader); + collection.Add(item); + } + } + } + } + return collection; + } + + public static T DeserializeCompatible(Stream stream) + { + if (stream == null) return default(T); + if (typeof(IBinarySerializable).IsAssignableFrom(typeof(T))) + { + using (var reader = new MemoryStreamReader(stream)) + { + var direct = (IBinarySerializable)Activator.CreateInstance(); + direct.Deserialize(reader); + return (T)direct; + } + } + using (var reader = new MemoryStreamReader(stream)) + { + return PrimitiveTypeSerializer.Deserialize(reader); + } + } + + public static object DeserializeCompatible(Type type, Stream stream) + { + if (stream == null) return null; + if (typeof(IBinarySerializable).IsAssignableFrom(type)) + { + using (var reader = new MemoryStreamReader(stream)) + { + var direct = (IBinarySerializable)Activator.CreateInstance(type); + direct.Deserialize(reader); + return direct; + } + } + using (var reader = new MemoryStreamReader(stream)) + { + return PrimitiveTypeSerializer.Deserialize(reader, type); + } + } + #endregion } } \ No newline at end of file diff --git a/ZeroLevel/Services/Serialization/PrimitiveTypeSerializer.cs b/ZeroLevel/Services/Serialization/PrimitiveTypeSerializer.cs index 36beba0..f655971 100644 --- a/ZeroLevel/Services/Serialization/PrimitiveTypeSerializer.cs +++ b/ZeroLevel/Services/Serialization/PrimitiveTypeSerializer.cs @@ -46,6 +46,7 @@ namespace ZeroLevel.Services.Serialization } private readonly static Dictionary _cachee = new Dictionary(); private readonly static Dictionary _enumTypesCachee = new Dictionary(); + private readonly static Dictionary _arrayTypesCachee = new Dictionary(); private static void PreloadCachee() { @@ -69,6 +70,25 @@ namespace ZeroLevel.Services.Serialization _cachee.Add(typeof(IPEndPoint), Create()); _cachee.Add(typeof(IPAddress), Create()); + _cachee.Add(typeof(char[]), Create()); + _cachee.Add(typeof(Boolean[]), Create()); + _cachee.Add(typeof(Byte[][]), Create()); + _cachee.Add(typeof(Int32[]), Create()); + _cachee.Add(typeof(UInt32[]), Create()); + _cachee.Add(typeof(Int64[]), Create()); + _cachee.Add(typeof(UInt64[]), Create()); + _cachee.Add(typeof(Double[]), Create()); + _cachee.Add(typeof(float[]), Create()); + _cachee.Add(typeof(short[]), Create()); + _cachee.Add(typeof(ushort[]), Create()); + _cachee.Add(typeof(Decimal[]), Create()); + _cachee.Add(typeof(DateTime[]), Create()); + _cachee.Add(typeof(Guid[]), Create()); + _cachee.Add(typeof(String[]), Create()); + _cachee.Add(typeof(TimeSpan[]), Create()); + _cachee.Add(typeof(IPEndPoint[]), Create()); + _cachee.Add(typeof(IPAddress[]), Create()); + _cachee.Add(typeof(IEnumerable), Create>()); _cachee.Add(typeof(IEnumerable), Create>()); _cachee.Add(typeof(IEnumerable), Create>()); @@ -89,6 +109,25 @@ namespace ZeroLevel.Services.Serialization _cachee.Add(typeof(IEnumerable), Create>()); _cachee.Add(typeof(IEnumerable), Create>()); + _arrayTypesCachee.Add(typeof(char), typeof(char[])); + _arrayTypesCachee.Add(typeof(Boolean), typeof(Boolean[])); + _arrayTypesCachee.Add(typeof(Byte[]), typeof(Byte[][])); + _arrayTypesCachee.Add(typeof(Int32), typeof(Int32[])); + _arrayTypesCachee.Add(typeof(UInt32), typeof(UInt32[])); + _arrayTypesCachee.Add(typeof(Int64), typeof(Int64[])); + _arrayTypesCachee.Add(typeof(UInt64), typeof(UInt64[])); + _arrayTypesCachee.Add(typeof(Double), typeof(Double[])); + _arrayTypesCachee.Add(typeof(float), typeof(float[])); + _arrayTypesCachee.Add(typeof(short), typeof(short[])); + _arrayTypesCachee.Add(typeof(ushort), typeof(ushort[])); + _arrayTypesCachee.Add(typeof(Decimal), typeof(Decimal[])); + _arrayTypesCachee.Add(typeof(DateTime), typeof(DateTime[])); + _arrayTypesCachee.Add(typeof(Guid), typeof(Guid[])); + _arrayTypesCachee.Add(typeof(String), typeof(String[])); + _arrayTypesCachee.Add(typeof(TimeSpan), typeof(TimeSpan[])); + _arrayTypesCachee.Add(typeof(IPEndPoint), typeof(IPEndPoint[])); + _arrayTypesCachee.Add(typeof(IPAddress), typeof(IPAddress[])); + _enumTypesCachee.Add(typeof(char), typeof(IEnumerable)); _enumTypesCachee.Add(typeof(Boolean), typeof(IEnumerable)); _enumTypesCachee.Add(typeof(Byte), typeof(IEnumerable)); @@ -210,102 +249,200 @@ namespace ZeroLevel.Services.Serialization wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), "WriteTimeSpan").First(); } // + // Arrays + // + else if (type == typeof(Int32[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt32Array").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(char[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadCharArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(UInt32[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt32Array").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(Boolean[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadBooleanArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(Byte[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(Byte[][])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteArrayArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(DateTime[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDateTimeArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(Double[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDoubleArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(float[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadFloatArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(Guid[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadGuidArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(IPAddress[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(IPEndPoint[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPEndPointArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(Int64[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt64Array").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(UInt64[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt64Array").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(Int16[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadShortArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(UInt16[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUShortArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(String[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadStringArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(Decimal[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDecimalArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + else if (type == typeof(TimeSpan[])) + { + wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadTimeSpanArray").First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate()).First(); + } + // // Collections // else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt32Collection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadCharCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt32Collection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadBooleanCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteArrayCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDateTimeCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDoubleCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadFloatCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadGuidCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPEndPointCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt64Collection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt64Collection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadShortCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUShortCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadStringCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDecimalCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } else if (type == typeof(IEnumerable)) { wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadTimeSpanCollection").First(); - wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate()).First(); + wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate()).First(); } // // Not supported @@ -317,13 +454,20 @@ namespace ZeroLevel.Services.Serialization return wrapper; } - private static Func CreatePredicate() + private static Func CreateCollectionPredicate() { var typeArg = typeof(T).GetGenericArguments().First(); return mi => mi.Name.Equals("WriteCollection", StringComparison.Ordinal) && mi.GetParameters().First().ParameterType.GetGenericArguments().First().IsAssignableFrom(typeArg); } + private static Func CreateArrayPredicate() + { + var typeArg = typeof(T).GetElementType(); + return mi => mi.Name.Equals("WriteArray", StringComparison.Ordinal) && + mi.GetParameters().First().ParameterType.GetElementType().IsAssignableFrom(typeArg); + } + private readonly static Dictionary _concrete_type_cachee = new Dictionary(); private readonly static object _concrete_type_cachee_locker = new object(); @@ -348,25 +492,31 @@ namespace ZeroLevel.Services.Serialization else if (TypeHelpers.IsAssignableToGenericType(type, typeof(IEnumerable<>))) { Type elementType; + var dict = _enumTypesCachee; + var writeName = "WriteCollection"; + var readName = "ReadCollection"; if (TypeHelpers.IsArray(type)) { elementType = type.GetElementType(); + dict = _arrayTypesCachee; + writeName = "WriteArray"; + readName = "ReadArray"; } else { elementType = type.GetGenericArguments().First(); } - if (_enumTypesCachee.ContainsKey(elementType)) + if (dict.ContainsKey(elementType)) { - _concrete_type_cachee[type] = _cachee[_enumTypesCachee[elementType]]; + _concrete_type_cachee[type] = _cachee[dict[elementType]]; } else if (typeof(IBinarySerializable).IsAssignableFrom(elementType)) { var wrapper = new Wrapper { Invoker = InvokeWrapper.Create() }; - wrapper.ReadId = wrapper.Invoker.ConfigureGeneric(typeof(MemoryStreamReader), elementType, "ReadCollection").First(); + wrapper.ReadId = wrapper.Invoker.ConfigureGeneric(typeof(MemoryStreamReader), elementType, readName).First(); wrapper.WriteId = wrapper.Invoker.ConfigureGeneric(typeof(MemoryStreamWriter), elementType, - mi => mi.Name.Equals("WriteCollection") && mi.IsGenericMethod).First(); + mi => mi.Name.Equals(writeName) && mi.IsGenericMethod).First(); _concrete_type_cachee[type] = wrapper; } }