pull/1/head
Ogoun 5 years ago
parent ed1983b715
commit c9b9eadad6

@ -0,0 +1,16 @@
<!-- SDK-style project file; the package/project references below pull in LemmaSharp and ZeroLevel. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Built against .NET Standard 2.0. -->
<TargetFramework>netstandard2.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<!-- NuGet dependencies: the LemmaSharp library and its prebuilt-model package, both pinned to 1.0.1. -->
<PackageReference Include="LemmaSharp-std" Version="1.0.1" />
<PackageReference Include="LemmaSharpPreBuilt-std" Version="1.0.1" />
</ItemGroup>
<ItemGroup>
<!-- Sibling ZeroLevel project, referenced by relative path. -->
<ProjectReference Include="..\ZeroLevel\ZeroLevel.csproj" />
</ItemGroup>
</Project>

@ -0,0 +1,7 @@
namespace Lemmatization
{
/// <summary>
/// Container for the tokens that make up one sentence.
/// </summary>
public class Sentence
{
/// <summary>
/// Tokens of the sentence. Exposed as a public mutable field.
/// </summary>
public Token[] Tokens;
}
}

@ -0,0 +1,8 @@
namespace Lemmatization
{
/// <summary>
/// A single token: its classification plus its text.
/// </summary>
public class Token
{
/// <summary>
/// Classification of the token.
/// </summary>
public TokenType Type;
/// <summary>
/// Text of the token.
/// </summary>
public string Value;
}
}

@ -0,0 +1,35 @@
namespace Lemmatization
{
/// <summary>
/// Classification assigned to a token.
/// </summary>
public enum TokenType
{
/// <summary>
/// Type has not been determined; this is the default (zero) value.
/// </summary>
Unknown,
/// <summary>
/// Punctuation mark
/// </summary>
Punctuation,
/// <summary>
/// Abbreviation
/// </summary>
// NOTE(review): the first letter of this member name may be the Cyrillic look-alike rather than Latin 'A' - confirm before referencing it from other code.
Аbbreviation,
/// <summary>
/// Word
/// </summary>
Word,
/// <summary>
/// Identifier (may contain characters other than letters)
/// </summary>
Identity,
/// <summary>
/// Number
/// </summary>
Number,
/// <summary>
/// Phone number
/// </summary>
PhoneNumber,
/// <summary>
/// Link
/// </summary>
Link
}
}

@ -0,0 +1,21 @@
using LemmaSharp;
using ZeroLevel.Services.Semantic;
namespace Lemmatization
{
    /// <summary>
    /// <see cref="ILexer"/> implementation that delegates to a prebuilt Russian LemmaSharp lemmatizer.
    /// </summary>
    public class LemmaLexer
        : ILexer
    {
        // Created once per lexer instance and reused for every Lex call.
        private readonly ILemmatizer _lemmatizer = new LemmatizerPrebuiltFull(LanguagePrebuilt.Russian);

        /// <summary>
        /// Creates a lexer bound to the prebuilt Russian model.
        /// </summary>
        public LemmaLexer()
        {
        }

        /// <summary>
        /// Trims the word, lower-cases it with the invariant culture and returns what the lemmatizer produces for it.
        /// </summary>
        /// <param name="word">Word to lemmatize; must not be null.</param>
        /// <returns>The lemmatizer's result for the normalized word.</returns>
        public string Lex(string word)
        {
            var normalized = word.Trim().ToLowerInvariant();
            return _lemmatizer.Lemmatize(normalized);
        }
    }
}

@ -0,0 +1,206 @@
using System;
using System.Collections.Generic;
namespace Lemmatization
{
    /// <summary>
    /// Character cursor over a string, plus static helpers that split text into
    /// sentences (<see cref="ReadSentenses"/>) and tokens (<see cref="ParseToTokens"/>).
    /// </summary>
    public class SpecTextReader
    {
        // Invariant: 0 <= _position <= _template.Length, and _template is never null.
        private int _position;
        private readonly string _template;

        /// <summary>True when the cursor is past the last character (always true for empty input).</summary>
        public bool EOF => _position >= _template.Length;

        /// <summary>True when the cursor is on the first character.</summary>
        public bool StartPosition => _position == 0;

        /// <summary>True when the cursor is on the last character.</summary>
        public bool LastPosition => _position == _template.Length - 1;

        /// <summary>Character under the cursor, or <see cref="char.MinValue"/> at EOF.</summary>
        public char Current => EOF ? char.MinValue : _template[_position];

        /// <summary>Character after the cursor, or <see cref="char.MinValue"/> when there is none.</summary>
        public char Next => EOF || LastPosition ? char.MinValue : _template[_position + 1];

        /// <summary>Character before the cursor, or <see cref="char.MinValue"/> at the start.</summary>
        public char Preview => StartPosition ? char.MinValue : _template[_position - 1];

        /// <summary>
        /// Creates a reader positioned on the first character.
        /// </summary>
        /// <param name="template">Text to read; null is treated as an empty string.</param>
        public SpecTextReader(string template)
        {
            // Normalizing null here keeps every member free of null checks.
            _template = template ?? string.Empty;
            _position = 0;
        }

        /// <summary>
        /// Moves the cursor by <paramref name="offset"/> characters, clamping the result
        /// to the range [0, length].
        /// </summary>
        /// <param name="offset">Number of characters to move; may be negative.</param>
        /// <returns>True when a character is available under the cursor after the move.</returns>
        public bool Move(int offset = 1)
        {
            if (EOF) return false;
            // long arithmetic so that extreme offsets cannot overflow int.
            long target = (long)_position + offset;
            if (target < 0)
            {
                target = 0;
            }
            else if (target > _template.Length)
            {
                target = _template.Length;
            }
            _position = (int)target;
            return !EOF;
        }

        /// <summary>
        /// Advances the cursor past consecutive whitespace characters.
        /// </summary>
        /// <returns>Number of characters skipped.</returns>
        public int SkipSpaces()
        {
            int start = _position;
            while (!EOF && char.IsWhiteSpace(_template[_position]))
            {
                _position++;
            }
            return _position - start;
        }

        /// <summary>
        /// Advances the cursor past consecutive whitespace characters
        /// (same behavior as <see cref="SkipSpaces"/>, without the count).
        /// </summary>
        public void SkipBreaks()
        {
            SkipSpaces();
        }

        /// <summary>
        /// Moves the cursor one character back.
        /// </summary>
        /// <returns>False when the cursor was already at the start and did not move.</returns>
        public bool MoveBack()
        {
            if (_position == 0) return false;
            _position--;
            return true;
        }

        /// <summary>
        /// Finds the distance from the cursor to the first occurrence of <paramref name="symbol"/>
        /// at or after the cursor.
        /// </summary>
        /// <param name="symbol">Character to look for.</param>
        /// <returns>
        /// Zero-based offset from the cursor, or -1 when the symbol is not found or the cursor
        /// is on the last character or at EOF.
        /// </returns>
        public int FindOffsetTo(char symbol)
        {
            if (EOF || LastPosition) return -1;
            var found = _template.IndexOf(symbol, _position);
            return found < 0 ? -1 : found - _position;
        }

        /// <summary>
        /// Checks whether the character at cursor + <paramref name="offset"/> equals <paramref name="sym"/>.
        /// </summary>
        /// <param name="sym">Expected character.</param>
        /// <param name="offset">Offset relative to the cursor; may be negative.</param>
        /// <returns>False when the index is outside the text or the character differs.</returns>
        public bool Test(char sym, int offset = 0)
        {
            var index = _position + offset;
            if (index < 0 || index >= _template.Length) return false;
            return _template[index] == sym;
        }

        /// <summary>
        /// Reads, without moving the cursor, an identifier starting at the cursor: a letter
        /// followed by letters, digits, '_' or '-'.
        /// </summary>
        /// <returns>The identifier lower-cased with the invariant culture, or an empty string.</returns>
        public string ReadIdentity()
        {
            if (EOF || !char.IsLetter(_template[_position])) return string.Empty;
            int end = _position + 1;
            while (end < _template.Length && IsIdentityChar(_template[end]))
            {
                end++;
            }
            return _template.Substring(_position, end - _position).ToLowerInvariant();
        }

        /// <summary>
        /// Reads, without moving the cursor, the run of letters and digits starting at the cursor.
        /// </summary>
        /// <returns>The run as written in the text, or an empty string.</returns>
        public string ReadWord()
        {
            if (EOF || !char.IsLetterOrDigit(_template[_position])) return string.Empty;
            int end = _position + 1;
            while (end < _template.Length && char.IsLetterOrDigit(_template[end]))
            {
                end++;
            }
            return _template.Substring(_position, end - _position);
        }

        /// <summary>
        /// Splits a line into tokens. Each run of letters and digits becomes one token:
        /// letters only is <see cref="TokenType.Word"/>, digits only is <see cref="TokenType.Number"/>,
        /// a mix is <see cref="TokenType.Identity"/>. Every other non-whitespace character becomes
        /// a one-character <see cref="TokenType.Punctuation"/> token; whitespace only separates tokens.
        /// </summary>
        /// <param name="line">Text to tokenize; null or empty yields an empty array.</param>
        /// <returns>Tokens in the order they appear in the line.</returns>
        public static Token[] ParseToTokens(string line)
        {
            var tokens = new List<Token>();
            if (string.IsNullOrEmpty(line)) return tokens.ToArray();

            int runStart = -1;                  // index of the first character of the current run, -1 when none
            var runType = TokenType.Unknown;    // classification of the current run only

            for (int i = 0; i < line.Length; i++)
            {
                char ch = line[i];
                bool isLetter = char.IsLetter(ch);
                if (isLetter || char.IsDigit(ch))
                {
                    if (runStart < 0)
                    {
                        runStart = i;
                        runType = isLetter ? TokenType.Word : TokenType.Number;
                    }
                    else if (isLetter && runType == TokenType.Number)
                    {
                        runType = TokenType.Identity;
                    }
                    else if (!isLetter && runType == TokenType.Word)
                    {
                        runType = TokenType.Identity;
                    }
                    continue;
                }

                if (runStart >= 0)
                {
                    tokens.Add(new Token { Type = runType, Value = line.Substring(runStart, i - runStart) });
                    // Reset so the next run is classified independently of this one.
                    runStart = -1;
                    runType = TokenType.Unknown;
                }
                if (!char.IsWhiteSpace(ch))
                {
                    tokens.Add(new Token { Type = TokenType.Punctuation, Value = ch.ToString() });
                }
            }

            if (runStart >= 0)
            {
                tokens.Add(new Token { Type = runType, Value = line.Substring(runStart) });
            }
            return tokens.ToArray();
        }

        /// <summary>
        /// Lazily splits text into sentences on the '.' character and tokenizes each with
        /// <see cref="ParseToTokens"/>. The '.' itself is not part of any sentence. Text after
        /// the last '.' is returned as a final sentence unless it is only whitespace.
        /// </summary>
        /// <param name="text">Text to split; null or empty yields no sentences.</param>
        /// <returns>Sentences in the order they appear in the text.</returns>
        public static IEnumerable<Sentence> ReadSentenses(string text)
        {
            if (string.IsNullOrEmpty(text)) yield break;

            int start = 0;
            for (int i = 0; i < text.Length; i++)
            {
                if (text[i] != '.') continue;
                if (i > start)
                {
                    yield return new Sentence { Tokens = ParseToTokens(text.Substring(start, i - start)) };
                }
                start = i + 1;
            }

            if (start < text.Length)
            {
                var tail = text.Substring(start);
                // A whitespace-only tail (e.g. the newline after the final '.') is not a sentence.
                if (!string.IsNullOrWhiteSpace(tail))
                {
                    yield return new Sentence { Tokens = ParseToTokens(tail) };
                }
            }
        }

        // True for characters allowed after the first letter of an identifier.
        private static bool IsIdentityChar(char ch)
        {
            return char.IsLetterOrDigit(ch) || ch == '_' || ch == '-';
        }
    }
}

@ -5,9 +5,9 @@ VisualStudioVersion = 16.0.29709.97
MinimumVisualStudioVersion = 10.0.40219.1 MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TFIDFbee", "TFIDFbee\TFIDFbee.csproj", "{7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}" Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TFIDFbee", "TFIDFbee\TFIDFbee.csproj", "{7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}"
EndProject EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lemmatization", "..\HoKeMs\Lemmatization\Lemmatization.csproj", "{BF9F7C1E-098B-4815-BA35-8A9845C66663}" Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lemmatization", "..\Lemmatization\Lemmatization.csproj", "{98102DAA-F649-45FD-BBE9-F393DBF82275}"
EndProject EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZeroLevel", "..\..\GIT\Zero\ZeroLevel\ZeroLevel.csproj", "{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}" Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZeroLevel", "..\ZeroLevel\ZeroLevel.csproj", "{6AF46F95-EA67-4258-96B1-7BBC57EB965D}"
EndProject EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
@ -25,22 +25,22 @@ Global
{7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}.Release|Any CPU.Build.0 = Release|Any CPU {7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}.Release|Any CPU.Build.0 = Release|Any CPU
{7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}.Release|x64.ActiveCfg = Release|Any CPU {7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}.Release|x64.ActiveCfg = Release|Any CPU
{7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}.Release|x64.Build.0 = Release|Any CPU {7B39E0A1-3DE4-4702-8D61-5C9A6CF164C6}.Release|x64.Build.0 = Release|Any CPU
{BF9F7C1E-098B-4815-BA35-8A9845C66663}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {98102DAA-F649-45FD-BBE9-F393DBF82275}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{BF9F7C1E-098B-4815-BA35-8A9845C66663}.Debug|Any CPU.Build.0 = Debug|Any CPU {98102DAA-F649-45FD-BBE9-F393DBF82275}.Debug|Any CPU.Build.0 = Debug|Any CPU
{BF9F7C1E-098B-4815-BA35-8A9845C66663}.Debug|x64.ActiveCfg = Debug|x64 {98102DAA-F649-45FD-BBE9-F393DBF82275}.Debug|x64.ActiveCfg = Debug|Any CPU
{BF9F7C1E-098B-4815-BA35-8A9845C66663}.Debug|x64.Build.0 = Debug|x64 {98102DAA-F649-45FD-BBE9-F393DBF82275}.Debug|x64.Build.0 = Debug|Any CPU
{BF9F7C1E-098B-4815-BA35-8A9845C66663}.Release|Any CPU.ActiveCfg = Release|Any CPU {98102DAA-F649-45FD-BBE9-F393DBF82275}.Release|Any CPU.ActiveCfg = Release|Any CPU
{BF9F7C1E-098B-4815-BA35-8A9845C66663}.Release|Any CPU.Build.0 = Release|Any CPU {98102DAA-F649-45FD-BBE9-F393DBF82275}.Release|Any CPU.Build.0 = Release|Any CPU
{BF9F7C1E-098B-4815-BA35-8A9845C66663}.Release|x64.ActiveCfg = Release|Any CPU {98102DAA-F649-45FD-BBE9-F393DBF82275}.Release|x64.ActiveCfg = Release|Any CPU
{BF9F7C1E-098B-4815-BA35-8A9845C66663}.Release|x64.Build.0 = Release|Any CPU {98102DAA-F649-45FD-BBE9-F393DBF82275}.Release|x64.Build.0 = Release|Any CPU
{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Debug|Any CPU.Build.0 = Debug|Any CPU {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Debug|x64.ActiveCfg = Debug|x64 {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Debug|x64.ActiveCfg = Debug|x64
{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Debug|x64.Build.0 = Debug|x64 {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Debug|x64.Build.0 = Debug|x64
{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Release|Any CPU.ActiveCfg = Release|Any CPU {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Release|Any CPU.Build.0 = Release|Any CPU {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Release|Any CPU.Build.0 = Release|Any CPU
{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Release|x64.ActiveCfg = Release|x64 {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Release|x64.ActiveCfg = Release|x64
{5FF0C954-7FB8-49F4-9E97-9DCC933D45FF}.Release|x64.Build.0 = Release|x64 {6AF46F95-EA67-4258-96B1-7BBC57EB965D}.Release|x64.Build.0 = Release|x64
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

@ -1,63 +1,103 @@
using Accord.MachineLearning; using Lemmatization;
using Lemmatization;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using System.Text; using TFIDFbee.Reader;
using ZeroLevel; using ZeroLevel;
using ZeroLevel.Services.Semantic; using ZeroLevel.Services.Semantic;
using ZeroLevel.Services.Semantic.Helpers; using ZeroLevel.Services.Semantic.Helpers;
using ZeroLevel.Services.Serialization;
namespace TFIDFbee namespace TFIDFbee
{ {
class Program class Program
{ {
private const string source = @"D:\Desktop\lenta-ru-data-set_19990901_20171204.json"; private const string source = @"E:\Desktop\lenta-ru-data-set_19990901_20171204\lenta-ru-data-set_19990901_20171204.json";
private readonly static ILexProvider _lexer = new LexProvider(new LemmaLexer()); private readonly static ILexProvider _lexer = new LexProvider(new LemmaLexer());
static void Main(string[] args) static void Main(string[] args)
{ {
Log.AddConsoleLogger(ZeroLevel.Logging.LogLevel.FullDebug);
Configuration.Save(Configuration.ReadFromApplicationConfig()); Configuration.Save(Configuration.ReadFromApplicationConfig());
/*var codebook = new TFIDF() IDocumentReader reader = new StateMachineReader(source, s => ExtractLemmas(s));
BagOfWords codebook;
if (File.Exists("model.bin"))
{ {
Tf = TermFrequency.Log, Log.Info("Load model from file");
Idf = InverseDocumentFrequency.Default, using (var stream = new FileStream("model.bin", FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
UpdateDictionary = true {
};*/ codebook = MessageSerializer.Deserialize<BagOfWords>(stream);
var codebook = new ZeroLevel.Services.Semantic.Helpers.BagOfWords(); }
foreach (var batch in ParseBatches(1000)) }
else
{ {
codebook.Learn(batch); Log.Info("Create and train model");
Console.WriteLine($"Documents: {codebook.NumberOfDocuments}"); codebook = new BagOfWords();
Console.WriteLine($"Words: {codebook.NumberOfWords}"); foreach (var batch in reader.ReadBatches(1000))
{
codebook.Learn(batch);
Log.Info($"\r\n\tDocuments: {codebook.NumberOfDocuments}\r\n\tWords: {codebook.NumberOfWords}");
}
using (var stream = new FileStream("model.bin", FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
{
MessageSerializer.Serialize<BagOfWords>(stream, codebook);
}
} }
var vectors = new List<SparceVector>(); Log.Info("Build document vectors");
foreach (var docs in ReadRawDocumentBatches(1000)) List<SparceVector> vectors;
if (File.Exists("vectors.bin"))
{ {
foreach (var doc in docs) Log.Info("Load vectors from file");
using (var stream = new FileStream("vectors.bin", FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
vectors = MessageSerializer.DeserializeCompatible<List<SparceVector>>(stream);
}
}
else
{
Log.Info("Create vectors");
vectors = new List<SparceVector>();
foreach (var docs in reader.ReadRawDocumentBatches(1000))
{
foreach (var doc in docs)
{
var words = _lexer.ExtractLexTokens(doc.Item2).Select(t => t.Token).Concat(_lexer.ExtractLexTokens(doc.Item1).Select(t => t.Token)).ToArray();
vectors.Add(codebook.Transform(words));
}
}
using (var stream = new FileStream("vectors.bin", FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
{ {
var words = _lexer.ExtractLexTokens(doc.Item2).Select(t => t.Token)/*.Concat(_lexer.ExtractLexTokens(doc.Text).Select(t => t.Token))*/.ToArray(); MessageSerializer.SerializeCompatible<List<SparceVector>>(stream, vectors);
vectors.Add(codebook.Transform(words));
} }
} }
Log.Info("Find similar documents");
var list = new List<Tuple<double, int, int>>(); var list = new List<Tuple<double, int, int>>();
long total_count = (vectors.Count * vectors.Count);
long count = 0;
for (int i = 0; i < vectors.Count; i++) for (int i = 0; i < vectors.Count; i++)
{ {
for (int j = i + 1; j < vectors.Count - 1; j++) for (int j = i + 1; j < vectors.Count - 1; j++)
{ {
count++;
if (count % 100000 == 0)
{
Log.Info($"Progress: {(int)(count * 100.0d / (double)total_count)} %.\tFound similars: {list.Count}.");
}
if (i == j) continue; if (i == j) continue;
var diff = vectors[i].Measure(vectors[j]); var diff = vectors[i].Measure(vectors[j]);
if (diff > double.Epsilon) if (diff > 0.885d)
{ {
list.Add(Tuple.Create(diff, i, j)); list.Add(Tuple.Create(diff, i, j));
} }
} }
} }
var to_present = list.OrderBy(e => e.Item1).Take(200).ToArray(); Log.Info("Prepare to show similar documents");
var to_present = list.OrderBy(e => e.Item1).Take(2000).ToArray();
var to_present_map = new Dictionary<int, Tuple<string, string>>(); var to_present_map = new Dictionary<int, Tuple<string, string>>();
foreach (var e in to_present) foreach (var e in to_present)
{ {
@ -72,7 +112,7 @@ namespace TFIDFbee
} }
int index = 0; int index = 0;
foreach (var docs in ReadRawDocumentBatches(1000)) foreach (var docs in reader.ReadRawDocumentBatches(1000))
{ {
foreach (var doc in docs) foreach (var doc in docs)
{ {
@ -84,14 +124,22 @@ namespace TFIDFbee
} }
} }
Log.Info("Show similar documents");
index = 0; index = 0;
foreach (var e in to_present) using (var output = new StreamWriter("out.txt"))
{ {
Console.WriteLine($"#{index++}: {e.Item1}"); foreach (var e in to_present)
Console.WriteLine(to_present_map[e.Item2].Item1); {
Console.WriteLine(to_present_map[e.Item3].Item2); output.WriteLine($"#{index++}: {e.Item1}");
Console.WriteLine("--------------------"); output.WriteLine("-------------1--------------");
Console.WriteLine(); output.WriteLine(to_present_map[e.Item2].Item1);
output.WriteLine(to_present_map[e.Item2].Item2);
output.WriteLine("-------------2--------------");
output.WriteLine(to_present_map[e.Item3].Item1);
output.WriteLine(to_present_map[e.Item3].Item2);
output.WriteLine("#############################");
output.WriteLine();
}
} }
Console.WriteLine("Completed"); Console.WriteLine("Completed");
@ -105,263 +153,5 @@ namespace TFIDFbee
.Select(t => t.Token) .Select(t => t.Token)
.Where(s => s.Any(c => char.IsLetter(c))); .Where(s => s.Any(c => char.IsLetter(c)));
} }
public static IEnumerable<string[][]> ReadBatches(int size)
{
var list = new List<string[]>();
foreach (var batch in ReadDocumentBatches(size))
{
yield return batch.ToArray();
list.Clear();
}
}
public static IEnumerable<IEnumerable<string[]>> ReadDocumentBatches(int size)
{
string line;
var batch = new List<string[]>();
string title = null;
string text = null;
using (StreamReader reader = new StreamReader(source))
{
while ((line = reader.ReadLine()) != null)
{
var titleIndex = line.IndexOf("\"metaTitle\":");
if (titleIndex >= 0)
{
var start = line.IndexOf("\"", titleIndex + 12);
var end = line.LastIndexOf("\"");
if (start < end && start != -1 && end != -1)
{
title = line.Substring(start + 1, end - start - 1);
}
}
else
{
var textIndex = line.IndexOf("\"plaintext\":");
if (textIndex >= 0 && title != null)
{
var start = line.IndexOf("\"", textIndex + 12);
var end = line.LastIndexOf("\"");
if (start < end && start != -1 && end != -1)
{
text = line.Substring(start + 1, end - start - 1);
batch.Add(ExtractLemmas(title).Concat(ExtractLemmas(text)).ToArray());
if (batch.Count >= size)
{
yield return batch;
batch.Clear();
GC.Collect(2);
}
title = null;
text = null;
}
}
}
}
}
if (batch.Count > 0)
{
yield return batch;
}
}
public static IEnumerable<IEnumerable<Tuple<string, string>>> ReadRawDocumentBatches(int size)
{
string line;
var batch = new List<Tuple<string, string>>();
string title = null;
string text = null;
using (StreamReader reader = new StreamReader(source))
{
while ((line = reader.ReadLine()) != null)
{
var titleIndex = line.IndexOf("\"metaTitle\":");
if (titleIndex >= 0)
{
var start = line.IndexOf("\"", titleIndex + 12);
var end = line.LastIndexOf("\"");
if (start < end && start != -1 && end != -1)
{
title = line.Substring(start + 1, end - start - 1);
}
}
else
{
var textIndex = line.IndexOf("\"plaintext\":");
if (textIndex >= 0 && title != null)
{
var start = line.IndexOf("\"", textIndex + 12);
var end = line.LastIndexOf("\"");
if (start < end && start != -1 && end != -1)
{
text = line.Substring(start + 1, end - start - 1);
batch.Add(Tuple.Create(title, text));
if (batch.Count >= size)
{
yield return batch;
batch.Clear();
GC.Collect(2);
}
title = null;
text = null;
}
}
}
}
}
if (batch.Count > 0)
{
yield return batch;
}
}
private class RecordParser
{
private enum RPState
{
WaitKey,
ParseKey,
WaitKeyConfirm,
WaitValue,
ParseValue
}
private readonly StringBuilder _builder = new StringBuilder();
private RPState State = RPState.WaitKey;
private char _previous = '\0';
private string _key;
private string _value;
private readonly Action<string, string> _callback;
public RecordParser(Action<string, string> callback)
{
_callback = callback;
}
public void Append(string text)
{
foreach (var ch in text)
{
switch (State)
{
case RPState.WaitKey:
if (ch.Equals('"'))
{
State = RPState.ParseKey;
_builder.Clear();
}
break;
case RPState.ParseKey:
if (ch.Equals('"') && _previous != '\\')
{
if (_builder.Length > 0)
{
State = RPState.WaitKeyConfirm;
}
else
{
State = RPState.WaitKey;
}
}
else
{
_builder.Append(ch);
}
break;
case RPState.WaitKeyConfirm:
if (ch.Equals(':'))
{
_key = _builder.ToString();
State = RPState.WaitValue;
}
else if (ch == ' ' || ch == '\r' || ch == '\n')
{
// nothing
}
else
{
State = RPState.WaitKey;
}
break;
case RPState.WaitValue:
if (ch.Equals('"'))
{
State = RPState.ParseValue;
_builder.Clear();
}
else if (ch == ' ' || ch == '\r' || ch == '\n')
{
// nothing
}
else
{
State = RPState.WaitKey;
}
break;
case RPState.ParseValue:
if (ch.Equals('"') && _previous != '\\')
{
if (_builder.Length > 0)
{
_value = _builder.ToString();
_callback(_key, _value);
}
State = RPState.WaitKey;
}
else
{
_builder.Append(ch);
}
break;
}
_previous = ch;
}
}
}
public static IEnumerable<string[][]> ParseBatches(int size)
{
var list = new List<string[]>();
foreach (var record in Parse())
{
list.Add(record);
if (list.Count > size)
{
yield return list.ToArray();
list.Clear();
}
}
if (list.Count > 0)
{
yield return list.ToArray();
}
}
public static IEnumerable<string[]> Parse()
{
var result = new string[2];
var parser = new RecordParser((k, v) =>
{
switch (k)
{
case "metaTitle": result[0] = v; break;
case "plaintext": result[1] = v; break;
}
});
char[] buffer = new char[16536];
int count = 0;
using (StreamReader reader = new StreamReader(source))
{
count = reader.Read(buffer, 0, buffer.Length);
parser.Append(new string(buffer, 0, count));
if (!string.IsNullOrEmpty(result[0]) && !string.IsNullOrEmpty(result[1]))
{
yield return result;
result[0] = null;
result[1] = null;
}
}
}
} }
} }

@ -0,0 +1,11 @@
using System;
using System.Collections.Generic;
namespace TFIDFbee.Reader
{
    /// <summary>
    /// Source of documents delivered in batches.
    /// </summary>
    public interface IDocumentReader
    {
        /// <summary>
        /// Reads documents as arrays of strings, grouped into batches.
        /// </summary>
        /// <param name="size">Requested number of documents per batch.</param>
        /// <returns>A sequence of batches; each batch is an array of documents, each document an array of strings.</returns>
        IEnumerable<string[][]> ReadBatches(int size);

        /// <summary>
        /// Reads documents as pairs of strings, grouped into batches.
        /// </summary>
        /// <param name="size">Requested number of documents per batch.</param>
        /// <returns>A sequence of batches; each batch is a sequence of string pairs.</returns>
        IEnumerable<IEnumerable<Tuple<string, string>>> ReadRawDocumentBatches(int size);
    }
}

@ -0,0 +1,130 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace TFIDFbee.Reader
{
    /// <summary>
    /// Reads (title, text) documents from a text file in which the <c>"metaTitle":</c> and
    /// <c>"plaintext":</c> properties each sit on their own line. The file is scanned line by
    /// line with plain string searches; it is not parsed as JSON.
    /// </summary>
    public class JsonByLineReader
        : IDocumentReader
    {
        private const string TitleKey = "\"metaTitle\":";
        private const string TextKey = "\"plaintext\":";

        private readonly string _file;
        private readonly Func<string, IEnumerable<string>> _lexer;

        /// <summary>
        /// Creates a reader over the given file.
        /// </summary>
        /// <param name="file">Path of the file to read.</param>
        /// <param name="lexer">Function that turns a title or a text into a sequence of strings.</param>
        public JsonByLineReader(string file, Func<string, IEnumerable<string>> lexer)
        {
            _file = file;
            _lexer = lexer;
        }

        /// <summary>
        /// Lazily reads the file and returns documents in batches. Each document is the lexer
        /// output for the title followed by the lexer output for the text.
        /// </summary>
        /// <param name="size">Number of documents per batch; the last batch may be smaller.</param>
        /// <returns>A sequence of batches, each an independent array.</returns>
        public IEnumerable<string[][]> ReadBatches(int size)
        {
            var documents = ReadRecords()
                .Select(record => _lexer(record.Item1).Concat(_lexer(record.Item2)).ToArray());
            return Batch(documents, size);
        }

        /// <summary>
        /// Lazily reads the file and returns raw (title, text) pairs in batches.
        /// </summary>
        /// <param name="size">Number of documents per batch; the last batch may be smaller.</param>
        /// <returns>A sequence of batches, each an independent array that stays valid after the next batch is read.</returns>
        public IEnumerable<IEnumerable<Tuple<string, string>>> ReadRawDocumentBatches(int size)
        {
            return Batch(ReadRecords(), size);
        }

        // Scans the file once and yields a (title, text) pair each time a text line follows a remembered title.
        private IEnumerable<Tuple<string, string>> ReadRecords()
        {
            string title = null;
            using (var reader = new StreamReader(_file))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Ordinal search: the keys are machine-readable markers, not linguistic text.
                    var titleIndex = line.IndexOf(TitleKey, StringComparison.Ordinal);
                    if (titleIndex >= 0)
                    {
                        // Keep the previous title when this line has no well-formed quoted value.
                        title = ExtractQuoted(line, titleIndex + TitleKey.Length) ?? title;
                        continue;
                    }
                    if (title == null) continue;

                    var textIndex = line.IndexOf(TextKey, StringComparison.Ordinal);
                    if (textIndex < 0) continue;

                    var text = ExtractQuoted(line, textIndex + TextKey.Length);
                    if (text == null) continue;

                    yield return Tuple.Create(title, text);
                    title = null;
                }
            }
        }

        // Returns the text between the first '"' at or after searchFrom and the last '"' on the line, or null when there is no such pair.
        private static string ExtractQuoted(string line, int searchFrom)
        {
            var open = line.IndexOf('"', searchFrom);
            var close = line.LastIndexOf('"');
            return open >= 0 && open < close
                ? line.Substring(open + 1, close - open - 1)
                : null;
        }

        // Groups items into arrays of `size` elements; every batch is a fresh copy so callers may keep it.
        private static IEnumerable<T[]> Batch<T>(IEnumerable<T> items, int size)
        {
            var batch = new List<T>();
            foreach (var item in items)
            {
                batch.Add(item);
                if (batch.Count >= size)
                {
                    yield return batch.ToArray();
                    batch.Clear();
                }
            }
            if (batch.Count > 0)
            {
                yield return batch.ToArray();
            }
        }
    }
}

@ -0,0 +1,85 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace TFIDFbee.Reader
{
    /// <summary>
    /// Reads (title, text) documents from a file by streaming it in fixed-size chunks through
    /// a <see cref="RecordParser"/> and pairing the <c>metaTitle</c> and <c>plaintext</c> values it reports.
    /// </summary>
    public class StateMachineReader
        : IDocumentReader
    {
        private readonly string _file;
        private readonly Func<string, IEnumerable<string>> _lexer;

        /// <summary>
        /// Creates a reader over the given file.
        /// </summary>
        /// <param name="file">Path of the file to read.</param>
        /// <param name="lexer">Function that turns a title or a text into a sequence of strings.</param>
        public StateMachineReader(string file, Func<string, IEnumerable<string>> lexer)
        {
            _file = file;
            _lexer = lexer;
        }

        /// <summary>
        /// Streams the file and yields one two-element array { title, text } per completed record.
        /// </summary>
        private IEnumerable<string[]> Parse()
        {
            // One chunk can contain several records, so completed records are queued by the
            // callback and drained after every Append instead of being held in a single slot.
            var completed = new Queue<string[]>();
            string title = null;
            string text = null;
            var parser = new RecordParser((key, value) =>
            {
                switch (key)
                {
                    case "metaTitle": title = value; break;
                    case "plaintext": text = value; break;
                    default: return;
                }
                if (!string.IsNullOrEmpty(title) && !string.IsNullOrEmpty(text))
                {
                    // A fresh array per record, so consumers may safely keep it.
                    completed.Enqueue(new[] { title, text });
                    title = null;
                    text = null;
                }
            });

            var buffer = new char[16536];
            using (var reader = new StreamReader(_file))
            {
                int count;
                while ((count = reader.Read(buffer, 0, buffer.Length)) > 0)
                {
                    parser.Append(new string(buffer, 0, count));
                    while (completed.Count > 0)
                    {
                        yield return completed.Dequeue();
                    }
                }
            }
        }

        /// <summary>
        /// Lazily reads the file and returns documents in batches. Each document is the lexer
        /// output for the title followed by the lexer output for the text.
        /// </summary>
        /// <param name="size">Number of documents per batch; the last batch may be smaller.</param>
        /// <returns>A sequence of batches, each an independent array.</returns>
        public IEnumerable<string[][]> ReadBatches(int size)
        {
            var documents = Parse()
                .Select(record => _lexer(record[0]).Concat(_lexer(record[1])).ToArray());
            return Batch(documents, size);
        }

        /// <summary>
        /// Lazily reads the file and returns raw (title, text) pairs in batches.
        /// </summary>
        /// <param name="size">Number of documents per batch; the last batch may be smaller.</param>
        /// <returns>A sequence of batches, each an independent array.</returns>
        public IEnumerable<IEnumerable<Tuple<string, string>>> ReadRawDocumentBatches(int size)
        {
            return Batch(Parse().Select(record => Tuple.Create(record[0], record[1])), size);
        }

        // Groups items into arrays of exactly `size` elements (the last one may be shorter).
        private static IEnumerable<T[]> Batch<T>(IEnumerable<T> items, int size)
        {
            var batch = new List<T>();
            foreach (var item in items)
            {
                batch.Add(item);
                if (batch.Count >= size)
                {
                    yield return batch.ToArray();
                    batch.Clear();
                }
            }
            if (batch.Count > 0)
            {
                yield return batch.ToArray();
            }
        }
    }
}

@ -0,0 +1,108 @@
using System;
using System.Text;
namespace TFIDFbee
{
    /// <summary>
    /// Incremental scanner that finds <c>"key": "value"</c> pairs with string values in JSON-like
    /// text and reports each pair through a callback. Text may be supplied in arbitrary chunks;
    /// parser state is kept between <see cref="Append"/> calls. Values are reported exactly as
    /// written: escape sequences are kept, not decoded. Pairs with an empty key or an empty
    /// value are not reported.
    /// </summary>
    public class RecordParser
    {
        private enum RPState
        {
            WaitKey,
            ParseKey,
            WaitKeyConfirm,
            WaitValue,
            ParseValue
        }

        private readonly StringBuilder _builder = new StringBuilder();
        private readonly Action<string, string> _callback;
        private RPState State = RPState.WaitKey;
        // True when the previous character inside a string was an unescaped backslash.
        private bool _escaped;
        private string _key;

        /// <summary>
        /// Creates a parser.
        /// </summary>
        /// <param name="callback">Invoked with (key, value) for every completed pair.</param>
        public RecordParser(Action<string, string> callback)
        {
            _callback = callback;
        }

        /// <summary>
        /// Feeds the next chunk of text to the parser.
        /// </summary>
        /// <param name="text">Chunk to scan; null or empty is ignored.</param>
        public void Append(string text)
        {
            if (string.IsNullOrEmpty(text)) return;

            foreach (var ch in text)
            {
                switch (State)
                {
                    case RPState.WaitKey:
                        if (ch == '"')
                        {
                            BeginString(RPState.ParseKey);
                        }
                        break;

                    case RPState.ParseKey:
                        if (ConsumeStringChar(ch))
                        {
                            State = _builder.Length > 0 ? RPState.WaitKeyConfirm : RPState.WaitKey;
                        }
                        break;

                    case RPState.WaitKeyConfirm:
                        if (ch == ':')
                        {
                            _key = _builder.ToString();
                            State = RPState.WaitValue;
                        }
                        else if (!IsInsignificant(ch))
                        {
                            // The string was not followed by ':', so it was not a key.
                            State = RPState.WaitKey;
                        }
                        break;

                    case RPState.WaitValue:
                        if (ch == '"')
                        {
                            BeginString(RPState.ParseValue);
                        }
                        else if (!IsInsignificant(ch))
                        {
                            // Non-string value (number, object, array, ...): skip this key.
                            State = RPState.WaitKey;
                        }
                        break;

                    case RPState.ParseValue:
                        if (ConsumeStringChar(ch))
                        {
                            if (_builder.Length > 0)
                            {
                                _callback(_key, _builder.ToString());
                            }
                            State = RPState.WaitKey;
                        }
                        break;
                }
            }
        }

        // Enters a string-reading state with an empty buffer and no pending escape.
        private void BeginString(RPState next)
        {
            State = next;
            _builder.Clear();
            _escaped = false;
        }

        // Handles one character inside a string; returns true when it is the closing quote.
        private bool ConsumeStringChar(char ch)
        {
            if (_escaped)
            {
                // Any character after a backslash is taken literally, including '"' and '\'.
                _escaped = false;
                _builder.Append(ch);
                return false;
            }
            if (ch == '"')
            {
                return true;
            }
            if (ch == '\\')
            {
                _escaped = true;
            }
            _builder.Append(ch);
            return false;
        }

        // Whitespace that may surround ':' between a key and its value.
        private static bool IsInsignificant(char ch)
        {
            return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
        }
    }
}

@ -7,13 +7,8 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Accord" Version="3.8.0" /> <ProjectReference Include="..\..\Lemmatization\Lemmatization.csproj" />
<PackageReference Include="Accord.MachineLearning" Version="3.8.0" /> <ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\GIT\Zero\ZeroLevel\ZeroLevel.csproj" />
<ProjectReference Include="..\..\HoKeMs\Lemmatization\Lemmatization.csproj" />
</ItemGroup> </ItemGroup>
</Project> </Project>

@ -11,14 +11,16 @@ namespace ZeroLevel.Services.Semantic.Helpers
public class BagOfWords : public class BagOfWords :
IBinarySerializable IBinarySerializable
{ {
private readonly ConcurrentDictionary<string, int[]> _words = private ConcurrentDictionary<string, int[]> _words;
new ConcurrentDictionary<string, int[]>();
int _words_count = -1; int _words_count = -1;
long _number_of_documents = 0; long _number_of_documents = 0;
public long NumberOfDocuments => _number_of_documents; public long NumberOfDocuments => _number_of_documents;
public int NumberOfWords => _words.Count; public int NumberOfWords => _words.Count;
public BagOfWords() =>
_words = new ConcurrentDictionary<string, int[]>();
/// <summary> /// <summary>
/// Набор документов, слова в документе должны быть лемматизированы/стеммированы, и быть уникальными /// Набор документов, слова в документе должны быть лемматизированы/стеммированы, и быть уникальными
/// </summary> /// </summary>
@ -85,12 +87,16 @@ namespace ZeroLevel.Services.Semantic.Helpers
public void Deserialize(IBinaryReader reader) public void Deserialize(IBinaryReader reader)
{ {
throw new NotImplementedException(); this._number_of_documents = reader.ReadLong();
this._words_count = reader.ReadInt32();
this._words = reader.ReadDictionaryAsConcurrent<string, int[]>();
} }
public void Serialize(IBinaryWriter writer) public void Serialize(IBinaryWriter writer)
{ {
throw new NotImplementedException(); writer.WriteLong(this._number_of_documents);
writer.WriteInt32(this._words_count);
writer.WriteDictionary<string, int[]>(this._words);
} }
} }
} }

@ -48,52 +48,60 @@ namespace ZeroLevel.Services.Serialization
#region Extensions #region Extensions
T Read<T>() where T : IBinarySerializable; #region Arrays
T[] ReadArray<T>() where T : IBinarySerializable, new();
T ReadCompatible<T>(); string[] ReadStringArray();
IPAddress[] ReadIPArray();
IPEndPoint[] ReadIPEndPointArray();
Guid[] ReadGuidArray();
DateTime[] ReadDateTimeArray();
Int64[] ReadInt64Array();
Int32[] ReadInt32Array();
UInt64[] ReadUInt64Array();
UInt32[] ReadUInt32Array();
char[] ReadCharArray();
short[] ReadShortArray();
ushort[] ReadUShortArray();
float[] ReadFloatArray();
Double[] ReadDoubleArray();
bool[] ReadBooleanArray();
byte[] ReadByteArray();
byte[][] ReadByteArrayArray();
decimal[] ReadDecimalArray();
TimeSpan[] ReadTimeSpanArray();
#endregion
#region Collections
List<T> ReadCollection<T>() where T : IBinarySerializable, new(); List<T> ReadCollection<T>() where T : IBinarySerializable, new();
Dictionary<TKey, TValue> ReadDictionary<TKey, TValue>();
ConcurrentDictionary<TKey, TValue> ReadDictionaryAsConcurrent<TKey, TValue>();
List<string> ReadStringCollection(); List<string> ReadStringCollection();
List<Guid> ReadGuidCollection(); List<Guid> ReadGuidCollection();
List<DateTime> ReadDateTimeCollection(); List<DateTime> ReadDateTimeCollection();
List<char> ReadCharCollection(); List<char> ReadCharCollection();
List<Int64> ReadInt64Collection(); List<Int64> ReadInt64Collection();
List<Int32> ReadInt32Collection(); List<Int32> ReadInt32Collection();
List<Double> ReadDoubleCollection(); List<Double> ReadDoubleCollection();
List<Decimal> ReadDecimalCollection(); List<Decimal> ReadDecimalCollection();
List<TimeSpan> ReadTimeSpanCollection(); List<TimeSpan> ReadTimeSpanCollection();
List<float> ReadFloatCollection(); List<float> ReadFloatCollection();
List<bool> ReadBooleanCollection(); List<bool> ReadBooleanCollection();
List<byte> ReadByteCollection(); List<byte> ReadByteCollection();
List<byte[]> ReadByteArrayCollection(); List<byte[]> ReadByteArrayCollection();
List<IPAddress> ReadIPCollection(); List<IPAddress> ReadIPCollection();
List<IPEndPoint> ReadIPEndPointCollection(); List<IPEndPoint> ReadIPEndPointCollection();
List<UInt64> ReadUInt64Collection(); List<UInt64> ReadUInt64Collection();
List<UInt32> ReadUInt32Collection(); List<UInt32> ReadUInt32Collection();
List<short> ReadShortCollection(); List<short> ReadShortCollection();
List<ushort> ReadUShortCollection(); List<ushort> ReadUShortCollection();
#endregion
T Read<T>() where T : IBinarySerializable;
T ReadCompatible<T>();
Dictionary<TKey, TValue> ReadDictionary<TKey, TValue>();
ConcurrentDictionary<TKey, TValue> ReadDictionaryAsConcurrent<TKey, TValue>();
#endregion Extensions #endregion Extensions
Stream Stream { get; } Stream Stream { get; }

@ -49,49 +49,55 @@ namespace ZeroLevel.Services.Serialization
#region Extensions #region Extensions
#region Arrays
// WriteArray overloads: one generic overload for arrays of IBinarySerializable
// elements, followed by one overload per supported element type.
// NOTE(review): the binary layout (length prefix, null handling) is defined by
// the implementing class, not by these declarations - confirm there.
void WriteArray<T>(T[] array) where T : IBinarySerializable;
void WriteArray(string[] array);
void WriteArray(IPAddress[] array);
void WriteArray(IPEndPoint[] array);
void WriteArray(Guid[] array);
void WriteArray(DateTime[] array);
void WriteArray(UInt64[] array);
void WriteArray(UInt32[] array);
void WriteArray(char[] array);
void WriteArray(short[] array);
void WriteArray(ushort[] array);
void WriteArray(Int64[] array);
void WriteArray(Int32[] array);
void WriteArray(float[] array);
void WriteArray(Double[] array);
void WriteArray(bool[] array);
void WriteArray(byte[] array);
// Array of byte arrays (jagged), as opposed to the flat byte[] overload above.
void WriteArray(byte[][] array);
void WriteArray(decimal[] array);
void WriteArray(TimeSpan[] array);
#endregion
#region Collections
void WriteCollection<T>(IEnumerable<T> collection) void WriteCollection<T>(IEnumerable<T> collection)
where T : IBinarySerializable; where T : IBinarySerializable;
/// <summary>
/// Writes the key/value pairs of a Dictionary.
/// </summary>
void WriteDictionary<TKey, TValue>(Dictionary<TKey, TValue> collection);
/// <summary>
/// Writes the key/value pairs of a ConcurrentDictionary.
/// </summary>
void WriteDictionary<TKey, TValue>(ConcurrentDictionary<TKey, TValue> collection);
void WriteCollection(IEnumerable<string> collection); void WriteCollection(IEnumerable<string> collection);
void WriteCollection(IEnumerable<char> collection); void WriteCollection(IEnumerable<char> collection);
void WriteCollection(IEnumerable<Guid> collection); void WriteCollection(IEnumerable<Guid> collection);
void WriteCollection(IEnumerable<DateTime> collection); void WriteCollection(IEnumerable<DateTime> collection);
void WriteCollection(IEnumerable<Int64> collection); void WriteCollection(IEnumerable<Int64> collection);
void WriteCollection(IEnumerable<Int32> collection); void WriteCollection(IEnumerable<Int32> collection);
void WriteCollection(IEnumerable<UInt64> collection); void WriteCollection(IEnumerable<UInt64> collection);
void WriteCollection(IEnumerable<UInt32> collection); void WriteCollection(IEnumerable<UInt32> collection);
void WriteCollection(IEnumerable<short> collection); void WriteCollection(IEnumerable<short> collection);
void WriteCollection(IEnumerable<ushort> collection); void WriteCollection(IEnumerable<ushort> collection);
void WriteCollection(IEnumerable<Double> collection); void WriteCollection(IEnumerable<Double> collection);
void WriteCollection(IEnumerable<Decimal> collection); void WriteCollection(IEnumerable<Decimal> collection);
void WriteCollection(IEnumerable<TimeSpan> collection); void WriteCollection(IEnumerable<TimeSpan> collection);
void WriteCollection(IEnumerable<float> collection); void WriteCollection(IEnumerable<float> collection);
void WriteCollection(IEnumerable<bool> collection); void WriteCollection(IEnumerable<bool> collection);
void WriteCollection(IEnumerable<byte> collection); void WriteCollection(IEnumerable<byte> collection);
void WriteCollection(IEnumerable<byte[]> collection); void WriteCollection(IEnumerable<byte[]> collection);
void WriteCollection(IEnumerable<IPEndPoint> collection); void WriteCollection(IEnumerable<IPEndPoint> collection);
void WriteCollection(IEnumerable<IPAddress> collection); void WriteCollection(IEnumerable<IPAddress> collection);
#endregion
/// <summary>
/// Writes the key/value pairs of a Dictionary.
/// </summary>
void WriteDictionary<TKey, TValue>(Dictionary<TKey, TValue> collection);
/// <summary>
/// Writes the key/value pairs of a ConcurrentDictionary.
/// </summary>
void WriteDictionary<TKey, TValue>(ConcurrentDictionary<TKey, TValue> collection);
void Write<T>(T item) void Write<T>(T item)
where T : IBinarySerializable; where T : IBinarySerializable;

@ -224,6 +224,7 @@ namespace ZeroLevel.Services.Serialization
#region Extensions #region Extensions
#region Collections
public List<T> ReadCollection<T>() public List<T> ReadCollection<T>()
where T : IBinarySerializable, new() where T : IBinarySerializable, new()
{ {
@ -241,57 +242,6 @@ namespace ZeroLevel.Services.Serialization
return collection; return collection;
} }
/// <summary>
/// Reads an Int32 pair count, then that many key/value pairs via ReadCompatible,
/// key first and value second.
/// </summary>
/// <returns>Dictionary holding the pairs read; empty when the count is not positive... except that a negative count is passed to the Dictionary constructor as capacity.</returns>
public Dictionary<TKey, TValue> ReadDictionary<TKey, TValue>()
{
    int pairCount = ReadInt32();
    var result = new Dictionary<TKey, TValue>(pairCount);
    for (int index = 0; index < pairCount; index++)
    {
        TKey entryKey = ReadCompatible<TKey>();
        TValue entryValue = ReadCompatible<TValue>();
        // Add (not the indexer) is used, so a repeated key is an error.
        result.Add(entryKey, entryValue);
    }
    return result;
}
/// <summary>
/// Reads an Int32 pair count, then that many key/value pairs via ReadCompatible,
/// key first and value second, into a ConcurrentDictionary.
/// </summary>
/// <returns>The populated dictionary; empty when the count is zero or negative.</returns>
public ConcurrentDictionary<TKey, TValue> ReadDictionaryAsConcurrent<TKey, TValue>()
{
    int pairCount = ReadInt32();
    var result = new ConcurrentDictionary<TKey, TValue>();
    for (int index = 0; index < pairCount; index++)
    {
        TKey entryKey = ReadCompatible<TKey>();
        TValue entryValue = ReadCompatible<TValue>();
        // The TryAdd result is discarded: a repeated key keeps its first value.
        result.TryAdd(entryKey, entryValue);
    }
    return result;
}
/// <summary>
/// Reads a value of type T by delegating to MessageSerializer.DeserializeCompatible
/// with this reader as the source.
/// </summary>
public T ReadCompatible<T>() => MessageSerializer.DeserializeCompatible<T>(this);
/// <summary>
/// Reads one marker byte; a marker of 0 means "no value", anything else means
/// an instance follows and is deserialized from this reader.
/// </summary>
/// <returns>default(T) when the marker is 0, otherwise the deserialized instance.</returns>
public T Read<T>() where T : IBinarySerializable
{
    if (ReadByte() == 0)
    {
        return default(T);
    }
    T instance = Activator.CreateInstance<T>();
    instance.Deserialize(this);
    return instance;
}
public List<string> ReadStringCollection() public List<string> ReadStringCollection()
{ {
int count = ReadInt32(); int count = ReadInt32();
@ -417,6 +367,7 @@ namespace ZeroLevel.Services.Serialization
} }
return collection; return collection;
} }
public List<char> ReadCharCollection() public List<char> ReadCharCollection()
{ {
int count = ReadInt32(); int count = ReadInt32();
@ -430,6 +381,7 @@ namespace ZeroLevel.Services.Serialization
} }
return collection; return collection;
} }
public List<short> ReadShortCollection() public List<short> ReadShortCollection()
{ {
int count = ReadInt32(); int count = ReadInt32();
@ -555,7 +507,344 @@ namespace ZeroLevel.Services.Serialization
} }
return collection; return collection;
} }
#endregion
#region Arrays
/// <summary>
/// Reads an Int32 element count, then creates and deserializes that many
/// instances of T from this reader.
/// </summary>
/// <returns>Array of the deserialized items; empty when the count is zero.</returns>
public T[] ReadArray<T>()
    where T : IBinarySerializable, new()
{
    var result = new T[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        var element = new T();
        element.Deserialize(this);
        result[index] = element;
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many strings via ReadString.
/// </summary>
/// <returns>Array of the strings read; empty when the count is zero.</returns>
public string[] ReadStringArray()
{
    var result = new string[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadString();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many addresses via ReadIP.
/// </summary>
/// <returns>Array of the addresses read; empty when the count is zero.</returns>
public IPAddress[] ReadIPArray()
{
    var result = new IPAddress[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadIP();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many endpoints via ReadIPEndpoint.
/// </summary>
/// <returns>Array of the endpoints read; empty when the count is zero.</returns>
public IPEndPoint[] ReadIPEndPointArray()
{
    var result = new IPEndPoint[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadIPEndpoint();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadGuid.
/// </summary>
/// <returns>Array of the GUIDs read; empty when the count is zero.</returns>
public Guid[] ReadGuidArray()
{
    var result = new Guid[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadGuid();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadDateTime.
/// A null result from ReadDateTime is stored as DateTime.MinValue.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public DateTime[] ReadDateTimeArray()
{
    var result = new DateTime[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = (ReadDateTime() ?? DateTime.MinValue);
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadLong.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public Int64[] ReadInt64Array()
{
    var result = new Int64[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadLong();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadInt32.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public Int32[] ReadInt32Array()
{
    var result = new Int32[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadInt32();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadULong.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public UInt64[] ReadUInt64Array()
{
    var result = new UInt64[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadULong();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadUInt32.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public UInt32[] ReadUInt32Array()
{
    var result = new UInt32[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadUInt32();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many characters via ReadChar.
/// </summary>
/// <returns>Array of the characters read; empty when the count is zero.</returns>
public char[] ReadCharArray()
{
    var result = new char[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadChar();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadShort.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public short[] ReadShortArray()
{
    var result = new short[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadShort();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadUShort.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public ushort[] ReadUShortArray()
{
    var result = new ushort[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadUShort();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadFloat.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public float[] ReadFloatArray()
{
    var result = new float[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadFloat();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadDouble.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public Double[] ReadDoubleArray()
{
    var result = new Double[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadDouble();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadBoolean.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public bool[] ReadBooleanArray()
{
    var result = new bool[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadBoolean();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many bytes, one ReadByte call per element.
/// </summary>
/// <returns>Array of the bytes read; empty when the count is zero.</returns>
public byte[] ReadByteArray()
{
    var result = new byte[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadByte();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many byte arrays via ReadBytes.
/// </summary>
/// <returns>Jagged array of the byte arrays read; empty when the count is zero.</returns>
public byte[][] ReadByteArrayArray()
{
    var result = new byte[ReadInt32()][];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadBytes();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadDecimal.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public decimal[] ReadDecimalArray()
{
    var result = new decimal[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadDecimal();
    }
    return result;
}
/// <summary>
/// Reads an Int32 element count, then that many values via ReadTimeSpan.
/// </summary>
/// <returns>Array of the values read; empty when the count is zero.</returns>
public TimeSpan[] ReadTimeSpanArray()
{
    var result = new TimeSpan[ReadInt32()];
    for (int index = 0; index < result.Length; index++)
    {
        result[index] = ReadTimeSpan();
    }
    return result;
}
#endregion
/// <summary>
/// Reads an Int32 pair count, then that many key/value pairs via ReadCompatible,
/// key first and value second.
/// </summary>
/// <returns>Dictionary holding the pairs read; empty when the count is zero.</returns>
public Dictionary<TKey, TValue> ReadDictionary<TKey, TValue>()
{
    int pairCount = ReadInt32();
    // The count doubles as the initial capacity of the dictionary.
    var result = new Dictionary<TKey, TValue>(pairCount);
    for (int index = 0; index < pairCount; index++)
    {
        TKey entryKey = ReadCompatible<TKey>();
        TValue entryValue = ReadCompatible<TValue>();
        // Add (not the indexer) is used, so a repeated key is an error.
        result.Add(entryKey, entryValue);
    }
    return result;
}
/// <summary>
/// Reads an Int32 pair count, then that many key/value pairs via ReadCompatible,
/// key first and value second, into a ConcurrentDictionary.
/// </summary>
/// <returns>The populated dictionary; empty when the count is zero or negative.</returns>
public ConcurrentDictionary<TKey, TValue> ReadDictionaryAsConcurrent<TKey, TValue>()
{
    int pairCount = ReadInt32();
    var result = new ConcurrentDictionary<TKey, TValue>();
    for (int index = 0; index < pairCount; index++)
    {
        TKey entryKey = ReadCompatible<TKey>();
        TValue entryValue = ReadCompatible<TValue>();
        // The TryAdd result is discarded: a repeated key keeps its first value.
        result.TryAdd(entryKey, entryValue);
    }
    return result;
}
/// <summary>
/// Reads a value of type T by delegating to MessageSerializer.DeserializeCompatible
/// with this reader as the source.
/// </summary>
public T ReadCompatible<T>() => MessageSerializer.DeserializeCompatible<T>(this);
/// <summary>
/// Reads one marker byte; a marker of 0 means "no value", anything else means
/// an instance follows and is deserialized from this reader.
/// </summary>
/// <returns>default(T) when the marker is 0, otherwise the deserialized instance.</returns>
public T Read<T>() where T : IBinarySerializable
{
    if (ReadByte() == 0)
    {
        return default(T);
    }
    T instance = Activator.CreateInstance<T>();
    instance.Deserialize(this);
    return instance;
}
#endregion Extensions #endregion Extensions
public void Dispose() public void Dispose()

@ -249,20 +249,7 @@ namespace ZeroLevel.Services.Serialization
#region Extension #region Extension
public void Write<T>(T item) #region Collections
where T : IBinarySerializable
{
if (item != null)
{
WriteByte(1);
item.Serialize(this);
}
else
{
WriteByte(0);
}
}
public void WriteCollection<T>(IEnumerable<T> collection) public void WriteCollection<T>(IEnumerable<T> collection)
where T : IBinarySerializable where T : IBinarySerializable
{ {
@ -371,6 +358,7 @@ namespace ZeroLevel.Services.Serialization
} }
} }
} }
public void WriteCollection(IEnumerable<short> collection) public void WriteCollection(IEnumerable<short> collection)
{ {
WriteInt32(collection?.Count() ?? 0); WriteInt32(collection?.Count() ?? 0);
@ -479,12 +467,6 @@ namespace ZeroLevel.Services.Serialization
} }
} }
/// <summary>
/// Serializes the item with MessageSerializer.SerializeCompatible and writes the
/// resulting bytes to the underlying stream. This method adds no length prefix of its own.
/// </summary>
/// <param name="item">Value to serialize.</param>
public void WriteCompatible<T>(T item)
{
    // NOTE(review): Length is read unconditionally, so this assumes
    // SerializeCompatible never returns null - confirm for a null item.
    var payload = MessageSerializer.SerializeCompatible(item);
    _stream.Write(payload, 0, payload.Length);
}
public void WriteCollection(IEnumerable<decimal> collection) public void WriteCollection(IEnumerable<decimal> collection)
{ {
WriteInt32(collection?.Count() ?? 0); WriteInt32(collection?.Count() ?? 0);
@ -497,21 +479,283 @@ namespace ZeroLevel.Services.Serialization
} }
} }
public void WriteCollection(IEnumerable<TimeSpan> collection)
public void WriteDictionary<TKey, TValue>(Dictionary<TKey, TValue> collection)
{ {
WriteInt32(collection?.Count() ?? 0); WriteInt32(collection?.Count() ?? 0);
if (collection != null) if (collection != null)
{ {
foreach (var item in collection) foreach (var item in collection)
{ {
WriteCompatible(item.Key); WriteTimeSpan(item);
WriteCompatible(item.Value);
} }
} }
} }
#endregion
public void WriteDictionary<TKey, TValue>(ConcurrentDictionary<TKey, TValue> collection) #region Arrays
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then serializes each
/// element into this writer. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Items to write; may be null.</param>
public void WriteArray<T>(T[] array)
    where T : IBinarySerializable
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    int position = 0;
    while (position < array.Length)
    {
        // Called on the array slot itself (not a copy), as elements may be value types.
        array[position].Serialize(this);
        position++;
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteString. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Strings to write; may be null.</param>
public void WriteArray(string[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteString(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteIP. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Addresses to write; may be null.</param>
public void WriteArray(IPAddress[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteIP(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteIPEndpoint. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Endpoints to write; may be null.</param>
public void WriteArray(IPEndPoint[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteIPEndpoint(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteGuid. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">GUIDs to write; may be null.</param>
public void WriteArray(Guid[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteGuid(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteDateTime. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Date/time values to write; may be null.</param>
public void WriteArray(DateTime[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteDateTime(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteULong. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Values to write; may be null.</param>
public void WriteArray(UInt64[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteULong(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteUInt32. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Values to write; may be null.</param>
public void WriteArray(UInt32[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteUInt32(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteChar. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Characters to write; may be null.</param>
public void WriteArray(char[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteChar(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteShort. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Values to write; may be null.</param>
public void WriteArray(short[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteShort(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteUShort. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Values to write; may be null.</param>
public void WriteArray(ushort[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteUShort(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteLong. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Values to write; may be null.</param>
public void WriteArray(Int64[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteLong(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteInt32. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Values to write; may be null.</param>
public void WriteArray(Int32[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteInt32(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteFloat. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Values to write; may be null.</param>
public void WriteArray(float[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteFloat(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteDouble. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Values to write; may be null.</param>
public void WriteArray(Double[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteDouble(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteBoolean. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Flags to write; may be null.</param>
public void WriteArray(bool[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteBoolean(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each byte with a
/// separate WriteByte call. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Bytes to write; may be null.</param>
public void WriteArray(byte[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteByte(element);
    }
}
/// <summary>
/// Writes the outer array length as Int32 (0 for a null array), then each inner
/// byte array via WriteBytes. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Byte arrays to write; may be null.</param>
public void WriteArray(byte[][] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteBytes(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteDecimal. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Values to write; may be null.</param>
public void WriteArray(decimal[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteDecimal(element);
    }
}
/// <summary>
/// Writes the array length as Int32 (0 for a null array), then each element via
/// WriteTimeSpan. A null array and an empty array produce the same output.
/// </summary>
/// <param name="array">Time spans to write; may be null.</param>
public void WriteArray(TimeSpan[] array)
{
    if (array == null)
    {
        WriteInt32(0);
        return;
    }
    WriteInt32(array.Length);
    foreach (var element in array)
    {
        WriteTimeSpan(element);
    }
}
#endregion
/// <summary>
/// Serializes the item with MessageSerializer.SerializeCompatible and writes the
/// resulting bytes to the underlying stream. This method adds no length prefix of its own.
/// </summary>
/// <param name="item">Value to serialize.</param>
public void WriteCompatible<T>(T item)
{
    // NOTE(review): Length is read unconditionally, so this assumes
    // SerializeCompatible never returns null - confirm for a null item.
    var payload = MessageSerializer.SerializeCompatible(item);
    _stream.Write(payload, 0, payload.Length);
}
/// <summary>
/// Writes a one-byte presence marker followed by the item: 0 alone for a null
/// item, otherwise 1 and then the item's own serialized form.
/// </summary>
/// <param name="item">Item to write; may be null.</param>
public void Write<T>(T item)
    where T : IBinarySerializable
{
    if (item == null)
    {
        WriteByte(0);
        return;
    }
    WriteByte(1);
    item.Serialize(this);
}
public void WriteDictionary<TKey, TValue>(Dictionary<TKey, TValue> collection)
{ {
WriteInt32(collection?.Count() ?? 0); WriteInt32(collection?.Count() ?? 0);
if (collection != null) if (collection != null)
@ -524,14 +768,15 @@ namespace ZeroLevel.Services.Serialization
} }
} }
public void WriteCollection(IEnumerable<TimeSpan> collection) public void WriteDictionary<TKey, TValue>(ConcurrentDictionary<TKey, TValue> collection)
{ {
WriteInt32(collection?.Count() ?? 0); WriteInt32(collection?.Count() ?? 0);
if (collection != null) if (collection != null)
{ {
foreach (var item in collection) foreach (var item in collection)
{ {
WriteTimeSpan(item); WriteCompatible(item.Key);
WriteCompatible(item.Value);
} }
} }
} }

@ -1,58 +1,12 @@
using System; using System;
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO;
namespace ZeroLevel.Services.Serialization namespace ZeroLevel.Services.Serialization
{ {
public static class MessageSerializer public static class MessageSerializer
{ {
/// <summary>
/// Creates an instance of T and lets it deserialize itself from the given bytes.
/// </summary>
/// <param name="data">Serialized bytes; may be null or empty.</param>
/// <returns>default(T) when data is null or empty, otherwise the deserialized instance.</returns>
public static T Deserialize<T>(byte[] data)
    where T : IBinarySerializable
{
    bool hasPayload = data != null && data.Length > 0;
    if (!hasPayload)
    {
        return default(T);
    }
    using (var source = new MemoryStreamReader(data))
    {
        T instance = Activator.CreateInstance<T>();
        instance.Deserialize(source);
        return instance;
    }
}
/// <summary>
/// Creates an instance of the given type, casts it to IBinarySerializable and lets
/// it deserialize itself from the given bytes.
/// </summary>
/// <param name="type">Type to instantiate; the instance is cast to IBinarySerializable.</param>
/// <param name="data">Serialized bytes; may be null or empty.</param>
/// <returns>null when data is null or empty, otherwise the deserialized instance.</returns>
public static object Deserialize(Type type, byte[] data)
{
    bool hasPayload = data != null && data.Length > 0;
    if (!hasPayload)
    {
        return null;
    }
    using (var source = new MemoryStreamReader(data))
    {
        var instance = (IBinarySerializable)Activator.CreateInstance(type);
        instance.Deserialize(source);
        return instance;
    }
}
/// <summary>
/// Reads an Int32 item count from the given bytes, then creates and deserializes
/// that many instances of T.
/// </summary>
/// <param name="data">Serialized bytes; may be null or empty.</param>
/// <returns>null when data is null or empty, otherwise the list of deserialized items.</returns>
public static List<T> DeserializeCollection<T>(byte[] data)
    where T : IBinarySerializable
{
    if (data == null || data.Length == 0)
    {
        return null;
    }
    using (var source = new MemoryStreamReader(data))
    {
        int itemCount = source.ReadInt32();
        var items = new List<T>(itemCount);
        for (int index = 0; index < itemCount; index++)
        {
            T element = Activator.CreateInstance<T>();
            element.Deserialize(source);
            items.Add(element);
        }
        return items;
    }
}
public static byte[] Serialize<T>(T obj) public static byte[] Serialize<T>(T obj)
where T : IBinarySerializable where T : IBinarySerializable
{ {
@ -119,6 +73,53 @@ namespace ZeroLevel.Services.Serialization
} }
} }
/// <summary>
/// Creates an instance of T and lets it deserialize itself from the given bytes.
/// </summary>
/// <param name="data">Serialized bytes; may be null or empty.</param>
/// <returns>default(T) when data is null or empty, otherwise the deserialized instance.</returns>
public static T Deserialize<T>(byte[] data)
    where T : IBinarySerializable
{
    bool hasPayload = data != null && data.Length > 0;
    if (!hasPayload)
    {
        return default(T);
    }
    using (var source = new MemoryStreamReader(data))
    {
        T instance = Activator.CreateInstance<T>();
        instance.Deserialize(source);
        return instance;
    }
}
/// <summary>
/// Creates an instance of the given type, casts it to IBinarySerializable and lets
/// it deserialize itself from the given bytes.
/// </summary>
/// <param name="type">Type to instantiate; the instance is cast to IBinarySerializable.</param>
/// <param name="data">Serialized bytes; may be null or empty.</param>
/// <returns>null when data is null or empty, otherwise the deserialized instance.</returns>
public static object Deserialize(Type type, byte[] data)
{
    bool hasPayload = data != null && data.Length > 0;
    if (!hasPayload)
    {
        return null;
    }
    using (var source = new MemoryStreamReader(data))
    {
        var instance = (IBinarySerializable)Activator.CreateInstance(type);
        instance.Deserialize(source);
        return instance;
    }
}
/// <summary>
/// Reads an Int32 item count from the given bytes, then creates and deserializes
/// that many instances of T.
/// </summary>
/// <param name="data">Serialized bytes; may be null or empty.</param>
/// <returns>null when data is null or empty, otherwise the list of deserialized items.</returns>
public static List<T> DeserializeCollection<T>(byte[] data)
    where T : IBinarySerializable
{
    if (data == null || data.Length == 0)
    {
        return null;
    }
    using (var source = new MemoryStreamReader(data))
    {
        int itemCount = source.ReadInt32();
        var items = new List<T>(itemCount);
        for (int index = 0; index < itemCount; index++)
        {
            T element = Activator.CreateInstance<T>();
            element.Deserialize(source);
            items.Add(element);
        }
        return items;
    }
}
public static T DeserializeCompatible<T>(byte[] data) public static T DeserializeCompatible<T>(byte[] data)
{ {
if (data == null || data.Length == 0) return default(T); if (data == null || data.Length == 0) return default(T);
@ -206,5 +207,156 @@ namespace ZeroLevel.Services.Serialization
} }
} }
} }
#region Stream
/// <summary>
/// Serializes the object into the given stream through a MemoryStreamWriter.
/// Nothing is written when the object is null.
/// </summary>
/// <param name="stream">Destination stream, handed to the MemoryStreamWriter.</param>
/// <param name="obj">Object to serialize; may be null.</param>
public static void Serialize<T>(Stream stream, T obj)
    where T : IBinarySerializable
{
    if (obj != null)
    {
        // NOTE(review): the writer is disposed here; whether that also closes
        // the caller's stream depends on MemoryStreamWriter - confirm.
        using (var target = new MemoryStreamWriter(stream))
        {
            obj.Serialize(target);
        }
    }
}
/// <summary>
/// Writes the items into the given stream via MemoryStreamWriter.WriteCollection.
/// Nothing is written when the sequence is null.
/// </summary>
/// <param name="stream">Destination stream, handed to the MemoryStreamWriter.</param>
/// <param name="items">Items to serialize; may be null.</param>
public static void Serialize<T>(Stream stream, IEnumerable<T> items)
    where T : IBinarySerializable
{
    if (items != null)
    {
        using (var target = new MemoryStreamWriter(stream))
        {
            target.WriteCollection<T>(items);
        }
    }
}
/// <summary>
/// Serializes the object into the given stream: through its own Serialize method
/// when it implements IBinarySerializable, otherwise through PrimitiveTypeSerializer.
/// Nothing is written when the object is null.
/// </summary>
/// <param name="stream">Destination stream, handed to the MemoryStreamWriter.</param>
/// <param name="obj">Object to serialize; may be null.</param>
public static void SerializeCompatible(Stream stream, object obj)
{
    if (obj == null)
    {
        return;
    }
    var selfSerializing = obj as IBinarySerializable;
    using (var target = new MemoryStreamWriter(stream))
    {
        if (selfSerializing == null)
        {
            PrimitiveTypeSerializer.Serialize(target, obj);
        }
        else
        {
            selfSerializing.Serialize(target);
        }
    }
}
/// <summary>
/// Generic variant: serializes the object into the given stream through its own
/// Serialize method when it implements IBinarySerializable, otherwise through
/// PrimitiveTypeSerializer.Serialize for T. Nothing is written when the object is null.
/// </summary>
/// <param name="stream">Destination stream, handed to the MemoryStreamWriter.</param>
/// <param name="obj">Object to serialize; may be null.</param>
public static void SerializeCompatible<T>(Stream stream, T obj)
{
    if (obj == null)
    {
        return;
    }
    var selfSerializing = obj as IBinarySerializable;
    using (var target = new MemoryStreamWriter(stream))
    {
        if (selfSerializing == null)
        {
            PrimitiveTypeSerializer.Serialize<T>(target, obj);
        }
        else
        {
            selfSerializing.Serialize(target);
        }
    }
}
/// <summary>
/// Creates an instance of T and lets it deserialize itself from the given stream.
/// </summary>
/// <param name="stream">Source stream; may be null.</param>
/// <returns>default(T) when the stream is null, otherwise the deserialized instance.</returns>
public static T Deserialize<T>(Stream stream)
    where T : IBinarySerializable
{
    if (stream == null)
    {
        return default(T);
    }
    using (var source = new MemoryStreamReader(stream))
    {
        T instance = Activator.CreateInstance<T>();
        instance.Deserialize(source);
        return instance;
    }
}
/// <summary>
/// Creates an instance of the given type, casts it to IBinarySerializable and lets
/// it deserialize itself from the given stream.
/// </summary>
/// <param name="type">Type to instantiate; the instance is cast to IBinarySerializable.</param>
/// <param name="stream">Source stream; may be null.</param>
/// <returns>null when the stream is null, otherwise the deserialized instance.</returns>
public static object Deserialize(Type type, Stream stream)
{
    if (stream == null)
    {
        return null;
    }
    using (var source = new MemoryStreamReader(stream))
    {
        var instance = (IBinarySerializable)Activator.CreateInstance(type);
        instance.Deserialize(source);
        return instance;
    }
}
/// <summary>
/// Reads an Int32 item count from the given stream, then creates and deserializes
/// that many instances of T.
/// </summary>
/// <param name="stream">Source stream; may be null.</param>
/// <returns>null when the stream is null, otherwise the list of deserialized items.</returns>
public static List<T> DeserializeCollection<T>(Stream stream)
    where T : IBinarySerializable
{
    if (stream == null)
    {
        return null;
    }
    using (var source = new MemoryStreamReader(stream))
    {
        int itemCount = source.ReadInt32();
        var items = new List<T>(itemCount);
        for (int index = 0; index < itemCount; index++)
        {
            T element = Activator.CreateInstance<T>();
            element.Deserialize(source);
            items.Add(element);
        }
        return items;
    }
}
/// <summary>
/// Reads a value of type T from the given stream. When T implements
/// IBinarySerializable an instance is created and deserializes itself;
/// otherwise the read is delegated to PrimitiveTypeSerializer.
/// </summary>
/// <param name="stream">Source stream; may be null.</param>
/// <returns>default(T) when the stream is null, otherwise the value read.</returns>
public static T DeserializeCompatible<T>(Stream stream)
{
    if (stream == null)
    {
        return default(T);
    }
    using (var source = new MemoryStreamReader(stream))
    {
        if (!typeof(IBinarySerializable).IsAssignableFrom(typeof(T)))
        {
            return PrimitiveTypeSerializer.Deserialize<T>(source);
        }
        // Held as the interface so that Deserialize fills the same (possibly boxed) instance that is returned.
        var selfSerializing = (IBinarySerializable)Activator.CreateInstance<T>();
        selfSerializing.Deserialize(source);
        return (T)selfSerializing;
    }
}
/// <summary>
/// Reads a value of the given type from the stream. When the type implements
/// IBinarySerializable an instance is created and deserializes itself;
/// otherwise the read is delegated to PrimitiveTypeSerializer.
/// </summary>
/// <param name="type">Type of the value to read.</param>
/// <param name="stream">Source stream; may be null.</param>
/// <returns>null when the stream is null, otherwise the value read.</returns>
public static object DeserializeCompatible(Type type, Stream stream)
{
    if (stream == null)
    {
        return null;
    }
    using (var source = new MemoryStreamReader(stream))
    {
        if (!typeof(IBinarySerializable).IsAssignableFrom(type))
        {
            return PrimitiveTypeSerializer.Deserialize(source, type);
        }
        var selfSerializing = (IBinarySerializable)Activator.CreateInstance(type);
        selfSerializing.Deserialize(source);
        return selfSerializing;
    }
}
#endregion
} }
} }

@ -46,6 +46,7 @@ namespace ZeroLevel.Services.Serialization
} }
private readonly static Dictionary<Type, Wrapper> _cachee = new Dictionary<Type, Wrapper>(); private readonly static Dictionary<Type, Wrapper> _cachee = new Dictionary<Type, Wrapper>();
private readonly static Dictionary<Type, Type> _enumTypesCachee = new Dictionary<Type, Type>(); private readonly static Dictionary<Type, Type> _enumTypesCachee = new Dictionary<Type, Type>();
private readonly static Dictionary<Type, Type> _arrayTypesCachee = new Dictionary<Type, Type>();
private static void PreloadCachee() private static void PreloadCachee()
{ {
@ -69,6 +70,25 @@ namespace ZeroLevel.Services.Serialization
_cachee.Add(typeof(IPEndPoint), Create<IPEndPoint>()); _cachee.Add(typeof(IPEndPoint), Create<IPEndPoint>());
_cachee.Add(typeof(IPAddress), Create<IPAddress>()); _cachee.Add(typeof(IPAddress), Create<IPAddress>());
_cachee.Add(typeof(char[]), Create<char[]>());
_cachee.Add(typeof(Boolean[]), Create<Boolean[]>());
_cachee.Add(typeof(Byte[][]), Create<Byte[][]>());
_cachee.Add(typeof(Int32[]), Create<Int32[]>());
_cachee.Add(typeof(UInt32[]), Create<UInt32[]>());
_cachee.Add(typeof(Int64[]), Create<Int64[]>());
_cachee.Add(typeof(UInt64[]), Create<UInt64[]>());
_cachee.Add(typeof(Double[]), Create<Double[]>());
_cachee.Add(typeof(float[]), Create<float[]>());
_cachee.Add(typeof(short[]), Create<short[]>());
_cachee.Add(typeof(ushort[]), Create<ushort[]>());
_cachee.Add(typeof(Decimal[]), Create<Decimal[]>());
_cachee.Add(typeof(DateTime[]), Create<DateTime[]>());
_cachee.Add(typeof(Guid[]), Create<Guid[]>());
_cachee.Add(typeof(String[]), Create<String[]>());
_cachee.Add(typeof(TimeSpan[]), Create<TimeSpan[]>());
_cachee.Add(typeof(IPEndPoint[]), Create<IPEndPoint[]>());
_cachee.Add(typeof(IPAddress[]), Create<IPAddress[]>());
_cachee.Add(typeof(IEnumerable<char>), Create<IEnumerable<char>>()); _cachee.Add(typeof(IEnumerable<char>), Create<IEnumerable<char>>());
_cachee.Add(typeof(IEnumerable<Boolean>), Create<IEnumerable<Boolean>>()); _cachee.Add(typeof(IEnumerable<Boolean>), Create<IEnumerable<Boolean>>());
_cachee.Add(typeof(IEnumerable<Byte>), Create<IEnumerable<Byte>>()); _cachee.Add(typeof(IEnumerable<Byte>), Create<IEnumerable<Byte>>());
@ -89,6 +109,25 @@ namespace ZeroLevel.Services.Serialization
_cachee.Add(typeof(IEnumerable<IPEndPoint>), Create<IEnumerable<IPEndPoint>>()); _cachee.Add(typeof(IEnumerable<IPEndPoint>), Create<IEnumerable<IPEndPoint>>());
_cachee.Add(typeof(IEnumerable<IPAddress>), Create<IEnumerable<IPAddress>>()); _cachee.Add(typeof(IEnumerable<IPAddress>), Create<IEnumerable<IPAddress>>());
_arrayTypesCachee.Add(typeof(char), typeof(char[]));
_arrayTypesCachee.Add(typeof(Boolean), typeof(Boolean[]));
_arrayTypesCachee.Add(typeof(Byte[]), typeof(Byte[][]));
_arrayTypesCachee.Add(typeof(Int32), typeof(Int32[]));
_arrayTypesCachee.Add(typeof(UInt32), typeof(UInt32[]));
_arrayTypesCachee.Add(typeof(Int64), typeof(Int64[]));
_arrayTypesCachee.Add(typeof(UInt64), typeof(UInt64[]));
_arrayTypesCachee.Add(typeof(Double), typeof(Double[]));
_arrayTypesCachee.Add(typeof(float), typeof(float[]));
_arrayTypesCachee.Add(typeof(short), typeof(short[]));
_arrayTypesCachee.Add(typeof(ushort), typeof(ushort[]));
_arrayTypesCachee.Add(typeof(Decimal), typeof(Decimal[]));
_arrayTypesCachee.Add(typeof(DateTime), typeof(DateTime[]));
_arrayTypesCachee.Add(typeof(Guid), typeof(Guid[]));
_arrayTypesCachee.Add(typeof(String), typeof(String[]));
_arrayTypesCachee.Add(typeof(TimeSpan), typeof(TimeSpan[]));
_arrayTypesCachee.Add(typeof(IPEndPoint), typeof(IPEndPoint[]));
_arrayTypesCachee.Add(typeof(IPAddress), typeof(IPAddress[]));
_enumTypesCachee.Add(typeof(char), typeof(IEnumerable<char>)); _enumTypesCachee.Add(typeof(char), typeof(IEnumerable<char>));
_enumTypesCachee.Add(typeof(Boolean), typeof(IEnumerable<Boolean>)); _enumTypesCachee.Add(typeof(Boolean), typeof(IEnumerable<Boolean>));
_enumTypesCachee.Add(typeof(Byte), typeof(IEnumerable<Byte>)); _enumTypesCachee.Add(typeof(Byte), typeof(IEnumerable<Byte>));
@ -210,102 +249,200 @@ namespace ZeroLevel.Services.Serialization
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), "WriteTimeSpan").First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), "WriteTimeSpan").First();
} }
// //
// Arrays
//
else if (type == typeof(Int32[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt32Array").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(char[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadCharArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(UInt32[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt32Array").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(Boolean[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadBooleanArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(Byte[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(Byte[][]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteArrayArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(DateTime[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDateTimeArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(Double[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDoubleArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(float[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadFloatArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(Guid[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadGuidArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(IPAddress[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(IPEndPoint[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPEndPointArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(Int64[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt64Array").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(UInt64[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt64Array").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(Int16[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadShortArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(UInt16[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUShortArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(String[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadStringArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(Decimal[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDecimalArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
else if (type == typeof(TimeSpan[]))
{
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadTimeSpanArray").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateArrayPredicate<Tw>()).First();
}
//
// Collections // Collections
// //
else if (type == typeof(IEnumerable<Int32>)) else if (type == typeof(IEnumerable<Int32>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt32Collection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt32Collection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<char>)) else if (type == typeof(IEnumerable<char>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadCharCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadCharCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<UInt32>)) else if (type == typeof(IEnumerable<UInt32>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt32Collection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt32Collection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<Boolean>)) else if (type == typeof(IEnumerable<Boolean>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadBooleanCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadBooleanCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<Byte>)) else if (type == typeof(IEnumerable<Byte>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<Byte[]>)) else if (type == typeof(IEnumerable<Byte[]>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteArrayCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadByteArrayCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<DateTime>)) else if (type == typeof(IEnumerable<DateTime>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDateTimeCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDateTimeCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<Double>)) else if (type == typeof(IEnumerable<Double>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDoubleCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDoubleCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<float>)) else if (type == typeof(IEnumerable<float>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadFloatCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadFloatCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<Guid>)) else if (type == typeof(IEnumerable<Guid>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadGuidCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadGuidCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<IPAddress>)) else if (type == typeof(IEnumerable<IPAddress>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<IPEndPoint>)) else if (type == typeof(IEnumerable<IPEndPoint>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPEndPointCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadIPEndPointCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<Int64>)) else if (type == typeof(IEnumerable<Int64>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt64Collection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadInt64Collection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<UInt64>)) else if (type == typeof(IEnumerable<UInt64>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt64Collection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUInt64Collection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<Int16>)) else if (type == typeof(IEnumerable<Int16>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadShortCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadShortCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<UInt16>)) else if (type == typeof(IEnumerable<UInt16>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUShortCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadUShortCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<String>)) else if (type == typeof(IEnumerable<String>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadStringCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadStringCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<Decimal>)) else if (type == typeof(IEnumerable<Decimal>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDecimalCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadDecimalCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
else if (type == typeof(IEnumerable<TimeSpan>)) else if (type == typeof(IEnumerable<TimeSpan>))
{ {
wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadTimeSpanCollection").First(); wrapper.ReadId = wrapper.Invoker.Configure(typeof(MemoryStreamReader), "ReadTimeSpanCollection").First();
wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreatePredicate<Tw>()).First(); wrapper.WriteId = wrapper.Invoker.Configure(typeof(MemoryStreamWriter), CreateCollectionPredicate<Tw>()).First();
} }
// //
// Not supported // Not supported
@ -317,13 +454,20 @@ namespace ZeroLevel.Services.Serialization
return wrapper; return wrapper;
} }
private static Func<MethodInfo, bool> CreatePredicate<T>() private static Func<MethodInfo, bool> CreateCollectionPredicate<T>()
{ {
var typeArg = typeof(T).GetGenericArguments().First(); var typeArg = typeof(T).GetGenericArguments().First();
return mi => mi.Name.Equals("WriteCollection", StringComparison.Ordinal) && return mi => mi.Name.Equals("WriteCollection", StringComparison.Ordinal) &&
mi.GetParameters().First().ParameterType.GetGenericArguments().First().IsAssignableFrom(typeArg); mi.GetParameters().First().ParameterType.GetGenericArguments().First().IsAssignableFrom(typeArg);
} }
/// <summary>
/// Builds a filter that selects the "WriteArray" overload able to accept
/// an array of the element type of <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">
/// The array type being serialized. If it is not an array type its element
/// type is null and the returned predicate matches nothing.
/// </typeparam>
/// <returns>
/// A predicate that is true only for methods named exactly "WriteArray"
/// (ordinal comparison) whose first parameter is an array with an element
/// type assignable from the element type of <typeparamref name="T"/>.
/// </returns>
private static Func<MethodInfo, bool> CreateArrayPredicate<T>()
{
    // Resolved once; the lambda below is evaluated for every candidate method.
    var typeArg = typeof(T).GetElementType();
    return mi =>
    {
        if (!mi.Name.Equals("WriteArray", StringComparison.Ordinal))
        {
            return false;
        }
        // A parameterless overload can never be the array writer; reject it
        // instead of letting First() throw while the candidates are filtered.
        var firstParameter = mi.GetParameters().FirstOrDefault();
        if (firstParameter == null)
        {
            return false;
        }
        // GetElementType() returns null when the parameter is not an array.
        var parameterElementType = firstParameter.ParameterType.GetElementType();
        return parameterElementType != null && parameterElementType.IsAssignableFrom(typeArg);
    };
}
private readonly static Dictionary<Type, Wrapper> _concrete_type_cachee = new Dictionary<Type, Wrapper>(); private readonly static Dictionary<Type, Wrapper> _concrete_type_cachee = new Dictionary<Type, Wrapper>();
private readonly static object _concrete_type_cachee_locker = new object(); private readonly static object _concrete_type_cachee_locker = new object();
@ -348,25 +492,31 @@ namespace ZeroLevel.Services.Serialization
else if (TypeHelpers.IsAssignableToGenericType(type, typeof(IEnumerable<>))) else if (TypeHelpers.IsAssignableToGenericType(type, typeof(IEnumerable<>)))
{ {
Type elementType; Type elementType;
var dict = _enumTypesCachee;
var writeName = "WriteCollection";
var readName = "ReadCollection";
if (TypeHelpers.IsArray(type)) if (TypeHelpers.IsArray(type))
{ {
elementType = type.GetElementType(); elementType = type.GetElementType();
dict = _arrayTypesCachee;
writeName = "WriteArray";
readName = "ReadArray";
} }
else else
{ {
elementType = type.GetGenericArguments().First(); elementType = type.GetGenericArguments().First();
} }
if (_enumTypesCachee.ContainsKey(elementType)) if (dict.ContainsKey(elementType))
{ {
_concrete_type_cachee[type] = _cachee[_enumTypesCachee[elementType]]; _concrete_type_cachee[type] = _cachee[dict[elementType]];
} }
else if (typeof(IBinarySerializable).IsAssignableFrom(elementType)) else if (typeof(IBinarySerializable).IsAssignableFrom(elementType))
{ {
var wrapper = new Wrapper { Invoker = InvokeWrapper.Create() }; var wrapper = new Wrapper { Invoker = InvokeWrapper.Create() };
wrapper.ReadId = wrapper.Invoker.ConfigureGeneric(typeof(MemoryStreamReader), elementType, "ReadCollection").First(); wrapper.ReadId = wrapper.Invoker.ConfigureGeneric(typeof(MemoryStreamReader), elementType, readName).First();
wrapper.WriteId = wrapper.Invoker.ConfigureGeneric(typeof(MemoryStreamWriter), elementType, wrapper.WriteId = wrapper.Invoker.ConfigureGeneric(typeof(MemoryStreamWriter), elementType,
mi => mi.Name.Equals("WriteCollection") && mi.IsGenericMethod).First(); mi => mi.Name.Equals(writeName) && mi.IsGenericMethod).First();
_concrete_type_cachee[type] = wrapper; _concrete_type_cachee[type] = wrapper;
} }
} }

Loading…
Cancel
Save

Powered by TurnKey Linux.