From ba99c9a13497166ffe2c7f0fbe4587868d7d6ba5 Mon Sep 17 00:00:00 2001
From: "a.bozhenov"
Date: Mon, 27 May 2019 21:50:34 +0300
Subject: [PATCH] Added IDXReader, SoftMax function

---
Review notes (this section is not part of the commit message):
 * Line breaks and indentation were rebuilt from a whitespace-collapsed copy of
   this patch; the context indentation in Program.cs and Log.cs is assumed to be
   4 spaces per level and should be checked against the repository.
 * The app.config hunk arrived without its line content (only the two '+'
   markers survived), so it has to be regenerated before the patch can apply.
 * IDXIndex.cs, IDXReader.cs and SoftMax.cs were edited during review (hunk
   sizes and the diffstat are updated); the blob ids on their "index" lines are
   still the pre-review ones.

 ZeroLevel.Discovery/Program.cs                |   1 -
 ZeroLevel.Discovery/app.config                |   2 +
 ZeroLevel/Services/Formats/IDX/IDXDataType.cs |  12 ++
 ZeroLevel/Services/Formats/IDX/IDXIndex.cs    |  75 ++++++++
 ZeroLevel/Services/Formats/IDX/IDXReader.cs   | 189 ++++++++++++++++++
 ZeroLevel/Services/Logging/Log.cs             |   2 +-
 ZeroLevel/Services/Math/SoftMax.cs            |  47 +++++
 7 files changed, 326 insertions(+), 2 deletions(-)
 create mode 100644 ZeroLevel/Services/Formats/IDX/IDXDataType.cs
 create mode 100644 ZeroLevel/Services/Formats/IDX/IDXIndex.cs
 create mode 100644 ZeroLevel/Services/Formats/IDX/IDXReader.cs
 create mode 100644 ZeroLevel/Services/Math/SoftMax.cs

diff --git a/ZeroLevel.Discovery/Program.cs b/ZeroLevel.Discovery/Program.cs
index d9c28d2..e316dcf 100644
--- a/ZeroLevel.Discovery/Program.cs
+++ b/ZeroLevel.Discovery/Program.cs
@@ -4,7 +4,6 @@
     {
         private static void Main(string[] args)
         {
-            Log.AddConsoleLogger(Services.Logging.LogLevel.System | Services.Logging.LogLevel.FullDebug);
             Bootstrap.Startup(args);
         }
     }
diff --git a/ZeroLevel.Discovery/app.config b/ZeroLevel.Discovery/app.config
index bc53553..182e8d5 100644
--- a/ZeroLevel.Discovery/app.config
+++ b/ZeroLevel.Discovery/app.config
@@ -2,5 +2,7 @@
+
+
\ No newline at end of file
diff --git a/ZeroLevel/Services/Formats/IDX/IDXDataType.cs b/ZeroLevel/Services/Formats/IDX/IDXDataType.cs
new file mode 100644
index 0000000..0c83c4c
--- /dev/null
+++ b/ZeroLevel/Services/Formats/IDX/IDXDataType.cs
@@ -0,0 +1,12 @@
+namespace ZeroLevel.Services.Formats.IDX
+{
+    public enum IDXDataType
+    {
+        UNSIGNED_BYTE,
+        SIGNED_BYTE,
+        SHORT, // 2bytes
+        INT, // 4bytes
+        FLOAT, // 4bytes
+        DOUBLE // 8bytes
+    }
+}
diff --git a/ZeroLevel/Services/Formats/IDX/IDXIndex.cs b/ZeroLevel/Services/Formats/IDX/IDXIndex.cs
new file mode 100644
index 0000000..7f46ce2
--- /dev/null
+++ b/ZeroLevel/Services/Formats/IDX/IDXIndex.cs
@@ -0,0 +1,75 @@
+using System;
+
+namespace ZeroLevel.Services.Formats.IDX
+{
+    /// <summary>
+    /// Multi-dimensional cursor that visits every index of an array with the given
+    /// dimension sizes, with the last dimension changing fastest.
+    /// </summary>
+    public class IDXIndex
+    {
+        private readonly int[] _measures;
+        // Set when the walk is over (or can never start) so MoveNext keeps returning false
+        private bool _completed;
+
+        /// <summary>
+        /// Current position, one entry per dimension; valid after MoveNext returned true.
+        /// </summary>
+        public int[] Cursor { get; private set; }
+
+        /// <param name="measures">Size of each dimension.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="measures"/> is null.</exception>
+        public IDXIndex(int[] measures)
+        {
+            if (measures == null)
+            {
+                throw new ArgumentNullException(nameof(measures));
+            }
+            _measures = measures;
+            Cursor = new int[_measures.Length];
+            // Without dimensions, or with an empty dimension, there is nothing to visit
+            _completed = _measures.Length == 0;
+            for (int i = 0; i < _measures.Length; i++)
+            {
+                if (_measures[i] <= 0)
+                {
+                    _completed = true;
+                }
+            }
+            if (!_completed)
+            {
+                // Park one step before the first element; the first MoveNext lands on all zeros
+                Cursor[Cursor.Length - 1] = -1;
+            }
+        }
+
+        /// <summary>
+        /// Advances the cursor to the next element.
+        /// </summary>
+        /// <returns>true while the cursor points at an element; false once all were visited.</returns>
+        public bool MoveNext()
+        {
+            if (_completed)
+            {
+                return false;
+            }
+            Cursor[Cursor.Length - 1]++;
+            // Carry overflowing positions from the fastest dimension towards the slowest
+            for (int i = Cursor.Length - 1; i >= 0; i--)
+            {
+                if (Cursor[i] < _measures[i])
+                {
+                    return true;
+                }
+                Cursor[i] = 0;
+                if (i > 0)
+                {
+                    Cursor[i - 1]++;
+                }
+            }
+            // The slowest dimension overflowed: every element has been visited
+            _completed = true;
+            return false;
+        }
+    }
+}
diff --git a/ZeroLevel/Services/Formats/IDX/IDXReader.cs b/ZeroLevel/Services/Formats/IDX/IDXReader.cs
new file mode 100644
index 0000000..b3bfd4d
--- /dev/null
+++ b/ZeroLevel/Services/Formats/IDX/IDXReader.cs
@@ -0,0 +1,189 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using ZeroLevel.Services.Serialization;
+
+namespace ZeroLevel.Services.Formats.IDX
+{
+    /*
+    The basic format is
+
+    magic number
+    size in dimension 0
+    size in dimension 1
+    size in dimension 2
+    .....
+    size in dimension N
+    data
+
+    The magic number is an integer (MSB first). The first 2 bytes are always 0.
+
+    The third byte codes the type of the data:
+    0x08: unsigned byte
+    0x09: signed byte
+    0x0B: short (2 bytes)
+    0x0C: int (4 bytes)
+    0x0D: float (4 bytes)
+    0x0E: double (8 bytes)
+
+    The 4-th byte codes the number of dimensions of the vector/matrix: 1 for vectors, 2 for matrices....
+    The sizes in each dimension are 4-byte integers (MSB first, big endian, like in most non-Intel processors).
+    The data is stored like in a C array, i.e. the index in the last dimension changes the fastest.
+    */
+    /// <summary>
+    /// Sequential reader for IDX files (format described above). The constructor parses the
+    /// header; the Read* method matching <see cref="DataType"/> then streams the elements.
+    /// </summary>
+    public class IDXReader
+        : IDisposable
+    {
+        /// <summary>Number of dimensions declared in the header.</summary>
+        public int DimensionsCount { get; private set; }
+        /// <summary>Size of each dimension, in header order.</summary>
+        public int[] DimentionMeasures { get; private set; }
+        /// <summary>Element type declared in the header.</summary>
+        public IDXDataType DataType { get; private set; }
+        private IDXIndex _index;
+        private readonly MemoryStreamReader _reader;
+
+        /// <summary>Index of the element most recently produced by a Read* method.</summary>
+        public int[] CurrentIndex => _index.Cursor;
+
+        /// <summary>
+        /// Opens the file and reads the IDX header (data type and dimension sizes).
+        /// </summary>
+        /// <param name="filePath">Path of the IDX file to read.</param>
+        /// <exception cref="InvalidDataException">The header holds an unknown data type code.</exception>
+        public IDXReader(string filePath)
+        {
+            _reader = new MemoryStreamReader(new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite));
+            try
+            {
+                _reader.ReverseByteOrder(true);
+                // Header
+                // skip zero bytes
+                _reader.ReadByte();
+                _reader.ReadByte();
+                // read data type
+                var typeCode = _reader.ReadByte();
+                switch (typeCode)
+                {
+                    case 0x08:
+                        DataType = IDXDataType.UNSIGNED_BYTE;
+                        break;
+                    case 0x09:
+                        DataType = IDXDataType.SIGNED_BYTE;
+                        break;
+                    case 0x0B:
+                        DataType = IDXDataType.SHORT;
+                        break;
+                    case 0x0C:
+                        DataType = IDXDataType.INT;
+                        break;
+                    case 0x0D:
+                        DataType = IDXDataType.FLOAT;
+                        break;
+                    case 0x0E:
+                        DataType = IDXDataType.DOUBLE;
+                        break;
+                    default:
+                        // An unknown code used to leave DataType at its default, UNSIGNED_BYTE
+                        throw new InvalidDataException($"Unknown IDX data type code: 0x{typeCode:X2}");
+                }
+                // read dimensions count
+                DimensionsCount = _reader.ReadByte();
+                DimentionMeasures = new int[DimensionsCount];
+                for (int i = 0; i < DimensionsCount; i++)
+                {
+                    DimentionMeasures[i] = _reader.ReadInt32();
+                }
+                _index = new IDXIndex(DimentionMeasures);
+            }
+            catch
+            {
+                // Release the reader when the header cannot be parsed, as Dispose() would
+                _reader.Dispose();
+                throw;
+            }
+        }
+
+        /// <summary>Yields one unsigned byte per remaining index position.</summary>
+        /// <exception cref="InvalidOperationException">The file holds another data type (thrown when enumeration starts).</exception>
+        public IEnumerable<byte> ReadUnsignedBytes()
+        {
+            if (DataType != IDXDataType.UNSIGNED_BYTE)
+                throw new InvalidOperationException($"Wrong data type read. File datatype: {DataType}");
+            while (_index.MoveNext())
+            {
+                yield return _reader.ReadByte();
+            }
+        }
+
+        /// <summary>Yields one signed byte per remaining index position.</summary>
+        /// <exception cref="InvalidOperationException">The file holds another data type (thrown when enumeration starts).</exception>
+        public IEnumerable<sbyte> ReadSignedBytes()
+        {
+            if (DataType != IDXDataType.SIGNED_BYTE)
+                throw new InvalidOperationException($"Wrong data type read. File datatype: {DataType}");
+            while (_index.MoveNext())
+            {
+                yield return unchecked((sbyte)_reader.ReadByte());
+            }
+        }
+
+        /// <summary>Yields one 2-byte integer per remaining index position.</summary>
+        /// <exception cref="InvalidOperationException">The file holds another data type (thrown when enumeration starts).</exception>
+        public IEnumerable<short> ReadShorts()
+        {
+            if (DataType != IDXDataType.SHORT)
+                throw new InvalidOperationException($"Wrong data type read. File datatype: {DataType}");
+            while (_index.MoveNext())
+            {
+                // NOTE(review): assumes ReadBuffer applies the byte order reversal enabled in the constructor - confirm
+                yield return BitConverter.ToInt16(_reader.ReadBuffer(2), 0);
+            }
+        }
+
+        /// <summary>Yields one 4-byte integer per remaining index position.</summary>
+        /// <exception cref="InvalidOperationException">The file holds another data type (thrown when enumeration starts).</exception>
+        public IEnumerable<int> ReadInts()
+        {
+            if (DataType != IDXDataType.INT)
+                throw new InvalidOperationException($"Wrong data type read. File datatype: {DataType}");
+            while (_index.MoveNext())
+            {
+                yield return _reader.ReadInt32();
+            }
+        }
+
+        /// <summary>Yields one 4-byte float per remaining index position.</summary>
+        /// <exception cref="InvalidOperationException">The file holds another data type (thrown when enumeration starts).</exception>
+        public IEnumerable<float> ReadFloats()
+        {
+            if (DataType != IDXDataType.FLOAT)
+                throw new InvalidOperationException($"Wrong data type read. File datatype: {DataType}");
+            while (_index.MoveNext())
+            {
+                yield return _reader.ReadFloat();
+            }
+        }
+
+        /// <summary>Yields one 8-byte double per remaining index position.</summary>
+        /// <exception cref="InvalidOperationException">The file holds another data type (thrown when enumeration starts).</exception>
+        public IEnumerable<double> ReadDoubles()
+        {
+            if (DataType != IDXDataType.DOUBLE)
+                throw new InvalidOperationException($"Wrong data type read. File datatype: {DataType}");
+            while (_index.MoveNext())
+            {
+                yield return _reader.ReadDouble();
+            }
+        }
+
+        /// <summary>Disposes the underlying reader.</summary>
+        public void Dispose()
+        {
+            _reader.Dispose();
+        }
+    }
+}
diff --git a/ZeroLevel/Services/Logging/Log.cs b/ZeroLevel/Services/Logging/Log.cs
index 7fb15a7..7bf6675 100644
--- a/ZeroLevel/Services/Logging/Log.cs
+++ b/ZeroLevel/Services/Logging/Log.cs
@@ -200,7 +200,7 @@ namespace ZeroLevel
         {
             if (config.FirstOrDefault("console", false))
             {
-                AddConsoleLogger();
+                AddConsoleLogger(LogLevel.System | LogLevel.FullDebug);
             }
             if (config.Contains("log"))
             {
diff --git a/ZeroLevel/Services/Math/SoftMax.cs b/ZeroLevel/Services/Math/SoftMax.cs
new file mode 100644
index 0000000..ac8d1de
--- /dev/null
+++ b/ZeroLevel/Services/Math/SoftMax.cs
@@ -0,0 +1,47 @@
+using System;
+
+namespace ZeroLevel.Services.Mathematic
+{
+    public static class SoftMax
+    {
+        /// <summary>
+        /// Computes softmax: result[i] = exp(vector[i]) / sum over j of exp(vector[j]).
+        /// </summary>
+        /// <param name="vector">Input values.</param>
+        /// <returns>A new array of the same length (empty for an empty input).</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="vector"/> is null.</exception>
+        public static double[] Compute(double[] vector)
+        {
+            if (vector == null)
+            {
+                throw new ArgumentNullException(nameof(vector));
+            }
+            double[] result = new double[vector.Length];
+            if (vector.Length == 0)
+            {
+                return result;
+            }
+            // Subtract the largest element before exponentiating: mathematically the result
+            // is the same, but Math.Exp cannot overflow to Infinity and turn the output into NaN
+            double max = vector[0];
+            for (int i = 1; i < vector.Length; i++)
+            {
+                if (vector[i] > max)
+                {
+                    max = vector[i];
+                }
+            }
+            double sum = 0;
+            for (int i = 0; i < vector.Length; i++)
+            {
+                result[i] = Math.Exp(vector[i] - max);
+                sum += result[i];
+            }
+            for (int i = 0; i < vector.Length; i++)
+            {
+                result[i] /= sum;
+            }
+            return result;
+        }
+    }
+}