From f609e6354f65d262f2e659725b3ffb8aa2e46bbb Mon Sep 17 00:00:00 2001
From: unknown
Date: Mon, 21 Mar 2022 15:02:17 +0300
Subject: [PATCH] Update NN repo

Added new NN tools
Added wrappers for: DBFace, FaceNet, ArcFace
Improved metrics class
---
 .../Architectures/FaceDetectors/DBFace.cs     | 112 ++++++++
 .../FaceRecognition/ArcFaceNet.cs             |  64 +++++
 .../Architectures/FaceRecognition/FaceNet.cs  |  40 +++
 .../Architectures/FaceSeacrhService.cs        | 166 ++++++++++++
 ZeroLevel.NN/Architectures/IEncoder.cs        |  14 +
 ZeroLevel.NN/Architectures/IFaceDetector.cs   |  10 +
 ZeroLevel.NN/Examples/tSNE.txt                |  75 ++++++
 ZeroLevel.NN/Models/Face.cs                   | 141 ++++++++++
 ZeroLevel.NN/Models/FaceEmbedding.cs          |  26 ++
 ZeroLevel.NN/Models/FacePoint.cs              |  26 ++
 .../Models/ImagePreprocessorCropOptions.cs    |  37 +++
 .../Models/ImagePreprocessorOptions.cs        | 115 ++++++++
 ZeroLevel.NN/Models/Landmarks.cs              | 121 +++++++++
 ZeroLevel.NN/Models/OffsetBox.cs              |  19 ++
 ZeroLevel.NN/Models/PredictionInput.cs        |  11 +
 ZeroLevel.NN/Models/PredictorChannelType.cs   |   8 +
 ZeroLevel.NN/Services/AnchorsGenerator.cs     | 227 ++++++++++++++++
 ZeroLevel.NN/Services/CommonHelper.cs         |  33 +++
 ZeroLevel.NN/Services/ImagePreprocessor.cs    | 253 ++++++++++++++++++
 ZeroLevel.NN/Services/SSDNN.cs                |  66 +++++
 ZeroLevel.NN/ZeroLevel.NN.csproj              |  20 ++
 ZeroLevel.sln                                 |  26 +-
 .../Services/Mathemathics}/Metrics.cs         | 106 +++++++-
 23 files changed, 1696 insertions(+), 20 deletions(-)
 create mode 100644 ZeroLevel.NN/Architectures/FaceDetectors/DBFace.cs
 create mode 100644 ZeroLevel.NN/Architectures/FaceRecognition/ArcFaceNet.cs
 create mode 100644 ZeroLevel.NN/Architectures/FaceRecognition/FaceNet.cs
 create mode 100644 ZeroLevel.NN/Architectures/FaceSeacrhService.cs
 create mode 100644 ZeroLevel.NN/Architectures/IEncoder.cs
 create mode 100644 ZeroLevel.NN/Architectures/IFaceDetector.cs
 create mode 100644 ZeroLevel.NN/Examples/tSNE.txt
 create mode 100644 ZeroLevel.NN/Models/Face.cs
 create mode 100644 ZeroLevel.NN/Models/FaceEmbedding.cs
 create mode 100644 ZeroLevel.NN/Models/FacePoint.cs
 create mode 100644 ZeroLevel.NN/Models/ImagePreprocessorCropOptions.cs
 create mode 100644 ZeroLevel.NN/Models/ImagePreprocessorOptions.cs
 create mode 100644 ZeroLevel.NN/Models/Landmarks.cs
 create mode 100644 ZeroLevel.NN/Models/OffsetBox.cs
 create mode 100644 ZeroLevel.NN/Models/PredictionInput.cs
 create mode 100644 ZeroLevel.NN/Models/PredictorChannelType.cs
 create mode 100644 ZeroLevel.NN/Services/AnchorsGenerator.cs
 create mode 100644 ZeroLevel.NN/Services/CommonHelper.cs
 create mode 100644 ZeroLevel.NN/Services/ImagePreprocessor.cs
 create mode 100644 ZeroLevel.NN/Services/SSDNN.cs
 create mode 100644 ZeroLevel.NN/ZeroLevel.NN.csproj
 rename {ZeroLevel.HNSW/Utils => ZeroLevel/Services/Mathemathics}/Metrics.cs (70%)

diff --git a/ZeroLevel.NN/Architectures/FaceDetectors/DBFace.cs b/ZeroLevel.NN/Architectures/FaceDetectors/DBFace.cs
new file mode 100644
index 0000000..db6d4fc
--- /dev/null
+++ b/ZeroLevel.NN/Architectures/FaceDetectors/DBFace.cs
@@ -0,0 +1,112 @@
+using Microsoft.ML.OnnxRuntime.Tensors;
+using SixLabors.ImageSharp;
+using ZeroLevel.NN.Models;
+
+// https://github.com/iwatake2222/play_with_tflite/blob/master/pj_tflite_face_dbface/image_processor/face_detection_engine.cpp
+
+namespace ZeroLevel.NN
+{
+    public sealed class DBFace
+        : SSDNN, IFaceDetector
+    {
+        private const int STRIDE = 4;
+        private const int INPUT_WIDTH = 1216;
+        private const int INPUT_HEIGHT = 960;
+        private const float THESHOLD = 0.4f;
+        private const float IOU_THESHOLD = 0.6f;
+        private static float[] MEAN = new[] {
+            0.408f, 0.447f, 0.47f };
+        private static float[] STD = new[] { 0.289f, 0.274f, 0.278f };
+
+        public DBFace(string model_path)
+            : base(model_path)
+        {
+        }
+
+        // DBFace decode helper: linear near zero, exponential beyond the gate.
+        private static float exp(float v)
+        {
+            var gate = 1.0f;
+            var _base = Math.Exp(gate);
+            if (Math.Abs(v) < gate)
+                return (float)(v * _base);
+            if (v > 0)
+            {
+                return (float)Math.Exp(v);
+            }
+            return (float)-Math.Exp(-v);
+        }
+
+        private static FacePoint Landmark(float cx, float cy,
+            float x, float y,
+            float scale_w, float scale_h)
+        {
+            var p = new FacePoint();
+            p.X = (exp(x * 4) + cx) * STRIDE * scale_w;
+            p.Y = (exp(y * 4) + cy) * STRIDE * scale_h;
+            return p;
+        }
+
+        private List<Face> Parse(Tensor<float> hm,
+            Tensor<float> boxes, Tensor<float> landmarks,
+            int width, int height)
+        {
+            float x, y, r, b;
+            float scale_w = width / (float)(INPUT_WIDTH);
+            float scale_h = height / (float)(INPUT_HEIGHT);
+            List<Face> bbox_list = new List<Face>();
+            for (int cx = 0; cx < hm.Dimensions[3]; cx++)
+            {
+                for (int cy = 0; cy < hm.Dimensions[2]; cy++)
+                {
+                    float score = hm[0, 0, cy, cx];
+                    if (score >= THESHOLD)
+                    {
+                        x = boxes[0, 0, cy, cx];
+                        y = boxes[0, 1, cy, cx];
+                        r = boxes[0, 2, cy, cx];
+                        b = boxes[0, 3, cy, cx];
+
+                        x = (cx - x) * STRIDE;
+                        y = (cy - y) * STRIDE;
+                        r = (cx + r) * STRIDE;
+                        b = (cy + b) * STRIDE;
+
+                        var bbox = new Face();
+                        bbox.X1 = (int)(x * scale_w);
+                        bbox.Y1 = (int)(y * scale_h);
+                        bbox.X2 = (int)(r * scale_w);
+                        bbox.Y2 = (int)(b * scale_h);
+                        bbox.Score = score;
+
+                        bbox.Landmarks.LeftEye = Landmark(cx, cy, landmarks[0, 0, cy, cx], landmarks[0, 5, cy, cx], scale_w, scale_h);
+                        bbox.Landmarks.RightEye = Landmark(cx, cy, landmarks[0, 1, cy, cx], landmarks[0, 6, cy, cx], scale_w, scale_h);
+                        bbox.Landmarks.Nose = Landmark(cx, cy, landmarks[0, 2, cy, cx], landmarks[0, 7, cy, cx], scale_w, scale_h);
+                        bbox.Landmarks.LeftMouth = Landmark(cx, cy, landmarks[0, 3, cy, cx], landmarks[0, 8, cy, cx], scale_w, scale_h);
+                        bbox.Landmarks.RightMouth = Landmark(cx, cy, landmarks[0, 4, cy, cx], landmarks[0, 9, cy, cx], scale_w, scale_h);
+
+                        bbox_list.Add(bbox);
+                    }
+                }
+            }
+            return bbox_list;
+        }
+
+        public IList<Face> Predict(Image image)
+        {
+            var input = MakeInput(image,
+                new ImagePreprocessorOptions(INPUT_WIDTH, INPUT_HEIGHT, PredictorChannelType.ChannelFirst)
+                .ApplyNormilization()
+                .ApplyCorrection(MEAN, STD)
+                .ApplyAxeInversion());
+            List<Face> result = null;
+            Extract(new Dictionary<string, Tensor<float>> { { "input", input } }, output =>
+            {
+                var hm = output["hm"];
+                var boxes = output["boxes"];
+                var landmark = output["landmarks"];
+                result = Parse(hm, boxes, landmark, image.Width, image.Height);
+            });
+            var cleaned_result = Face.Nms(result, IOU_THESHOLD, false);
+            return cleaned_result;
+        }
+    }
+}
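For orientation, a minimal usage sketch of the detector wrapper above; the ONNX file name and image path are placeholder assumptions, not files shipped with this patch:

    // Hypothetical usage; "dbface.onnx" and "photo.jpg" are placeholders.
    using (var detector = new DBFace("dbface.onnx"))
    using (var image = SixLabors.ImageSharp.Image.Load<Rgb24>("photo.jpg"))
    {
        var faces = detector.Predict(image);
        foreach (var face in faces)
            Console.WriteLine($"({face.X1};{face.Y1})-({face.X2};{face.Y2}) score={face.Score:F2}");
    }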
diff --git a/ZeroLevel.NN/Architectures/FaceRecognition/ArcFaceNet.cs b/ZeroLevel.NN/Architectures/FaceRecognition/ArcFaceNet.cs
new file mode 100644
index 0000000..6d4fb21
--- /dev/null
+++ b/ZeroLevel.NN/Architectures/FaceRecognition/ArcFaceNet.cs
@@ -0,0 +1,64 @@
+using Microsoft.ML.OnnxRuntime.Tensors;
+using SixLabors.ImageSharp;
+using ZeroLevel.NN.Models;
+
+/*
+INPUT
+
+Image, name: data, shape: 1, 3, 112, 112, format: B, C, H, W, where:
+B - batch size
+C - channel
+H - height
+W - width
+Channel order is BGR.
+
+OUTPUT
+
+Face embeddings, name: fc1, shape: 1, 512, output data format: B, C, where:
+B - batch size
+C - row-vector of 512 floating point values (the face embedding)
+
+INPUT NORMALIZATION
+img -= 127.5
+img /= 128
+
+OUTPUT NORMALIZATION
+NORM - vector length = 1
+ */
+
+namespace ZeroLevel.NN
+{
+    public sealed class ArcFaceNet
+        : SSDNN, IEncoder
+    {
+        private const int INPUT_WIDTH = 112;
+        private const int INPUT_HEIGHT = 112;
+        public ArcFaceNet(string modelPath)
+            : base(modelPath)
+        {
+        }
+
+        public int InputW => INPUT_WIDTH;
+
+        public int InputH => INPUT_HEIGHT;
+
+        public float[] Predict(Image image)
+        {
+            var input = MakeInput(image,
+                new ImagePreprocessorOptions(INPUT_WIDTH, INPUT_HEIGHT, PredictorChannelType.ChannelFirst)
+                .ApplyCorrection((c, px) => (px - 127.5f) / 128f) // input normalization required by the model spec above
+                .UseBGR()                                         // the spec above expects BGR channel order
+                .ApplyAxeInversion());
+            return Predict(input);
+        }
+
+        public float[] Predict(Tensor<float> input)
+        {
+            float[] embedding = null;
+            Extract(new Dictionary<string, Tensor<float>> { { "data", input } }, d =>
+            {
+                embedding = d.First().Value.ToArray();
+            });
+            Norm(embedding);
+            return embedding;
+        }
+    }
+}
diff --git a/ZeroLevel.NN/Architectures/FaceRecognition/FaceNet.cs b/ZeroLevel.NN/Architectures/FaceRecognition/FaceNet.cs
new file mode 100644
index 0000000..448f5cb
--- /dev/null
+++ b/ZeroLevel.NN/Architectures/FaceRecognition/FaceNet.cs
@@ -0,0 +1,40 @@
+using Microsoft.ML.OnnxRuntime.Tensors;
+using SixLabors.ImageSharp;
+using ZeroLevel.NN.Models;
+
+namespace ZeroLevel.NN
+{
+    public sealed class FaceNet
+        : SSDNN, IEncoder
+    {
+        private const int INPUT_WIDTH = 160;
+        private const int INPUT_HEIGHT = 160;
+        public FaceNet(string modelPath)
+            : base(modelPath)
+        {
+        }
+
+        public int InputW => INPUT_WIDTH;
+        public int InputH => INPUT_HEIGHT;
+
+        public float[] Predict(Image image)
+        {
+            var input = MakeInput(image,
+                new ImagePreprocessorOptions(INPUT_WIDTH, INPUT_HEIGHT, PredictorChannelType.ChannelFirst)
+                .ApplyCorrection((c, px) => (px / 127.5f) - 1f)
+                .ApplyAxeInversion());
+            return Predict(input);
+        }
+
+        public float[] Predict(Tensor<float> input)
+        {
+            float[] embedding = null;
+            Extract(new Dictionary<string, Tensor<float>> { { "input.1", input } }, d =>
+            {
+                embedding = d.First().Value.ToArray();
+            });
+            Norm(embedding);
+            return embedding;
+        }
+    }
+}
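Both encoders return unit-length vectors (they call SSDNN.Norm on the output), so cosine similarity between two embeddings reduces to a dot product. A small sketch; the 0.5 acceptance threshold is an illustrative assumption that has to be tuned per model and dataset:

    // Embeddings are already L2-normalized, so similarity is a plain dot product.
    static float CosineSimilarity(float[] u, float[] v)
    {
        var dot = 0f;
        for (int i = 0; i < u.Length; i++)
            dot += u[i] * v[i];
        return dot;
    }

    // Hypothetical decision rule; threshold value is an assumption.
    static bool IsSamePerson(float[] a, float[] b) => CosineSimilarity(a, b) > 0.5f;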
diff --git a/ZeroLevel.NN/Architectures/FaceSeacrhService.cs b/ZeroLevel.NN/Architectures/FaceSeacrhService.cs
new file mode 100644
index 0000000..68f79d9
--- /dev/null
+++ b/ZeroLevel.NN/Architectures/FaceSeacrhService.cs
@@ -0,0 +1,166 @@
+using SixLabors.ImageSharp;
+using SixLabors.ImageSharp.PixelFormats;
+using SixLabors.ImageSharp.Processing;
+using ZeroLevel.NN;
+using ZeroLevel.NN.Models;
+
+namespace Zero.NN.Services
+{
+    public class FaceSeacrhService
+    {
+        private readonly IFaceDetector _detector;
+        private readonly IEncoder _encoder;
+        private readonly bool _useFaceAlign;
+        public FaceSeacrhService(IFaceDetector detector, IEncoder encoder, bool useFaceAlign = true)
+        {
+            _useFaceAlign = useFaceAlign;
+            _detector = detector;
+            _encoder = encoder;
+        }
+
+        public static Image<Rgb24> MakeEyesHorizontal(Image<Rgb24> source, Face face)
+        {
+            // eye positions, used to determine the rotation angle
+            var leftEye = face.Landmarks.LeftEye;
+            var rightEye = face.Landmarks.RightEye;
+            var dY = rightEye.Y - leftEye.Y;
+            var dX = rightEye.X - leftEye.X;
+            // the angle the image must be rotated by to level the eyes
+            var ra = (float)Math.Atan2(dY, dX);
+
+            // determine the size and center of the face
+            var minX = face.Landmarks.Left();
+            var minY = face.Landmarks.Top();
+            var maxX = face.Landmarks.Right();
+            var maxY = face.Landmarks.Bottom();
+
+            var centerFaceX = (maxX + minX) / 2.0f;
+            var centerFaceY = (maxY + minY) / 2.0f;
+
+            // determine the rectangle around the face, centered at (centerFaceX; centerFaceY)
+            var distanceX = face.X2 - face.X1;
+            var distanceY = face.Y2 - face.Y1;
+
+            var dx = (face.X1 + distanceX / 2.0f) - centerFaceX;
+            var dy = (face.Y1 + distanceY / 2.0f) - centerFaceY;
+
+            var x1 = face.X1 - dx;
+            var y1 = face.Y1 - dy;
+            var x2 = face.X2 - dx;
+            var y2 = face.Y2 - dy;
+
+            // determine the square that bounds the face rectangle even when rotated by up to 45 degrees
+            var radius = (float)Math.Sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)) / 2.0f;
+            x1 = centerFaceX - radius;
+            x2 = centerFaceX + radius;
+            y1 = centerFaceY - radius;
+            y2 = centerFaceY + radius;
+
+            var cropDx = radius - distanceX / 2.0f;
+            var cropDy = radius - distanceY / 2.0f;
+
+            using (var fullCrop = ImagePreprocessor.Crop(source, x1, y1, x2, y2))
+            {
+                fullCrop.Mutate(img => img.Rotate((float)(-ra * (180.0f / Math.PI)), KnownResamplers.Bicubic));
+                var crop = ImagePreprocessor.Crop(fullCrop, cropDx, cropDy, fullCrop.Width - cropDx, fullCrop.Height - cropDy);
+                crop.Mutate(img => img.Resize(112, 112, KnownResamplers.Bicubic));
+                return crop;
+            }
+        }
+        private Image<Rgb24> SpecialCrop(Image<Rgb24> image, Face face)
+        {
+            var left = face.Landmarks.Left();     // 0.3
+            var right = face.Landmarks.Right();   // 0.7
+            var top = face.Landmarks.Top();       // 0.4
+            var bottom = face.Landmarks.Bottom(); // 0.8
+
+            var newWidth = (right - left) / 0.4f;
+            var newHeight = (bottom - top) / 0.4f;
+
+            // make the crop square!
+
+            var cx1 = left - (newWidth * 0.3f);
+            var cy1 = top - (newHeight * 0.4f);
+            var cx2 = cx1 + newWidth;
+            var cy2 = cy1 + newHeight;
+
+            var clipX = new Func<float, float>(x =>
+            {
+                if (x < 0) return 0;
+                if (x > image.Width) return image.Width;
+                return x;
+            });
+            var clipY = new Func<float, float>(y =>
+            {
+                if (y < 0) return 0;
+                if (y > image.Height) return image.Height;
+                return y;
+            });
+
+            cx1 = clipX(cx1);
+            cx2 = clipX(cx2);
+            cy1 = clipY(cy1);
+            cy2 = clipY(cy2);
+
+            return ImagePreprocessor.Crop(image, cx1, cy1, cx2, cy2);
+        }
+
+        public IEnumerable<FaceEmbedding> GetEmbeddings(Image<Rgb24> image)
+        {
+            int width = image.Width;
+            int height = image.Height;
+            var faces = _detector.Predict(image);
+            foreach (var face in faces)
+            {
+                Face.FixInScreen(face, width, height);
+                float[] vector;
+                if (_useFaceAlign)
+                {
+                    int w = (int)(face.X2 - face.X1);
+                    int h = (int)(face.Y2 - face.Y1);
+                    var radius = (float)Math.Sqrt(w * w + h * h) / 2f;
+                    var centerFaceX = (face.X2 + face.X1) / 2.0f;
+                    var centerFaceY = (face.Y2 + face.Y1) / 2.0f;
+                    var around_x1 = centerFaceX - radius;
+                    var around_x2 = centerFaceX + radius;
+                    var around_y1 = centerFaceY - radius;
+                    var around_y2 = centerFaceY + radius;
+                    using (var faceImage = ImagePreprocessor.Crop(image, around_x1, around_y1, around_x2, around_y2))
+                    {
+                        var matrix = Face.GetTransformMatrix(face);
+                        var builder = new AffineTransformBuilder();
+                        builder.AppendMatrix(matrix);
+                        faceImage.Mutate(img => img.Transform(builder, KnownResamplers.Bicubic));
+                        vector = _encoder.Predict(faceImage);
+                        /*var aligned_faces = detector.Predict(faceImage);
+                        if (aligned_faces != null && aligned_faces.Count == 1)
+                        {
+                            using (var ci = SpecialCrop(faceImage, aligned_faces[0]))
+                            {
+                                vector = encoder.Predict(faceImage);
+                            }
+                        }
+                        else
+                        {
+                            vector = encoder.Predict(faceImage);
+                        }*/
+                    }
+                }
+                else
+                {
+                    using (var faceImage = ImagePreprocessor.Crop(image, face.X1, face.Y1, face.X2, face.Y2))
+                    {
+                        vector = _encoder.Predict(faceImage);
+                    }
+                }
+                yield return new FaceEmbedding
+                {
+                    Face = face,
+                    Vector = vector
+                };
+            }
+        }
+    }
+}
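A minimal end-to-end sketch wiring the detector and encoder into the search service above; the model file names are placeholder assumptions:

    // Hypothetical pipeline assembly; model paths are assumptions.
    using (var detector = new DBFace("dbface.onnx"))
    using (var encoder = new ArcFaceNet("arcface.onnx"))
    using (var image = SixLabors.ImageSharp.Image.Load<Rgb24>("group_photo.jpg"))
    {
        var service = new FaceSeacrhService(detector, encoder, useFaceAlign: true);
        foreach (var embedding in service.GetEmbeddings(image))
        {
            Console.WriteLine($"face at ({embedding.Face.X1};{embedding.Face.Y1}): {embedding.Vector.Length}-d vector");
        }
    }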
diff --git a/ZeroLevel.NN/Architectures/IEncoder.cs b/ZeroLevel.NN/Architectures/IEncoder.cs
new file mode 100644
index 0000000..d332035
--- /dev/null
+++ b/ZeroLevel.NN/Architectures/IEncoder.cs
@@ -0,0 +1,14 @@
+using Microsoft.ML.OnnxRuntime.Tensors;
+using SixLabors.ImageSharp;
+
+namespace ZeroLevel.NN
+{
+    public interface IEncoder
+    {
+        int InputW { get; }
+        int InputH { get; }
+
+        float[] Predict(Image image);
+        float[] Predict(Tensor<float> input);
+    }
+}
diff --git a/ZeroLevel.NN/Architectures/IFaceDetector.cs b/ZeroLevel.NN/Architectures/IFaceDetector.cs
new file mode 100644
index 0000000..d65744e
--- /dev/null
+++ b/ZeroLevel.NN/Architectures/IFaceDetector.cs
@@ -0,0 +1,10 @@
+using SixLabors.ImageSharp;
+using ZeroLevel.NN.Models;
+
+namespace ZeroLevel.NN
+{
+    public interface IFaceDetector
+    {
+        IList<Face> Predict(Image image);
+    }
+}
diff --git a/ZeroLevel.NN/Examples/tSNE.txt b/ZeroLevel.NN/Examples/tSNE.txt
new file mode 100644
index 0000000..fb2f0ff
--- /dev/null
+++ b/ZeroLevel.NN/Examples/tSNE.txt
@@ -0,0 +1,75 @@
+public static void DrawTSNE(Dictionary<int, int> cluster_map, List<FaceEmbedding> faces)
+{
+    double[][] t_snefit_vectors = faces.Select(f => f.Vector.Select(e => (double)e).ToArray()).ToArray();
+    TSNE tSNE = new TSNE()
+    {
+        NumberOfOutputs = 2,
+        Perplexity = 100
+    };
+    // Transform to a reduced dimensionality space
+    var embeddings = tSNE.Transform(t_snefit_vectors);
+    var xmin = double.MaxValue;
+    var xmax = double.MinValue;
+    var ymin = double.MaxValue;
+    var ymax = double.MinValue;
+    for (int i = 0; i < embeddings.Length; i++)
+    {
+        var lxmin = embeddings[i][0];
+        var lxmax = embeddings[i][0];
+        if (lxmin < xmin)
+            xmin = lxmin;
+        if (lxmax > xmax)
+            xmax = lxmax;
+
+        var lymin = embeddings[i][1];
+        var lymax = embeddings[i][1];
+        if (lymin < ymin)
+            ymin = lymin;
+        if (lymax > ymax)
+            ymax = lymax;
+    }
+
+    var norm_x_scale = 1.0f / (xmax - xmin);
+    var norm_y_scale = 1.0f / (ymax - ymin);
+    var xdiff = 0 - xmin;
+    var ydiff = 0 - ymin;
+    var centerx = (xmin + xmax) / 2.0f + xdiff;
+    var centery = (ymin + ymax) / 2.0f + ydiff;
+
+    var width = 2560;
+    var height = 1440;
+
+    var rnd = new Random((int)Environment.TickCount);
+
+    var clusterIds = cluster_map.Values.Distinct().ToArray();
+    var cluster_colors = new Dictionary<int, Color>();
+    foreach (var cid in clusterIds)
+    {
+        var color = Color.FromRgb((byte)rnd.Next(0, 255), (byte)rnd.Next(0, 255), (byte)rnd.Next(0, 255));
+        cluster_colors[cid] = color;
+    }
+
+    using (var image = new Image<Rgb24>(width, height))
+    {
+        for (int i = 0; i < embeddings.Length; i++)
+        {
+            var cluster = cluster_map[i];
+            var color = cluster_colors[cluster];
+
+            var x = (int)((embeddings[i][0] + xdiff + centerx) * norm_x_scale * width) - width / 2;
+            var y = (int)((embeddings[i][1] + ydiff + centery) * norm_y_scale * height) - height / 2;
+            image.Mutate(im => im.DrawLines(
+                color,
+                4,
+                new PointF[] {
+                    new PointF(x - 1, y - 1),
+                    new PointF(x + 1, y - 1),
+                    new PointF(x + 1, y + 1),
+                    new PointF(x - 1, y + 1),
+                    new PointF(x - 1, y - 1)
+                }
+            ));
+        }
+        image.SaveAsJpeg(@"G:\FaceTest\tsne.jpeg");
+    }
+}
\ No newline at end of file
diff --git a/ZeroLevel.NN/Models/Face.cs b/ZeroLevel.NN/Models/Face.cs
new file mode 100644
index 0000000..3ea6549
--- /dev/null
+++ b/ZeroLevel.NN/Models/Face.cs
@@ -0,0 +1,141 @@
+using System.Numerics;
+using Zero.NN.Models;
+using ZeroLevel.Services.Serialization;
+
+namespace ZeroLevel.NN.Models
+{
+    public class Face
+        : IBinarySerializable
+    {
+        public float X1;
+        public float Y1;
+        public float X2;
+        public float Y2;
+        public float Score;
+        public Landmarks Landmarks = new Landmarks();
+
+        public float Area => Math.Abs(X2 - X1) * Math.Abs(Y2 - Y1);
+
+        public static float CalculateIoU(Face obj0, Face obj1)
+        {
+            var interx0 = Math.Max(obj0.X1, obj1.X1);
+            var intery0 = Math.Max(obj0.Y1, obj1.Y1);
+            var interx1 = Math.Min(obj0.X2, obj1.X2);
+            var intery1 = Math.Min(obj0.Y2, obj1.Y2);
+            if (interx1 < interx0 || intery1 < intery0) return 0;
+            var area0 = obj0.Area;
+            var area1 = obj1.Area;
+            var areaInter = (interx1 - interx0) * (intery1 - intery0);
+            var areaSum = area0 + area1 - areaInter;
+            return (float)(areaInter) / areaSum;
+        }
+        public static void FixInScreen(Face bbox, int width, int height)
+        {
+            bbox.X1 = Math.Max(0, bbox.X1);
+            bbox.Y1 = Math.Max(0, bbox.Y1);
+            bbox.X2 = Math.Min(width, bbox.X2);
+            bbox.Y2 = Math.Min(height, bbox.Y2);
+        }
+
+        public static List<Face> Nms(List<Face> bbox_original_list, float threshold_nms_iou, bool check_class_id)
+        {
+            var bbox_nms_list = new List<Face>();
+            // sort by score descending so that each cluster keeps its highest-scoring box
+            var bbox_list = bbox_original_list.OrderByDescending(b => b.Score).ToList();
+            bool[] is_merged = new bool[bbox_list.Count];
+            for (var i = 0; i < bbox_list.Count; i++)
+            {
+                is_merged[i] = false;
+            }
+            for (var index_high_score = 0; index_high_score < bbox_list.Count; index_high_score++)
+            {
+                var candidates = new List<Face>();
+                if (is_merged[index_high_score]) continue;
+
+                candidates.Add(bbox_list[index_high_score]);
+                for (var index_low_score = index_high_score + 1; index_low_score < bbox_list.Count; index_low_score++)
+                {
+                    if (is_merged[index_low_score]) continue;
+                    if (CalculateIoU(bbox_list[index_high_score], bbox_list[index_low_score]) > threshold_nms_iou)
+                    {
+                        candidates.Add(bbox_list[index_low_score]);
+                        is_merged[index_low_score] = true;
+                    }
+                }
+                bbox_nms_list.Add(candidates[0]);
+            }
+            return bbox_nms_list;
+        }
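+
+        // Worked IoU example: boxes (0,0,2,2) and (1,1,3,3) intersect in a
+        // 1x1 square, so IoU = 1 / (4 + 4 - 1) = 1/7 ≈ 0.14 - well below the
+        // 0.6 threshold DBFace uses, so Nms would keep both boxes.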
+
+        // Normalizes the face to a standard square based on the eye landmarks.
+        // After normalization, the left corner of the left eye is at (0.3 * w, h / 3)
+        // and the right corner of the right eye is at (0.7 * w, h / 3), where w and h
+        // are the width and height of the normalized image.
+        public static Matrix3x2 GetTransformMatrix(Face face)
+        {
+            var w = face.X2 - face.X1;
+            var h = face.Y2 - face.Y1;
+
+            var leftEyeSrc = new FacePoint((face.Landmarks.LeftEye.X - face.X1) / w, (face.Landmarks.LeftEye.Y - face.Y1) / h);
+            var rightEyeSrc = new FacePoint((face.Landmarks.RightEye.X - face.X1) / w, (face.Landmarks.RightEye.Y - face.Y1) / h);
+
+            // Corners of the eyes in the normalized image
+            var leftEyeDst = new FacePoint(0.3f, 1.0f / 3.0f);
+            var rightEyeDst = new FacePoint(0.7f, 1.0f / 3.0f);
+
+            return GetTransformMatrix(leftEyeSrc, rightEyeSrc, leftEyeDst, rightEyeDst);
+        }
+
+        static Matrix3x2 GetTransformMatrix(FacePoint srcLeftEye, FacePoint srcRightEye,
+            FacePoint dstLeftEye, FacePoint dstRightEye)
+        {
+            var s60 = Math.Sin(60.0f * Math.PI / 180.0f);
+            var c60 = Math.Cos(60.0f * Math.PI / 180.0f);
+
+            // The third point is calculated so that the three points make an equilateral triangle
+            var xin = c60 * (srcLeftEye.X - srcRightEye.X) - s60 * (srcLeftEye.Y - srcRightEye.Y) + srcRightEye.X;
+            var yin = s60 * (srcLeftEye.X - srcRightEye.X) + c60 * (srcLeftEye.Y - srcRightEye.Y) + srcRightEye.Y;
+
+            var xout = c60 * (dstLeftEye.X - dstRightEye.X) - s60 * (dstLeftEye.Y - dstRightEye.Y) + dstRightEye.X;
+            var yout = s60 * (dstLeftEye.X - dstRightEye.X) + c60 * (dstLeftEye.Y - dstRightEye.Y) + dstRightEye.Y;
+
+            System.Drawing.PointF[] source = {
+                new System.Drawing.PointF(srcLeftEye.X, srcLeftEye.Y),
+                new System.Drawing.PointF(srcRightEye.X, srcRightEye.Y),
+                new System.Drawing.PointF((float)xin, (float)yin)
+            };
+            System.Drawing.PointF[] target = {
+                new System.Drawing.PointF(dstLeftEye.X, dstLeftEye.Y),
+                new System.Drawing.PointF(dstRightEye.X, dstRightEye.Y),
+                new System.Drawing.PointF((float)xout, (float)yout)
+            };
+            Aurigma.GraphicsMill.Transforms.Matrix matrix =
+                Aurigma.GraphicsMill.Transforms.Matrix.CreateFromAffinePoints(source, target);
+
+            return new Matrix3x2(
+                matrix.Elements[0], matrix.Elements[1],
+                matrix.Elements[3], matrix.Elements[4],
+                matrix.Elements[6], matrix.Elements[7]);
+        }
+
+        public void Serialize(IBinaryWriter writer)
+        {
+            writer.WriteFloat(this.X1);
+            writer.WriteFloat(this.Y1);
+            writer.WriteFloat(this.X2);
+            writer.WriteFloat(this.Y2);
+            writer.WriteFloat(this.Score);
+            writer.Write(this.Landmarks);
+        }
+
+        public void Deserialize(IBinaryReader reader)
+        {
+            this.X1 = reader.ReadFloat();
+            this.Y1 = reader.ReadFloat();
+            this.X2 = reader.ReadFloat();
+            this.Y2 = reader.ReadFloat();
+            this.Score = reader.ReadFloat();
+            this.Landmarks = reader.Read<Landmarks>();
+        }
+    }
+}
diff --git a/ZeroLevel.NN/Models/FaceEmbedding.cs b/ZeroLevel.NN/Models/FaceEmbedding.cs
new file mode 100644
index 0000000..3e7077c
--- /dev/null
+++ b/ZeroLevel.NN/Models/FaceEmbedding.cs
@@ -0,0 +1,26 @@
+using ZeroLevel.Services.Serialization;
+
+namespace ZeroLevel.NN.Models
+{
+    public class FaceEmbedding
+        : IBinarySerializable
+    {
+        public Face Face;
+        public float[] Vector;
+        public string Tag;
+
+        public void Deserialize(IBinaryReader reader)
+        {
+            this.Face = reader.Read<Face>();
+            this.Vector = reader.ReadFloatArray();
+            this.Tag = reader.ReadString();
+        }
+
+        public void Serialize(IBinaryWriter writer)
+        {
+            writer.Write(this.Face);
+            writer.WriteArray(this.Vector);
+            writer.WriteString(this.Tag);
+        }
+    }
+}
diff --git a/ZeroLevel.NN/Models/FacePoint.cs b/ZeroLevel.NN/Models/FacePoint.cs
new file mode 100644
index 0000000..6262d5c
--- /dev/null
+++ b/ZeroLevel.NN/Models/FacePoint.cs
@@ -0,0 +1,26 @@
+using ZeroLevel.Services.Serialization;
+
+namespace ZeroLevel.NN.Models
+{
+    public class FacePoint
+        : IBinarySerializable
+    {
+        public float X { get; set; }
+        public float Y { get; set; }
+
+        public FacePoint() { }
+        public FacePoint(float x, float y) { X = x; Y = y; }
+
+        public void Serialize(IBinaryWriter writer)
+        {
+            writer.WriteFloat(this.X);
+            writer.WriteFloat(this.Y);
+        }
+
+        public void Deserialize(IBinaryReader reader)
+        {
+            this.X = reader.ReadFloat();
+            this.Y = reader.ReadFloat();
+        }
+    }
+}
diff --git a/ZeroLevel.NN/Models/ImagePreprocessorCropOptions.cs b/ZeroLevel.NN/Models/ImagePreprocessorCropOptions.cs
new file mode 100644
index 0000000..caa8ce4
--- /dev/null
+++ b/ZeroLevel.NN/Models/ImagePreprocessorCropOptions.cs
@@ -0,0 +1,37 @@
+namespace ZeroLevel.NN.Models
+{
+    /// <summary>
+    /// Crop options
+    /// </summary>
+    public class ImagePreprocessorCropOptions
+    {
+        /// <summary>
+        /// Split the original image into crops
+        /// </summary>
+        public bool Enabled { get; set; } = false;
+        /// <summary>
+        /// Also put the resized original image into the batch
+        /// </summary>
+        public bool SaveOriginal { get; set; }
+        /// <summary>
+        /// Crop width
+        /// </summary>
+        public int Width { get; set; }
+        /// <summary>
+        /// Crop height
+        /// </summary>
+        public int Height { get; set; }
+        /// <summary>
+        /// Overlap the cropped parts
+        /// </summary>
+        public bool Overlap { get; set; }
+        /// <summary>
+        /// Overlap width coefficient (0 - 1)
+        /// </summary>
+        public float OverlapKoefWidth { get; set; } = 0.8f;
+        /// <summary>
+        /// Overlap height coefficient (0 - 1)
+        /// </summary>
+        public float OverlapKoefHeight { get; set; } = 0.8f;
+    }
+}
diff --git a/ZeroLevel.NN/Models/ImagePreprocessorOptions.cs b/ZeroLevel.NN/Models/ImagePreprocessorOptions.cs
new file mode 100644
index 0000000..64c819c
--- /dev/null
+++ b/ZeroLevel.NN/Models/ImagePreprocessorOptions.cs
@@ -0,0 +1,115 @@
+namespace ZeroLevel.NN.Models
+{
+    public class ImagePreprocessorOptions
+    {
+        public ImagePreprocessorOptions(int inputWidth, int inputHeight, PredictorChannelType channelType)
+        {
+            this.InputWidth = inputWidth;
+            this.InputHeight = inputHeight;
+            this.ChannelType = channelType;
+        }
+
+        public ImagePreprocessorOptions UseCrop(int width, int height, bool saveOriginal, bool overlap)
+        {
+            Crop.Enabled = true;
+            Crop.Height = height;
+            Crop.Width = width;
+            Crop.Overlap = overlap;
+            Crop.SaveOriginal = saveOriginal;
+            return this;
+        }
+
+        public ImagePreprocessorOptions ApplyNormilization()
+        {
+            this.Normalize = true;
+            return this;
+        }
+
+        public ImagePreprocessorOptions ApplyAxeInversion()
+        {
+            this.InvertXY = true;
+            return this;
+        }
+
+        public ImagePreprocessorOptions ApplyCorrection(float[] mean, float[] std)
+        {
+            if (this.Correction)
+            {
+                throw new InvalidOperationException("Correction is already set up");
+            }
+            this.Correction = true;
+            this.Mean = mean;
+            this.Std = std;
+            return this;
+        }
+
+        public ImagePreprocessorOptions ApplyCorrection(Func<int, float, float> correctionFunc)
+        {
+            if (this.Correction)
+            {
+                throw new InvalidOperationException("Correction is already set up");
+            }
+            this.Correction = true;
+            this.CorrectionFunc = correctionFunc;
+            return this;
+        }
+
+        public ImagePreprocessorOptions UseBGR()
+        {
+            this.BGR = true;
+            return this;
+        }
+
+        /// <summary>
+        /// Channel ordering: ChannelFirst gives tensor dims [batch_index, channel, x, y]; ChannelLast gives [batch_index, x, y, channel]
+        /// </summary>
+        public PredictorChannelType ChannelType { get; private set; }
+        /// <summary>
+        /// Crop image options
+        /// </summary>
+        public ImagePreprocessorCropOptions Crop { get; } = new ImagePreprocessorCropOptions();
+        /// <summary>
+        /// NN model input height
+        /// </summary>
+        public int InputHeight { get; private set; }
+        /// <summary>
+        /// NN model input width
+        /// </summary>
+        public int InputWidth { get; private set; }
+        /// <summary>
+        /// Transform pixel
 values to the (0 - 1) range
+        /// </summary>
+        public bool Normalize { get; private set; } = false;
+        /// <summary>
+        /// Transform pixel values with mean/std values: v = (v - mean) / std
+        /// </summary>
+        public bool Correction { get; private set; } = false;
+        /// <summary>
+        /// Custom correction function (channel index, value), used instead of Mean/Std when set
+        /// </summary>
+        public Func<int, float, float> CorrectionFunc { get; private set; } = null;
+        /// <summary>
+        /// Mean values if the Correction parameter is true
+        /// </summary>
+        public float[] Mean { get; private set; }
+        /// <summary>
+        /// Std values if the Correction parameter is true
+        /// </summary>
+        public float[] Std { get; private set; }
+        /// <summary>
+        /// Put pixel values into the tensor in BGR order
+        /// </summary>
+        public bool BGR { get; set; } = false;
+        /// <summary>
+        /// Invert width and height in the input tensor
+        /// </summary>
+        public bool InvertXY { get; set; } = false;
+        /// <summary>
+        /// Channel count (calculated automatically)
+        /// </summary>
+        public int Channels { get; set; }
+        /// <summary>
+        /// Maximum batch size; decrease it if video memory overflows
+        /// </summary>
+        public int MaxBatchSize { get; set; } = 13;
+    }
+}
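To make the option combinations concrete, a sketch of configuring preprocessing for a hypothetical 640x640 ChannelLast model; the size, the ImageNet-style mean/std values and the tiling choice are all illustrative assumptions:

    // Illustrative only: normalization to 0..1, mean/std correction,
    // and 640x640 overlapping crops with the original kept in the batch.
    var options = new ImagePreprocessorOptions(640, 640, PredictorChannelType.ChannelLast)
        .ApplyNormilization()
        .ApplyCorrection(
            mean: new[] { 0.485f, 0.456f, 0.406f },
            std: new[] { 0.229f, 0.224f, 0.225f })
        .UseCrop(640, 640, saveOriginal: true, overlap: true);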
diff --git a/ZeroLevel.NN/Models/Landmarks.cs b/ZeroLevel.NN/Models/Landmarks.cs
new file mode 100644
index 0000000..5ffdade
--- /dev/null
+++ b/ZeroLevel.NN/Models/Landmarks.cs
@@ -0,0 +1,121 @@
+using ZeroLevel.NN.Models;
+using ZeroLevel.Services.Serialization;
+
+namespace Zero.NN.Models
+{
+    public class Landmarks
+        : IBinarySerializable
+    {
+        public FacePoint RightEye;
+        public FacePoint LeftEye;
+        public FacePoint Nose;
+        public FacePoint RightMouth;
+        public FacePoint LeftMouth;
+
+        public float Top()
+        {
+            var min = RightEye.Y;
+            if (LeftEye.Y < min)
+            {
+                min = LeftEye.Y;
+            }
+            if (Nose.Y < min)
+            {
+                min = Nose.Y;
+            }
+            if (RightMouth.Y < min)
+            {
+                min = RightMouth.Y;
+            }
+            if (LeftMouth.Y < min)
+            {
+                min = LeftMouth.Y;
+            }
+            return min;
+        }
+
+        public float Bottom()
+        {
+            var max = RightEye.Y;
+            if (LeftEye.Y > max)
+            {
+                max = LeftEye.Y;
+            }
+            if (Nose.Y > max)
+            {
+                max = Nose.Y;
+            }
+            if (RightMouth.Y > max)
+            {
+                max = RightMouth.Y;
+            }
+            if (LeftMouth.Y > max)
+            {
+                max = LeftMouth.Y;
+            }
+            return max;
+        }
+
+        public float Left()
+        {
+            var min = RightEye.X;
+            if (LeftEye.X < min)
+            {
+                min = LeftEye.X;
+            }
+            if (Nose.X < min)
+            {
+                min = Nose.X;
+            }
+            if (RightMouth.X < min)
+            {
+                min = RightMouth.X;
+            }
+            if (LeftMouth.X < min)
+            {
+                min = LeftMouth.X;
+            }
+            return min;
+        }
+
+        public float Right()
+        {
+            var max = RightEye.X;
+            if (LeftEye.X > max)
+            {
+                max = LeftEye.X;
+            }
+            if (Nose.X > max)
+            {
+                max = Nose.X;
+            }
+            if (RightMouth.X > max)
+            {
+                max = RightMouth.X;
+            }
+            if (LeftMouth.X > max)
+            {
+                max = LeftMouth.X;
+            }
+            return max;
+        }
+
+        public void Deserialize(IBinaryReader reader)
+        {
+            this.RightEye = reader.Read<FacePoint>();
+            this.LeftEye = reader.Read<FacePoint>();
+            this.Nose = reader.Read<FacePoint>();
+            this.RightMouth = reader.Read<FacePoint>();
+            this.LeftMouth = reader.Read<FacePoint>();
+        }
+
+        public void Serialize(IBinaryWriter writer)
+        {
+            writer.Write(this.RightEye);
+            writer.Write(this.LeftEye);
+            writer.Write(this.Nose);
+            writer.Write(this.RightMouth);
+            writer.Write(this.LeftMouth);
+        }
+    }
+}
diff --git a/ZeroLevel.NN/Models/OffsetBox.cs b/ZeroLevel.NN/Models/OffsetBox.cs
new file mode 100644
index 0000000..bfa25aa
--- /dev/null
+++ b/ZeroLevel.NN/Models/OffsetBox.cs
@@ -0,0 +1,19 @@
+namespace ZeroLevel.NN.Models
+{
+    public class OffsetBox
+    {
+        public int X { get; set; }
+        public int Y { get; set; }
+        public int Width { get; set; }
+        public int Height { get; set; }
+
+        public OffsetBox() { }
+        public OffsetBox(int x, int y, int w, int h)
+        {
+            X = x;
+            Y = y;
+            Width = w;
+            Height = h;
+        }
+    }
+}
diff --git a/ZeroLevel.NN/Models/PredictionInput.cs b/ZeroLevel.NN/Models/PredictionInput.cs
new file mode 100644
index 0000000..438f42f
--- /dev/null
+++ b/ZeroLevel.NN/Models/PredictionInput.cs
@@ -0,0 +1,11 @@
+using Microsoft.ML.OnnxRuntime.Tensors;
+
+namespace ZeroLevel.NN.Models
+{
+    public class PredictionInput
+    {
+        public Tensor<float> Tensor;
+        public OffsetBox[] Offsets;
+        public int Count;
+    }
+}
diff --git a/ZeroLevel.NN/Models/PredictorChannelType.cs b/ZeroLevel.NN/Models/PredictorChannelType.cs
new file mode 100644
index 0000000..b4cf276
--- /dev/null
+++ b/ZeroLevel.NN/Models/PredictorChannelType.cs
@@ -0,0 +1,8 @@
+namespace ZeroLevel.NN.Models
+{
+    public enum PredictorChannelType
+    {
+        ChannelFirst,
+        ChannelLast
+    }
+}
diff --git a/ZeroLevel.NN/Services/AnchorsGenerator.cs b/ZeroLevel.NN/Services/AnchorsGenerator.cs
new file mode 100644
index 0000000..d0a2f24
--- /dev/null
+++ b/ZeroLevel.NN/Services/AnchorsGenerator.cs
@@ -0,0 +1,227 @@
+/*
+
+PORTED FROM https://github.com/hollance/BlazeFace-PyTorch/blob/master/Anchors.ipynb
+
+ */
+
+namespace Zero.NN.Services
+{
+    public class Anchor
+    {
+        public float cx;
+        public float cy;
+        public float w;
+        public float h;
+    }
+
+    // Options to generate anchors for SSD object detection models.
+    public class AnchorOptions
+    {
+        // Number of output feature maps to generate the anchors on.
+        public int num_layers;
+
+        // Min and max scales for generating anchor boxes on feature maps.
+        public float min_scale;
+        public float max_scale;
+
+        // Size of input images.
+        public int input_size_height;
+        public int input_size_width;
+
+        // The offset for the center of anchors. The value is in the scale of stride.
+        // E.g. 0.5 meaning 0.5 * |current_stride| in pixels.
+        public float anchor_offset_x = 0.5f;
+        public float anchor_offset_y = 0.5f;
+
+        // Strides of each output feature map.
+        public int[] strides;
+
+        // List of aspect ratios used to generate anchors.
+        public float[] aspect_ratios;
+
+        // A boolean to indicate whether the fixed 3 boxes per location is used in the lowest layer.
+        public bool reduce_boxes_in_lowest_layer = false;
+
+        // An additional anchor is added with this aspect ratio and a scale
+        // interpolated between the scale for a layer and the scale for the next layer
+        // (1.0 for the last layer). This anchor is not included if this value is 0.
+        public float interpolated_scale_aspect_ratio = 1.0f;
+
+        // Whether to use fixed width and height (e.g. both 1.0f) for each anchor.
+        // This option can be used when the predicted anchor width and height are in
+        // pixels.
+        public bool fixed_anchor_size = false;
+
+        #region PRESETS
+        public static AnchorOptions FaceDetectionBackMobileGpuOptions => new AnchorOptions
+        {
+            num_layers = 4,
+            min_scale = 0.15625f,
+            max_scale = 0.75f,
+            input_size_height = 256,
+            input_size_width = 256,
+            anchor_offset_x = 0.5f,
+            anchor_offset_y = 0.5f,
+            strides = new[] { 16, 32, 32, 32 },
+            aspect_ratios = new[] { 1.0f },
+            reduce_boxes_in_lowest_layer = false,
+            interpolated_scale_aspect_ratio = 1.0f,
+            fixed_anchor_size = true
+        };
+
+        public static AnchorOptions FaceDetectionMobileGpuOptions => new AnchorOptions
+        {
+            num_layers = 4,
+            min_scale = 0.1484375f,
+            max_scale = 0.75f,
+            input_size_height = 128,
+            input_size_width = 128,
+            anchor_offset_x = 0.5f,
+            anchor_offset_y = 0.5f,
+            strides = new[] { 8, 16, 16, 16 },
+            aspect_ratios = new[] { 1.0f },
+            reduce_boxes_in_lowest_layer = false,
+            interpolated_scale_aspect_ratio = 1.0f,
+            fixed_anchor_size = true
+        };
+
+        public static AnchorOptions MobileSSDOptions => new AnchorOptions
+        {
+            num_layers = 6,
+            min_scale = 0.2f,
+            max_scale = 0.95f,
+            input_size_height = 300,
+            input_size_width = 300,
+            anchor_offset_x = 0.5f,
+            anchor_offset_y = 0.5f,
+            strides = new[] { 16, 32, 64, 128, 256, 512 },
+            aspect_ratios = new[] { 1.0f, 2.0f, 0.5f, 3.0f, 0.3333f },
+            reduce_boxes_in_lowest_layer = true,
+            interpolated_scale_aspect_ratio = 1.0f,
+            fixed_anchor_size = false
+        };
+        #endregion
+    }
+
+    internal class AnchorsGenerator
+    {
+        private static float calculate_scale(float min_scale, float max_scale, float stride_index, float num_strides)
+        {
+            if (num_strides == 1)
+            {
+                // single-stride models use the midpoint scale (avoids division by zero)
+                return (min_scale + max_scale) * 0.5f;
+            }
+            return (float)(min_scale + (max_scale - min_scale) * stride_index / (num_strides - 1.0f));
+        }
+
+        private readonly AnchorOptions _options;
+        private readonly List<Anchor> anchors = new List<Anchor>();
+
+        public IList<Anchor> Anchors => anchors;
+
+        public AnchorsGenerator(AnchorOptions options)
+        {
+            if (options == null)
+            {
+                throw new ArgumentNullException(nameof(options));
+            }
+            if (options.strides == null)
+            {
+                throw new ArgumentNullException(nameof(options.strides));
+            }
+            _options = options;
+            Generate();
+        }
+
+        private void Generate()
+        {
+            var strides_size = _options.strides?.Length ?? 0;
+            if (_options.num_layers != strides_size)
+            {
+                throw new ArgumentException($"Expected {_options.num_layers} strides (num_layers), got {strides_size} strides");
+            }
+            var layer_id = 0;
+            while (layer_id < strides_size)
+            {
+                var anchor_height = new List<float>();
+                var anchor_width = new List<float>();
+                var aspect_ratios = new List<float>();
+                var scales = new List<float>();
+
+                // For same strides, we merge the anchors in the same order.
+                var last_same_stride_layer = layer_id;
+                while ((last_same_stride_layer < strides_size) && (_options.strides[last_same_stride_layer] == _options.strides[layer_id]))
+                {
+                    var scale = calculate_scale(_options.min_scale, _options.max_scale, last_same_stride_layer, strides_size);
+
+                    if (last_same_stride_layer == 0 && _options.reduce_boxes_in_lowest_layer)
+                    {
+                        // For the first layer, it can be specified to use predefined anchors.
+                        aspect_ratios.Add(1.0f);
+                        aspect_ratios.Add(2.0f);
+                        aspect_ratios.Add(0.5f);
+                        scales.Add(0.1f);
+                        scales.Add(scale);
+                        scales.Add(scale);
+                    }
+                    else
+                    {
+                        foreach (var aspect_ratio in _options.aspect_ratios)
+                        {
+                            aspect_ratios.Add(aspect_ratio);
+                            scales.Add(scale);
+                        }
+                        if (_options.interpolated_scale_aspect_ratio > 0.0f)
+                        {
+                            var scale_next = (last_same_stride_layer == (strides_size - 1))
+                                ?
 1.0f
+                                : calculate_scale(_options.min_scale, _options.max_scale, last_same_stride_layer + 1, strides_size);
+                            scales.Add((float)Math.Sqrt(scale * scale_next));
+                            aspect_ratios.Add(_options.interpolated_scale_aspect_ratio);
+                        }
+                    }
+                    last_same_stride_layer += 1;
+                }
+
+                for (var i = 0; i < aspect_ratios.Count; i++)
+                {
+                    var ratio_sqrts = (float)Math.Sqrt(aspect_ratios[i]);
+                    anchor_height.Add(scales[i] / ratio_sqrts);
+                    anchor_width.Add(scales[i] * ratio_sqrts);
+                }
+
+                var stride = _options.strides[layer_id];
+                var feature_map_height = (int)(Math.Ceiling((float)_options.input_size_height / stride));
+                var feature_map_width = (int)(Math.Ceiling((float)_options.input_size_width / stride));
+
+                for (var y = 0; y < feature_map_height; y++)
+                {
+                    for (var x = 0; x < feature_map_width; x++)
+                    {
+                        for (var anchor_id = 0; anchor_id < anchor_height.Count; anchor_id++)
+                        {
+                            var x_center = (x + _options.anchor_offset_x) / feature_map_width;
+                            var y_center = (y + _options.anchor_offset_y) / feature_map_height;
+
+                            var anchor = new Anchor
+                            {
+                                cx = x_center,
+                                cy = y_center,
+                                w = 0f,
+                                h = 0f
+                            };
+                            if (_options.fixed_anchor_size)
+                            {
+                                anchor.w = 1.0f;
+                                anchor.h = 1.0f;
+                            }
+                            else
+                            {
+                                anchor.w = anchor_width[anchor_id];
+                                anchor.h = anchor_height[anchor_id];
+                            }
+                            anchors.Add(anchor);
+                        }
+                    }
+                }
+                layer_id = last_same_stride_layer;
+            }
+        }
+    }
+}
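A short sketch of using the generator from inside the assembly (the class is internal). With the FaceDetectionMobileGpuOptions preset and strides {8, 16, 16, 16}, the 16x16 grid contributes 2 anchors per cell and the merged 8x8 grids contribute 6, i.e. 16*16*2 + 8*8*6 = 896 anchors, matching the BlazeFace front-camera model:

    // Generate the anchor grid for the 128x128 BlazeFace front-camera preset.
    var generator = new AnchorsGenerator(AnchorOptions.FaceDetectionMobileGpuOptions);
    Console.WriteLine(generator.Anchors.Count); // 896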
diff --git a/ZeroLevel.NN/Services/CommonHelper.cs b/ZeroLevel.NN/Services/CommonHelper.cs
new file mode 100644
index 0000000..e927620
--- /dev/null
+++ b/ZeroLevel.NN/Services/CommonHelper.cs
@@ -0,0 +1,33 @@
+namespace Zero.NN.Services
+{
+    internal static class CommonHelper
+    {
+        public static float Sigmoid(float x)
+        {
+            if (x >= 0)
+            {
+                return 1.0f / (1.0f + (float)Math.Exp(-x));
+            }
+            else
+            {
+                return (float)(Math.Exp(x) / (1.0f + Math.Exp(x)));
+            }
+        }
+
+        public static float Logit(float x)
+        {
+            if (x == 0)
+            {
+                return (float)(int.MinValue);
+            }
+            else if (x == 1)
+            {
+                return (float)(int.MaxValue);
+            }
+            else
+            {
+                return (float)Math.Log(x / (1.0f - x));
+            }
+        }
+    }
+}
diff --git a/ZeroLevel.NN/Services/ImagePreprocessor.cs b/ZeroLevel.NN/Services/ImagePreprocessor.cs
new file mode 100644
index 0000000..c40e4e5
--- /dev/null
+++ b/ZeroLevel.NN/Services/ImagePreprocessor.cs
@@ -0,0 +1,253 @@
+using Microsoft.ML.OnnxRuntime.Tensors;
+using SixLabors.ImageSharp;
+using SixLabors.ImageSharp.PixelFormats;
+using SixLabors.ImageSharp.Processing;
+using ZeroLevel.NN.Models;
+
+namespace ZeroLevel.NN
+{
+    public static class ImagePreprocessor
+    {
+        private const float NORMALIZATION_SCALE = 1f / 255f;
+
+        private static Func<byte, int, float> PixelToTensorMethod(ImagePreprocessorOptions options)
+        {
+            if (options.Normalize)
+            {
+                if (options.Correction)
+                {
+                    if (options.CorrectionFunc == null)
+                    {
+                        return new Func<byte, int, float>((b, i) => ((NORMALIZATION_SCALE * (float)b) - options.Mean[i]) / options.Std[i]);
+                    }
+                    else
+                    {
+                        return new Func<byte, int, float>((b, i) => options.CorrectionFunc.Invoke(i, NORMALIZATION_SCALE * (float)b));
+                    }
+                }
+                else
+                {
+                    return new Func<byte, int, float>((b, i) => NORMALIZATION_SCALE * (float)b);
+                }
+            }
+            else if (options.Correction)
+            {
+                if (options.CorrectionFunc == null)
+                {
+                    return new Func<byte, int, float>((b, i) => (((float)b) - options.Mean[i]) / options.Std[i]);
+                }
+                else
+                {
+                    return new Func<byte, int, float>((b, i) => options.CorrectionFunc.Invoke(i, (float)b));
+                }
+            }
+            return new Func<byte, int, float>((b, _) => (float)b);
+        }
+
+        private static int CalculateFragmentsCount(Image image, ImagePreprocessorOptions options)
+        {
+            int count = 0;
+            var xs = options.Crop.Overlap ? (int)(options.Crop.Width * options.Crop.OverlapKoefWidth) : options.Crop.Width;
+            var ys = options.Crop.Overlap ? (int)(options.Crop.Height * options.Crop.OverlapKoefHeight) : options.Crop.Height;
+            for (var x = 0; x < image.Width - xs; x += xs)
+            {
+                for (var y = 0; y < image.Height - ys; y += ys)
+                {
+                    count++;
+                }
+            }
+            return count;
+        }
+        private static void FillTensor(Tensor<float> tensor, Image image, int index, ImagePreprocessorOptions options, Func<byte, int, float> pixToTensor)
+        {
+            var append = options.ChannelType == PredictorChannelType.ChannelFirst
+                ? new Action<Tensor<float>, float, int, int, int, int>((t, v, ind, c, i, j) => { t[ind, c, i, j] = v; })
+                : new Action<Tensor<float>, float, int, int, int, int>((t, v, ind, c, i, j) => { t[ind, i, j, c] = v; });
+
+            ((Image<Rgb24>)image).ProcessPixelRows(pixels =>
+            {
+                if (options.InvertXY)
+                {
+                    for (int y = 0; y < pixels.Height; y++)
+                    {
+                        Span<Rgb24> pixelSpan = pixels.GetRowSpan(y);
+                        for (int x = 0; x < pixels.Width; x++)
+                        {
+                            if (options.BGR)
+                            {
+                                append(tensor, pixToTensor(pixelSpan[x].B, 0), index, 0, y, x);
+                                append(tensor, pixToTensor(pixelSpan[x].G, 1), index, 1, y, x);
+                                append(tensor, pixToTensor(pixelSpan[x].R, 2), index, 2, y, x);
+                            }
+                            else
+                            {
+                                append(tensor, pixToTensor(pixelSpan[x].R, 0), index, 0, y, x);
+                                append(tensor, pixToTensor(pixelSpan[x].G, 1), index, 1, y, x);
+                                append(tensor, pixToTensor(pixelSpan[x].B, 2), index, 2, y, x);
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    for (int y = 0; y < pixels.Height; y++)
+                    {
+                        Span<Rgb24> pixelSpan = pixels.GetRowSpan(y);
+                        for (int x = 0; x < pixels.Width; x++)
+                        {
+                            if (options.BGR)
+                            {
+                                append(tensor, pixToTensor(pixelSpan[x].B, 0), index, 0, x, y);
+                                append(tensor, pixToTensor(pixelSpan[x].G, 1), index, 1, x, y);
+                                append(tensor, pixToTensor(pixelSpan[x].R, 2), index, 2, x, y);
+                            }
+                            else
+                            {
+                                append(tensor, pixToTensor(pixelSpan[x].R, 0), index, 0, x, y);
+                                append(tensor, pixToTensor(pixelSpan[x].G, 1), index, 1, x, y);
+                                append(tensor, pixToTensor(pixelSpan[x].B, 2), index, 2, x, y);
+                            }
+                        }
+                    }
+                }
+            });
+        }
+
+        private static Tensor<float> InitInputTensor(ImagePreprocessorOptions options, int batchSize = 1)
+        {
+            switch (options.ChannelType)
+            {
+                case PredictorChannelType.ChannelFirst:
+                    return options.InvertXY
+                        ? new DenseTensor<float>(new[] { batchSize, options.Channels, options.InputHeight, options.InputWidth })
+                        : new DenseTensor<float>(new[] { batchSize, options.Channels, options.InputWidth, options.InputHeight });
+                default:
+                    return options.InvertXY
+                        ? new DenseTensor<float>(new[] { batchSize, options.InputHeight, options.InputWidth, options.Channels })
+                        : new DenseTensor<float>(new[] { batchSize, options.InputWidth, options.InputHeight, options.Channels });
+            }
+        }
+
+        public static PredictionInput[] ToTensors(this Image image, ImagePreprocessorOptions options)
+        {
+            PredictionInput[] result = null;
+            var pixToTensor = PixelToTensorMethod(options);
+            options.Channels = image.PixelType.BitsPerPixel >> 3;
+
+            if (options.Crop.Enabled)
+            {
+                int count = CalculateFragmentsCount(image, options) + (options.Crop.SaveOriginal ? 1 : 0);
+                int offset = count % options.MaxBatchSize;
+                int count_tensors = count / options.MaxBatchSize + (offset == 0 ? 0 : 1);
+                var tensors = new PredictionInput[count_tensors];
+                for (int i = 0; i < count_tensors; i++)
+                {
+                    if (i < count_tensors - 1)
+                    {
+                        tensors[i] = new PredictionInput
+                        {
+                            Tensor = InitInputTensor(options, options.MaxBatchSize),
+                            Offsets = new OffsetBox[options.MaxBatchSize],
+                            Count = options.MaxBatchSize
+                        };
+                    }
+                    else
+                    {
+                        tensors[i] = new PredictionInput
+                        {
+                            Tensor = InitInputTensor(options, offset == 0 ? options.MaxBatchSize : offset),
+                            Offsets = new OffsetBox[offset == 0 ? options.MaxBatchSize : offset],
+                            Count = offset == 0 ? options.MaxBatchSize : offset
+                        };
+                    }
+                }
+
+                int tensor_index = 0;
+                int tensor_part_index = 0;
+                var xs = options.Crop.Overlap ? (int)(options.Crop.Width * options.Crop.OverlapKoefWidth) : options.Crop.Width;
+                var ys = options.Crop.Overlap ? (int)(options.Crop.Height * options.Crop.OverlapKoefHeight) : options.Crop.Height;
+
+                if (options.Crop.SaveOriginal)
+                {
+                    using (var copy = image.Clone(img => img.Resize(options.InputWidth, options.InputHeight, KnownResamplers.Bicubic)))
+                    {
+                        FillTensor(tensors[tensor_index].Tensor, copy, tensor_part_index, options, pixToTensor);
+                        tensors[tensor_index].Offsets[tensor_part_index] = new OffsetBox(0, 0, image.Width, image.Height);
+                    }
+                    tensor_part_index++;
+                }
+                for (var x = 0; x < image.Width - xs; x += xs)
+                {
+                    var startx = x;
+                    var dx = (x + options.Crop.Width) - image.Width;
+                    if (dx > 0)
+                    {
+                        startx -= dx;
+                    }
+                    for (var y = 0; y < image.Height - ys; y += ys)
+                    {
+                        if (tensor_part_index > 0 && tensor_part_index % options.MaxBatchSize == 0)
+                        {
+                            tensor_index++;
+                            tensor_part_index = 0;
+                        }
+                        var starty = y;
+                        var dy = (y + options.Crop.Height) - image.Height;
+                        if (dy > 0)
+                        {
+                            starty -= dy;
+                        }
+                        using (var copy = image
+                            .Clone(img => img
+                                .Crop(new Rectangle(startx, starty, options.Crop.Width, options.Crop.Height))
+                                .Resize(options.InputWidth, options.InputHeight, KnownResamplers.Bicubic)))
+                        {
+                            FillTensor(tensors[tensor_index].Tensor, copy, tensor_part_index, options, pixToTensor);
+                            tensors[tensor_index].Offsets[tensor_part_index] = new OffsetBox(startx, starty, options.Crop.Width, options.Crop.Height);
+                        }
+                        tensor_part_index++;
+                    }
+                }
+                return tensors;
+            }
+
+            // if resize only
+            result = new PredictionInput[1];
+            using (var copy = image.Clone(img => img.Resize(options.InputWidth, options.InputHeight, KnownResamplers.Bicubic)))
+            {
+                Tensor<float> tensor = InitInputTensor(options);
+                FillTensor(tensor, copy, 0, options, pixToTensor);
+                result[0] = new PredictionInput { Count = 1, Offsets = null, Tensor = tensor };
+            }
+            return result;
+        }
+
+        public static Image<Rgb24> Crop(Image<Rgb24> source, float x1, float y1, float x2, float y2)
+        {
+            int left = 0;
+            int right = 0;
+            int top = 0;
+            int bottom = 0;
+
+            int width = (int)(x2 - x1);
+            int height = (int)(y2 - y1);
+
+            if (x1 < 0) { left = (int)-x1; x1 = 0; }
+            if (x2 > source.Width) { right = (int)(x2 - source.Width); x2 = source.Width - 1; }
+            if (y1 < 0) { top = (int)-y1; y1 = 0; }
+            if (y2 > source.Height) { bottom = (int)(y2 - source.Height); y2 = source.Height - 1; }
+
+            if (left + right + top + bottom > 0)
+            {
+                var backgroundImage = new Image<Rgb24>(SixLabors.ImageSharp.Configuration.Default, width, height, new Rgb24(0, 0, 0));
+                using (var crop = source.Clone(img => img.Crop(new Rectangle((int)x1, (int)y1, (int)(x2 - x1), (int)(y2 - y1)))))
+                {
+                    backgroundImage.Mutate(bg => bg.DrawImage(crop, new Point(left, top), 1f));
+                }
+                return backgroundImage;
+            }
+            return source.Clone(img => img.Crop(new Rectangle((int)x1, (int)y1, (int)(x2 - x1), (int)(y2 - y1))));
+        }
+    }
+}
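When cropping is enabled, ToTensors returns batched crops together with their OffsetBox positions, so a consumer has to translate predictions from crop-local coordinates back into the source image. A sketch of that bookkeeping, with the detection itself left abstract:

    // Each crop was taken at (offset.X, offset.Y) with size offset.Width x
    // offset.Height and then resized to the model input, so a model-input
    // point (lx, ly) maps back as offset.X + lx * scaleX (same for Y).
    foreach (var input in image.ToTensors(options))
    {
        for (int k = 0; k < input.Count; k++)
        {
            var offset = input.Offsets[k];
            float scaleX = offset.Width / (float)options.InputWidth;
            float scaleY = offset.Height / (float)options.InputHeight;
            // globalX = offset.X + localX * scaleX; globalY = offset.Y + localY * scaleY
        }
    }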
diff --git a/ZeroLevel.NN/Services/SSDNN.cs b/ZeroLevel.NN/Services/SSDNN.cs
new file mode 100644
index 0000000..4092f8d
--- /dev/null
+++ b/ZeroLevel.NN/Services/SSDNN.cs
@@ -0,0 +1,66 @@
+using Microsoft.ML.OnnxRuntime;
+using Microsoft.ML.OnnxRuntime.Tensors;
+using SixLabors.ImageSharp;
+using SixLabors.ImageSharp.PixelFormats;
+using ZeroLevel.NN.Models;
+
+namespace ZeroLevel.NN
+{
+    public abstract class SSDNN
+        : IDisposable
+    {
+        private readonly InferenceSession _session;
+
+        public SSDNN(string path)
+        {
+            _session = new InferenceSession(path);
+        }
+
+        protected void Extract(IDictionary<string, Tensor<float>> input, Action<IDictionary<string, Tensor<float>>> inputHandler)
+        {
+            var container = new List<NamedOnnxValue>();
+            foreach (var pair in input)
+            {
+                container.Add(NamedOnnxValue.CreateFromTensor<float>(pair.Key, pair.Value));
+            }
+            using (var output = _session.Run(container))
+            {
+                var result = new Dictionary<string, Tensor<float>>();
+                foreach (var o in output)
+                {
+                    result.Add(o.Name, o.AsTensor<float>());
+                }
+                inputHandler.Invoke(result);
+            }
+        }
+
+        /// <summary>
+        /// Scale input vectors individually to unit norm (vector length).
+        /// </summary>
+        protected void Norm(float[] vector)
+        {
+            var totalSum = vector.Sum(v => v * v);
+            var length = (float)Math.Sqrt(totalSum);
+            var inverseLength = 1.0f / length;
+            for (int i = 0; i < vector.Length; i++)
+            {
+                vector[i] *= inverseLength;
+            }
+        }
+        protected PredictionInput[] MakeInputBatch(Image image, ImagePreprocessorOptions options)
+        {
+            return ImagePreprocessor.ToTensors(image, options);
+        }
+
+        protected Tensor<float> MakeInput(Image image, ImagePreprocessorOptions options)
+        {
+            var input = ImagePreprocessor.ToTensors(image, options);
+            return input[0].Tensor;
+        }
+
+        public void Dispose()
+        {
+            _session?.Dispose();
+        }
+    }
+}
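New model wrappers follow the same pattern as ArcFaceNet and FaceNet: derive from SSDNN, build preprocessing options, and read output tensors in the Extract callback. A hypothetical wrapper as a sketch; the input name "input" and the 224x224 shape are assumptions, not a model shipped with this patch:

    // Hypothetical wrapper illustrating the SSDNN extension pattern.
    public sealed class ToyClassifier : SSDNN
    {
        public ToyClassifier(string modelPath) : base(modelPath) { }

        public float[] Predict(Image image)
        {
            var input = MakeInput(image,
                new ImagePreprocessorOptions(224, 224, PredictorChannelType.ChannelFirst)
                    .ApplyNormilization());
            float[] scores = null;
            Extract(new Dictionary<string, Tensor<float>> { { "input", input } }, d =>
            {
                scores = d.First().Value.ToArray();
            });
            return scores;
        }
    }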
"{5CCFF557-C91F-4DD7-9530-D76FE517DA98}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Processor", "TestPipeLine\Processor\Processor.csproj", "{5CCFF557-C91F-4DD7-9530-D76FE517DA98}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Source", "TestPipeLine\Source\Source.csproj", "{82202433-6426-4737-BAB2-473AC1F74C5D}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Source", "TestPipeLine\Source\Source.csproj", "{82202433-6426-4737-BAB2-473AC1F74C5D}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Watcher", "TestPipeLine\Watcher\Watcher.csproj", "{F70842E7-9A1D-4CC4-9F55-0953AEC9C7C8}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Watcher", "TestPipeLine\Watcher\Watcher.csproj", "{F70842E7-9A1D-4CC4-9F55-0953AEC9C7C8}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ZeroLevel.NN", "ZeroLevel.NN\ZeroLevel.NN.csproj", "{C67E5F2E-B62E-441D-99F5-8ECA6CECE804}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -313,6 +315,18 @@ Global {F70842E7-9A1D-4CC4-9F55-0953AEC9C7C8}.Release|x64.Build.0 = Release|x64 {F70842E7-9A1D-4CC4-9F55-0953AEC9C7C8}.Release|x86.ActiveCfg = Release|Any CPU {F70842E7-9A1D-4CC4-9F55-0953AEC9C7C8}.Release|x86.Build.0 = Release|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Debug|x64.ActiveCfg = Debug|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Debug|x64.Build.0 = Debug|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Debug|x86.ActiveCfg = Debug|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Debug|x86.Build.0 = Debug|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Release|Any CPU.Build.0 = Release|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Release|x64.ActiveCfg = Release|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Release|x64.Build.0 = Release|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Release|x86.ActiveCfg = Release|Any CPU + {C67E5F2E-B62E-441D-99F5-8ECA6CECE804}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/ZeroLevel.HNSW/Utils/Metrics.cs b/ZeroLevel/Services/Mathemathics/Metrics.cs similarity index 70% rename from ZeroLevel.HNSW/Utils/Metrics.cs rename to ZeroLevel/Services/Mathemathics/Metrics.cs index 94d895b..e63c2b2 100644 --- a/ZeroLevel.HNSW/Utils/Metrics.cs +++ b/ZeroLevel/Services/Mathemathics/Metrics.cs @@ -1,10 +1,80 @@ using System; -using System.Linq; -namespace ZeroLevel.HNSW +namespace ZeroLevel.Services.Mathemathics { + public enum KnownMetrics + { + Cosine, Manhattanm, Euclide, Chebyshev + } + + public static class Metrics { + public static Func CreateFloat(KnownMetrics metric) + { + switch (metric) + { + case KnownMetrics.Euclide: + return new Func((u, v) => L2EuclideanDistance(u, v)); + case KnownMetrics.Cosine: + return new Func((u, v) => CosineDistance(u, v)); + case KnownMetrics.Chebyshev: + return new Func((u, v) => ChebyshevDistance(u, v)); + case KnownMetrics.Manhattanm: + return new Func((u, v) => L1ManhattanDistance(u, v)); + } + throw new Exception($"Metric '{metric.ToString()}' not supported for Float type"); + } + + public static Func CreateByte(KnownMetrics metric) + { + switch (metric) + { + case KnownMetrics.Euclide: + return new Func((u, v) => L2EuclideanDistance(u, v)); + 
+                case KnownMetrics.Cosine:
+                    return new Func<byte[], byte[], float>((u, v) => CosineDistance(u, v));
+                case KnownMetrics.Chebyshev:
+                    return new Func<byte[], byte[], float>((u, v) => ChebyshevDistance(u, v));
+                case KnownMetrics.Manhattanm:
+                    return new Func<byte[], byte[], float>((u, v) => L1ManhattanDistance(u, v));
+            }
+            throw new Exception($"Metric '{metric.ToString()}' not supported for Byte type");
+        }
+
+        public static Func<long[], long[], float> CreateLong(KnownMetrics metric)
+        {
+            switch (metric)
+            {
+                case KnownMetrics.Euclide:
+                    return new Func<long[], long[], float>((u, v) => L2EuclideanDistance(u, v));
+                case KnownMetrics.Cosine:
+                    return new Func<long[], long[], float>((u, v) => CosineDistance(u, v));
+                case KnownMetrics.Chebyshev:
+                    return new Func<long[], long[], float>((u, v) => ChebyshevDistance(u, v));
+                case KnownMetrics.Manhattanm:
+                    return new Func<long[], long[], float>((u, v) => L1ManhattanDistance(u, v));
+            }
+            throw new Exception($"Metric '{metric.ToString()}' not supported for Long type");
+        }
+
+        public static Func<int[], int[], float> CreateInt(KnownMetrics metric)
+        {
+            switch (metric)
+            {
+                case KnownMetrics.Euclide:
+                    return new Func<int[], int[], float>((u, v) => L2EuclideanDistance(u, v));
+                case KnownMetrics.Cosine:
+                    return new Func<int[], int[], float>((u, v) => CosineDistance(u, v));
+                case KnownMetrics.Chebyshev:
+                    return new Func<int[], int[], float>((u, v) => ChebyshevDistance(u, v));
+                case KnownMetrics.Manhattanm:
+                    return new Func<int[], int[], float>((u, v) => L1ManhattanDistance(u, v));
+            }
+            throw new Exception($"Metric '{metric.ToString()}' not supported for Int type");
+        }
+
         /// <summary>
         /// The taxicab metric is also known as rectilinear distance,
         /// L1 distance or L1 norm, city block distance, Manhattan distance,
@@ -12,7 +82,7 @@ namespace ZeroLevel.HNSW
         /// It represents the distance between points in a city road grid.
         /// It examines the absolute differences between the coordinates of a pair of objects.
         /// </summary>
-        public static float L1Manhattan(float[] v1, float[] v2)
+        public static float L1ManhattanDistance(float[] v1, float[] v2)
         {
             float res = 0;
             for (int i = 0; i < v1.Length; i++)
@@ -23,7 +93,7 @@ namespace ZeroLevel.HNSW
             return (res);
         }
 
-        public static float L1Manhattan(byte[] v1, byte[] v2)
+        public static float L1ManhattanDistance(byte[] v1, byte[] v2)
         {
             float res = 0;
             for (int i = 0; i < v1.Length; i++)
@@ -34,7 +104,7 @@ namespace ZeroLevel.HNSW
             return (res);
         }
 
-        public static float L1Manhattan(int[] v1, int[] v2)
+        public static float L1ManhattanDistance(int[] v1, int[] v2)
        {
             float res = 0;
             for (int i = 0; i < v1.Length; i++)
@@ -45,7 +115,7 @@ namespace ZeroLevel.HNSW
             return (res);
         }
 
-        public static float L1Manhattan(long[] v1, long[] v2)
+        public static float L1ManhattanDistance(long[] v1, long[] v2)
         {
             float res = 0;
             for (int i = 0; i < v1.Length; i++)
@@ -62,7 +132,7 @@ namespace ZeroLevel.HNSW
         /// examines the root of square differences between the coordinates of a pair of objects.
         /// This is most generally known as the Pythagorean theorem.
         /// </summary>
-        public static float L2Euclidean(float[] v1, float[] v2)
+        public static float L2EuclideanDistance(float[] v1, float[] v2)
         {
             float res = 0;
             for (int i = 0; i < v1.Length; i++)
@@ -73,7 +143,7 @@ namespace ZeroLevel.HNSW
             return (float)Math.Sqrt(res);
         }
 
-        public static float L2Euclidean(byte[] v1, byte[] v2)
+        public static float L2EuclideanDistance(byte[] v1, byte[] v2)
         {
             float res = 0;
             for (int i = 0; i < v1.Length; i++)
@@ -84,7 +154,7 @@ namespace ZeroLevel.HNSW
             return (float)Math.Sqrt(res);
         }
 
-        public static float L2Euclidean(int[] v1, int[] v2)
+        public static float L2EuclideanDistance(int[] v1, int[] v2)
         {
             float res = 0;
             for (int i = 0; i < v1.Length; i++)
@@ -95,7 +165,7 @@ namespace ZeroLevel.HNSW
             return (float)Math.Sqrt(res);
         }
 
-        public static float L2Euclidean(long[] v1, long[] v2)
+        public static float L2EuclideanDistance(long[] v1, long[] v2)
         {
             float res = 0;
             for (int i = 0; i < v1.Length; i++)
@@ -228,7 +298,7 @@ namespace ZeroLevel.HNSW
             return max;
         }
 
-        public static float Cosine(float[] u, float[] v)
+        public static float CosineDistance(float[] u, float[] v)
         {
             if (u.Length != v.Length)
             {
@@ -249,7 +319,7 @@ namespace ZeroLevel.HNSW
             return 1 - similarity;
         }
 
-        public static float Cosine(byte[] u, byte[] v)
+        public static float CosineDistance(byte[] u, byte[] v)
         {
             if (u.Length != v.Length)
             {
@@ -270,7 +340,7 @@ namespace ZeroLevel.HNSW
             return 1 - similarity;
         }
 
-        public static float Cosine(int[] u, int[] v)
+        public static float CosineDistance(int[] u, int[] v)
         {
             if (u.Length != v.Length)
             {
@@ -309,7 +379,7 @@ namespace ZeroLevel.HNSW
             return 1 - similarity;
         }
 
-        public static float Cosine(long[] u, long[] v)
+        public static float CosineDistance(long[] u, long[] v)
         {
             if (u.Length != v.Length)
             {
@@ -363,5 +433,13 @@ namespace ZeroLevel.HNSW
             var similarity = dot / (float)(Math.Sqrt(nru) * Math.Sqrt(nrv));
             return 1 - similarity;
         }
+
+        public static float CosineClipped(float[] u, float[] v, float min, float max)
+        {
+            var similarity = CosineDistance(u, v);
+            if (min > similarity) similarity = min;
+            if (max < similarity) similarity = max;
+            return similarity;
+        }
     }
 }
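Since the factory methods return plain Func delegates, a caller picks a metric once and reuses it, e.g. to compare the face embeddings produced by the encoders above. A sketch; embeddingA and embeddingB are hypothetical unit-length vectors:

    using ZeroLevel.Services.Mathemathics;

    // Cosine distance of unit vectors lies in [0, 2]: 0 = identical direction.
    var distance = Metrics.CreateFloat(KnownMetrics.Cosine);
    float d = distance(embeddingA, embeddingB);
    // clamp noisy values into a known range if required:
    float clamped = Metrics.CosineClipped(embeddingA, embeddingB, 0f, 1f);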