using System;
using System.Collections;
using System.IO;
using System.Runtime.CompilerServices;
using ZeroLevel.Services.HashFunctions;

namespace ZeroLevel.DataStructures
{
    /// <summary>
    /// Probabilistic set of strings backed by four equally sized bit arrays.
    /// Every item sets exactly one bit in each array; an item is reported as
    /// present only when all four of its bits are set, so distinct items that
    /// map to the same four bits are indistinguishable.
    /// </summary>
    public class BloomFilter
    {
        #region Private
        /// <summary>
        /// Bit positions of a single item, one per backing array.
        /// </summary>
        private struct HIND
        {
            public int IndexFirst;
            public int IndexSecond;
            public int IndexThird;
            public int IndexReverse;
        }

        private BitArray _primary;
        private BitArray _second;
        private BitArray _third;
        private BitArray _reverse;

        private readonly XXHashUnsafe _hash_xx_32 = new XXHashUnsafe();
        private readonly Murmur3Unsafe _hash_mm_32 = new Murmur3Unsafe();
        #endregion

        /// <summary>
        /// Creates an empty filter whose four bit arrays each hold
        /// <paramref name="bit_size"/> bits, rounded up to the next multiple of 8
        /// so that every array serializes to a whole number of bytes.
        /// </summary>
        /// <param name="bit_size">Requested number of bits per array; must be positive.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="bit_size"/> is zero or negative.
        /// </exception>
        public BloomFilter(int bit_size)
        {
            if (bit_size <= 0)
            {
                // A zero-length array would make every index computation divide by zero.
                throw new ArgumentOutOfRangeException(nameof(bit_size), "Bit size must be positive.");
            }
            var diff = bit_size % 8;
            if (diff != 0)
            {
                // Round UP to a byte boundary. Adding the remainder itself (the previous
                // behavior) does not produce a multiple of 8 and made Dump() fail.
                bit_size += 8 - diff;
            }
            _primary = new BitArray(bit_size);
            _second = new BitArray(bit_size);
            _third = new BitArray(bit_size);
            _reverse = new BitArray(bit_size);
        }

        /// <summary>
        /// Used by <see cref="Load(byte[])"/>, which assigns the bit arrays itself.
        /// </summary>
        private BloomFilter()
        {
        }

        /// <summary>
        /// Computes the four bit positions for <paramref name="line"/> by mixing hashes
        /// of the string and of its character-reversed form, each reduced modulo the
        /// length of the corresponding array.
        /// </summary>
        private HIND Compute(string line)
        {
            var r = Reverse(line);
            var first = HashMM(line) ^ StringHash.DotNetFullHash(line);
            var second = HashXX(line) ^ StringHash.DotNetFullHash(r);
            var third = HashMM(r) ^ StringHash.CustomHash(line);
            var reverse = HashXX(r) ^ StringHash.CustomHash2(r);
            var hind = new HIND
            {
                IndexFirst = (int)(first % _primary.Length),
                IndexSecond = (int)(second % _second.Length),
                IndexThird = (int)(third % _third.Length),
                IndexReverse = (int)(reverse % _reverse.Length)
            };
            return hind;
        }

        /// <summary>
        /// Adds <paramref name="item"/> to the filter. Null and empty strings are ignored.
        /// </summary>
        public void Add(string item)
        {
            if (string.IsNullOrEmpty(item))
            {
                return;
            }
            Add(Compute(item));
        }

        /// <summary>
        /// Checks whether <paramref name="item"/> may have been added.
        /// </summary>
        /// <returns>
        /// false when at least one of the item's four bits is unset; true when all four
        /// are set. Null and empty strings are always reported as present, matching
        /// <see cref="Add(string)"/> ignoring them and <see cref="TryAdd(string)"/>
        /// returning false for them.
        /// </returns>
        public bool Contains(string item)
        {
            if (string.IsNullOrEmpty(item))
            {
                return true;
            }
            return Contains(Compute(item));
        }

        /// <summary>
        /// Adds <paramref name="item"/> unless the filter already reports it as present.
        /// </summary>
        /// <returns>
        /// true if added, false if already exists (or if the item is null or empty).
        /// </returns>
        public bool TryAdd(string item)
        {
            if (string.IsNullOrEmpty(item))
            {
                return false;
            }
            // Compute the indexes once and reuse them for both the lookup and the insert.
            var hind = Compute(item);
            if (Contains(hind))
            {
                return false;
            }
            Add(hind);
            return true;
        }

        /// <summary>
        /// Returns <paramref name="s"/> with its UTF-16 code units in reverse order.
        /// </summary>
        public static string Reverse(string s)
        {
            char[] charArray = s.ToCharArray();
            Array.Reverse(charArray);
            return new string(charArray);
        }

        /// <summary>
        /// Sets the four bits described by <paramref name="hind"/>.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void Add(HIND hind)
        {
            _primary[hind.IndexFirst] = true;
            _second[hind.IndexSecond] = true;
            _third[hind.IndexThird] = true;
            _reverse[hind.IndexReverse] = true;
        }

        /// <summary>
        /// Returns true only when all four bits described by <paramref name="hind"/> are set.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private bool Contains(HIND hind)
        {
            if (!_primary[hind.IndexFirst]) return false;
            if (!_second[hind.IndexSecond]) return false;
            if (!_third[hind.IndexThird]) return false;
            if (!_reverse[hind.IndexReverse]) return false;
            return true;
        }

        /// <summary>
        /// Hashes <paramref name="line"/> with the filter's <see cref="XXHashUnsafe"/> instance.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private uint HashXX(string line)
        {
            return _hash_xx_32.Hash(line);
        }

        /// <summary>
        /// Hashes <paramref name="line"/> with the filter's <see cref="Murmur3Unsafe"/> instance.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private uint HashMM(string line)
        {
            return _hash_mm_32.Hash(line);
        }

        /// <summary>
        /// Compares this filter with <paramref name="other"/> bit by bit.
        /// </summary>
        /// <returns>
        /// true when all four bit arrays have the same length and content;
        /// false otherwise, including when <paramref name="other"/> is null.
        /// </returns>
        public bool IsEqual(BloomFilter other)
        {
            if (other == null)
            {
                return false;
            }
            return Equals(this._primary, other._primary)
                && Equals(this._second, other._second)
                && Equals(this._third, other._third)
                && Equals(this._reverse, other._reverse);
        }

        /// <summary>
        /// Compares two bit arrays by length and then bit by bit.
        /// </summary>
        public bool Equals(BitArray first, BitArray second)
        {
            if (first.Length != second.Length)
            {
                return false;
            }
            for (int i = 0; i < first.Length; i++)
            {
                if (first[i] != second[i])
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Serializes the filter as four consecutive sections (primary, second, third,
        /// reverse), each a 4-byte length written with <see cref="BitConverter"/>
        /// followed by that many bytes of bit data.
        /// </summary>
        /// <returns>Buffer that <see cref="Load(byte[])"/> can read back.</returns>
        public byte[] Dump()
        {
            using (var ms = new MemoryStream())
            {
                WriteBits(ms, _primary);
                WriteBits(ms, _second);
                WriteBits(ms, _third);
                WriteBits(ms, _reverse);
                return ms.ToArray();
            }
        }

        /// <summary>
        /// Restores a filter from a buffer produced by <see cref="Dump"/>.
        /// </summary>
        /// <param name="data">Serialized filter.</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The buffer is truncated or contains an invalid section length.
        /// </exception>
        public static BloomFilter Load(byte[] data)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }
            var bf = new BloomFilter();
            using (var ms = new MemoryStream(data))
            {
                bf._primary = ReadBits(ms);
                bf._second = ReadBits(ms);
                bf._third = ReadBits(ms);
                bf._reverse = ReadBits(ms);
            }
            return bf;
        }

        /// <summary>
        /// Writes one length-prefixed section containing the bits of <paramref name="bits"/>.
        /// </summary>
        private static void WriteBits(MemoryStream target, BitArray bits)
        {
            // Array lengths are always a multiple of 8: the public constructor rounds up
            // and Load builds each array from whole bytes.
            int byteCount = bits.Length / 8;
            target.Write(BitConverter.GetBytes(byteCount), 0, sizeof(int));
            byte[] payload = new byte[byteCount];
            bits.CopyTo(payload, 0);
            target.Write(payload, 0, byteCount);
        }

        /// <summary>
        /// Reads one length-prefixed section and returns it as a bit array.
        /// </summary>
        private static BitArray ReadBits(MemoryStream source)
        {
            byte[] header = new byte[sizeof(int)];
            if (source.Read(header, 0, header.Length) != header.Length)
            {
                throw new InvalidDataException("Bloom filter dump is truncated: section length is missing.");
            }
            int byteCount = BitConverter.ToInt32(header, 0);
            // Validate against what is actually left before allocating, so a corrupted
            // length can neither trigger a huge allocation nor yield a zero-padded filter.
            long remaining = source.Length - source.Position;
            if (byteCount <= 0 || byteCount > remaining)
            {
                throw new InvalidDataException("Bloom filter dump is corrupted: invalid section length.");
            }
            byte[] payload = new byte[byteCount];
            if (source.Read(payload, 0, byteCount) != byteCount)
            {
                throw new InvalidDataException("Bloom filter dump is truncated: section data is incomplete.");
            }
            return new BitArray(payload);
        }
    }
}