using System;
using System.Collections.Generic;

namespace ZeroLevel.Services.Semantic.Helpers
{
    /// <summary>
    /// Substring search helpers based on the bad-character rule of the Boyer-Moore algorithm.
    /// Characters are compared ordinally (char by char).
    /// </summary>
    public static class BoyerMoore
    {
        /// <summary>
        /// Builds the bad-character table. For every character of the pattern except the last one,
        /// the table holds the distance between the last position of the pattern and the rightmost
        /// occurrence of that character.
        /// </summary>
        /// <param name="pattern">The pattern to build the table for.</param>
        /// <returns>A map from character to its distance from the end of the pattern.</returns>
        private static Dictionary<char, int> BuildBadCharacterTable(string pattern)
        {
            var badCharTable = new Dictionary<char, int>();
            int patLength = pattern.Length;
            // The last character is intentionally skipped; later assignments overwrite earlier ones,
            // so each entry ends up describing the rightmost occurrence.
            for (int i = 0; i < patLength - 1; i++)
            {
                badCharTable[pattern[i]] = patLength - 1 - i;
            }
            return badCharTable;
        }

        /// <summary>
        /// Computes how far the window may be advanced after a mismatch at pattern index
        /// <paramref name="j"/>. The result is always at least 1.
        /// </summary>
        private static int ShiftAfterMismatch(Dictionary<char, int> badCharTable, char mismatched, int j, int patternLength)
        {
            int distance;
            if (!badCharTable.TryGetValue(mismatched, out distance))
            {
                // The character does not occur anywhere in pattern[0 .. length - 2], so no alignment
                // that still covers this target position can match: jump completely past it.
                return j + 1;
            }

            // Align the rightmost occurrence of the character with the mismatch position.
            // When that occurrence lies to the right of j the offset is not positive; fall back to 1.
            int offset = distance - patternLength + 1 + j;
            return offset > 1 ? offset : 1;
        }

        /// <summary>
        /// Computes how far the window may be advanced after a full match, based on the target
        /// character that immediately follows the current window. The result is always at least 1.
        /// </summary>
        private static int ShiftAfterMatch(Dictionary<char, int> badCharTable, string pattern, char next)
        {
            int patternLength = pattern.Length;

            // The table omits the last pattern character. If the following character equals it,
            // the very next alignment may match (e.g. "aa" in "aaa"), so only a shift of 1 is safe.
            if (patternLength == 0 || next == pattern[patternLength - 1])
            {
                return 1;
            }

            int distance;
            if (badCharTable.TryGetValue(next, out distance))
            {
                // Aligns the rightmost occurrence of the character with the following position.
                return distance + 1;
            }

            // The character does not occur in the pattern at all: skip past it.
            return patternLength + 1;
        }

        /// <summary>
        /// Searches for the first occurrence of a pattern in a target using Boyer-Moore's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="pattern">The <see cref="string"/> to search for.</param>
        /// <returns>
        /// The position of the first occurrence of the pattern, or -1 if it is not found.
        /// An empty pattern is found at position 0.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="target"/> or <paramref name="pattern"/> is null.
        /// </exception>
        public static int BoyerMooreSearchFirst(string target, string pattern)
        {
            if (target == null) throw new ArgumentNullException(nameof(target));
            if (pattern == null) throw new ArgumentNullException(nameof(pattern));

            // Build tables
            var badCharTable = BuildBadCharacterTable(pattern);

            // Faster access
            int patternLength = pattern.Length;
            int targetLength = target.Length;
            int endOfSearch = targetLength - patternLength;

            int i = 0;
            while (i <= endOfSearch)
            {
                // Start matching from the right end of the window
                int j = patternLength - 1;
                while (j >= 0 && target[i + j] == pattern[j])
                {
                    j--;
                }

                if (j < 0)
                {
                    return i; // found a match
                }

                // If we didn't find a match advance to the next candidate position
                i += ShiftAfterMismatch(badCharTable, target[i + j], j, patternLength);
            }

            // We haven't found anything
            return -1;
        }

        /// <summary>
        /// Searches for all occurrences (including overlapping ones) of a pattern in a target
        /// using Boyer-Moore's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="pattern">The <see cref="string"/> to search for.</param>
        /// <returns>
        /// An <see cref="IList{T}"/> with the positions, in ascending order, at which the pattern
        /// occurs. The list is empty if none are found.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="target"/> or <paramref name="pattern"/> is null.
        /// </exception>
        public static IList<int> BoyerMooreSearchAll(string target, string pattern)
        {
            if (target == null) throw new ArgumentNullException(nameof(target));
            if (pattern == null) throw new ArgumentNullException(nameof(pattern));

            // List with matches
            var matches = new List<int>();

            // Build tables
            var badCharTable = BuildBadCharacterTable(pattern);

            // Faster access
            int patternLength = pattern.Length;
            int targetLength = target.Length;
            int endOfSearch = targetLength - patternLength;

            int i = 0;
            while (i <= endOfSearch)
            {
                int j = patternLength - 1;
                while (j >= 0 && target[i + j] == pattern[j])
                {
                    j--;
                }

                if (j < 0)
                {
                    matches.Add(i); // found a match

                    // Compute next position to start matching again
                    if (i + patternLength < targetLength)
                    {
                        i += ShiftAfterMatch(badCharTable, pattern, target[i + patternLength]);
                    }
                    else
                    {
                        i++;
                    }
                }
                else
                {
                    // If we didn't find a match advance to the next candidate position
                    i += ShiftAfterMismatch(badCharTable, target[i + j], j, patternLength);
                }
            }

            return matches;
        }

        /// <summary>
        /// Searches for the first occurrence of multiple patterns in a target using Boyer-Moore's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="patterns">An <see cref="IList{T}"/> of patterns.</param>
        /// <returns>
        /// A <see cref="Dictionary{TKey, TValue}"/> whose keys are the patterns and whose values are
        /// the positions of their first occurrence. A pattern that is not found has no entry.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="target"/>, <paramref name="patterns"/> or one of the patterns is null.
        /// </exception>
        public static Dictionary<string, int> BoyerMooreMultipleSearchFirst(string target, IList<string> patterns)
        {
            if (target == null) throw new ArgumentNullException(nameof(target));
            if (patterns == null) throw new ArgumentNullException(nameof(patterns));

            // Dictionary with matches
            var matches = new Dictionary<string, int>();

            for (int i = 0; i < patterns.Count; i++)
            {
                string pattern = patterns[i];

                // A repeated pattern yields the same answer; skipping it avoids a duplicate-key exception.
                if (pattern != null && matches.ContainsKey(pattern))
                {
                    continue;
                }

                int position = BoyerMooreSearchFirst(target, pattern);
                if (position > -1)
                {
                    matches.Add(pattern, position);
                }
            }

            return matches;
        }

        /// <summary>
        /// Searches for all occurrences of multiple patterns in a target using Boyer-Moore's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="patterns">An <see cref="IList{T}"/> of patterns.</param>
        /// <returns>
        /// A <see cref="Dictionary{TKey, TValue}"/> whose keys are the patterns and whose values are
        /// lists of the positions at which each pattern occurs. A pattern that is not found has no entry.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="target"/>, <paramref name="patterns"/> or one of the patterns is null.
        /// </exception>
        public static Dictionary<string, List<int>> BoyerMooreMultipleSearchAll(string target, IList<string> patterns)
        {
            if (target == null) throw new ArgumentNullException(nameof(target));
            if (patterns == null) throw new ArgumentNullException(nameof(patterns));

            // Dictionary with matches
            var matches = new Dictionary<string, List<int>>();

            for (int i = 0; i < patterns.Count; i++)
            {
                string pattern = patterns[i];

                // A repeated pattern yields the same answer; skipping it avoids a duplicate-key exception.
                if (pattern != null && matches.ContainsKey(pattern))
                {
                    continue;
                }

                var positions = new List<int>(BoyerMooreSearchAll(target, pattern));
                if (positions.Count > 0)
                {
                    matches.Add(pattern, positions);
                }
            }

            return matches;
        }
    }
}