using System;
using System.Collections.Generic;

namespace ZeroLevel.Services.Semantic.Helpers
{
    /// <summary>
    /// Substring search based on the Knuth–Morris–Pratt algorithm.
    /// Characters are compared with <c>==</c>, i.e. by UTF-16 code unit.
    /// </summary>
    public static class KnuthMorrisPratt
    {
        /// <summary>
        /// Builds the KMP fallback table for <paramref name="pattern"/>.
        /// Entry 0 is the sentinel -1; entry <c>i</c> (for <c>i &gt; 0</c>) is the length of the
        /// longest proper prefix of the pattern that is also a suffix of its first <c>i</c> characters.
        /// </summary>
        /// <param name="pattern">The pattern to analyse. Must not be <c>null</c>.</param>
        /// <returns>An array with one entry per pattern character (empty for an empty pattern).</returns>
        private static int[] BuildKMPTable(string pattern)
        {
            var kmpTable = new int[pattern.Length];
            if (kmpTable.Length == 0)
            {
                return kmpTable;
            }

            // The first two values are fixed: -1 and 0 (the latter is the array default).
            kmpTable[0] = -1;

            int tableIndex = 2;      // current position in the table being computed
            int patSubstrIndex = 0;  // length of the prefix currently being extended

            while (tableIndex < kmpTable.Length)
            {
                if (pattern[tableIndex - 1] == pattern[patSubstrIndex])
                {
                    // The current prefix continues.
                    kmpTable[tableIndex++] = ++patSubstrIndex;
                }
                else if (patSubstrIndex != 0)
                {
                    // It does not, but a shorter prefix may still fit.
                    patSubstrIndex = kmpTable[patSubstrIndex];
                }
                else
                {
                    // Ran out of candidates.
                    kmpTable[tableIndex++] = 0;
                }
            }

            return kmpTable;
        }

        /// <summary>
        /// Lazily yields, in ascending order, every position at which <paramref name="pattern"/>
        /// occurs in <paramref name="target"/>, including overlapping occurrences.
        /// An empty pattern yields nothing.
        /// </summary>
        /// <param name="target">The text to search in. Must not be <c>null</c>.</param>
        /// <param name="pattern">The text to search for. Must not be <c>null</c>.</param>
        /// <returns>The start positions of all occurrences.</returns>
        private static IEnumerable<int> EnumerateMatches(string target, string pattern)
        {
            int patternLength = pattern.Length;
            if (patternLength == 0)
            {
                // Without this guard pattern[0] below would throw IndexOutOfRangeException.
                yield break;
            }

            int[] kmpTable = BuildKMPTable(pattern);
            int targetLength = target.Length;

            int matchIndex = 0;    // position in the target where the current candidate starts
            int patternIndex = 0;  // position in the pattern being compared

            while (matchIndex + patternIndex < targetLength)
            {
                if (pattern[patternIndex] == target[matchIndex + patternIndex])
                {
                    patternIndex++;
                    if (patternIndex < patternLength)
                    {
                        continue;
                    }

                    yield return matchIndex;

                    // Step back onto the last pattern character so that the fallback below
                    // positions the next candidate; this keeps overlapping matches.
                    patternIndex--;
                }

                int fallback = kmpTable[patternIndex];
                if (fallback > -1)
                {
                    // Shift the candidate so the already matched prefix is reused.
                    matchIndex += patternIndex - fallback;
                    patternIndex = fallback;
                }
                else
                {
                    // Nothing to reuse (first pattern character): advance by one.
                    matchIndex++;
                    patternIndex = 0;
                }
            }
        }

        /// <summary>
        /// Searches for the first occurrence of a pattern in a target using Knuth–Morris–Pratt's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="pattern">The <see cref="string"/> to search for.</param>
        /// <returns>
        /// The position of the first occurrence of the pattern, or -1 if it is not found.
        /// An empty pattern is never found.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="target"/> or <paramref name="pattern"/> is <c>null</c>.
        /// </exception>
        public static int KnuthMorrisPrattSearchFirst(string target, string pattern)
        {
            if (target == null)
            {
                throw new ArgumentNullException(nameof(target));
            }

            if (pattern == null)
            {
                throw new ArgumentNullException(nameof(pattern));
            }

            foreach (int position in EnumerateMatches(target, pattern))
            {
                return position;
            }

            return -1;
        }

        /// <summary>
        /// Searches for all occurrences of a pattern in a target using Knuth–Morris–Pratt's algorithm.
        /// Overlapping occurrences are reported.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="pattern">The <see cref="string"/> to search for.</param>
        /// <returns>
        /// An <see cref="IList{T}"/> of the positions at which the pattern occurs, in ascending order.
        /// The list is empty if the pattern is not found or is empty.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="target"/> or <paramref name="pattern"/> is <c>null</c>.
        /// </exception>
        public static IList<int> KnuthMorrisPrattSearchAll(string target, string pattern)
        {
            if (target == null)
            {
                throw new ArgumentNullException(nameof(target));
            }

            if (pattern == null)
            {
                throw new ArgumentNullException(nameof(pattern));
            }

            return new List<int>(EnumerateMatches(target, pattern));
        }

        /// <summary>
        /// Searches for the first occurrence of multiple patterns in a target using Knuth–Morris–Pratt's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="patterns">An <see cref="IList{T}"/> of patterns.</param>
        /// <returns>
        /// A <see cref="Dictionary{TKey,TValue}"/> keyed by pattern whose values are the position of
        /// the first occurrence. Patterns that are not found have no entry; a pattern listed more
        /// than once gets a single entry.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="target"/>, <paramref name="patterns"/> or one of the patterns is <c>null</c>.
        /// </exception>
        public static Dictionary<string, int> KnuthMorrisPrattMultipleSearchFirst(string target, IList<string> patterns)
        {
            if (target == null)
            {
                throw new ArgumentNullException(nameof(target));
            }

            if (patterns == null)
            {
                throw new ArgumentNullException(nameof(patterns));
            }

            var matches = new Dictionary<string, int>();
            for (int i = 0; i < patterns.Count; i++)
            {
                int position = KnuthMorrisPrattSearchFirst(target, patterns[i]);
                if (position > -1)
                {
                    // Indexer instead of Add: a repeated pattern must not throw.
                    matches[patterns[i]] = position;
                }
            }

            return matches;
        }

        /// <summary>
        /// Searches for all occurrences of multiple patterns in a target using Knuth–Morris–Pratt's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="patterns">An <see cref="IList{T}"/> of patterns.</param>
        /// <returns>
        /// A <see cref="Dictionary{TKey,TValue}"/> keyed by pattern whose values are a
        /// <see cref="List{T}"/> of the positions at which the pattern occurs. Patterns that are
        /// not found have no entry; a pattern listed more than once gets a single entry.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="target"/>, <paramref name="patterns"/> or one of the patterns is <c>null</c>.
        /// </exception>
        public static Dictionary<string, List<int>> KnuthMorrisPrattMultipleSearchAll(string target, IList<string> patterns)
        {
            if (target == null)
            {
                throw new ArgumentNullException(nameof(target));
            }

            if (patterns == null)
            {
                throw new ArgumentNullException(nameof(patterns));
            }

            var matches = new Dictionary<string, List<int>>();
            for (int i = 0; i < patterns.Count; i++)
            {
                var positions = new List<int>(KnuthMorrisPrattSearchAll(target, patterns[i]));
                if (positions.Count > 0)
                {
                    // Indexer instead of Add: a repeated pattern must not throw.
                    matches[patterns[i]] = positions;
                }
            }

            return matches;
        }
    }
}