using System;
using System.Collections.Generic;

namespace ZeroLevel.Services.Semantic.Helpers
{
    /// <summary>
    /// Substring search helpers based on the Knuth–Morris–Pratt algorithm.
    /// Characters are compared with the <c>==</c> operator on <see cref="char"/>,
    /// i.e. the comparison is ordinal and case-sensitive.
    /// </summary>
    public static class KnuthMorrisPratt
    {
        /// <summary>
        /// Builds the KMP fallback table for <paramref name="pattern"/>.
        /// </summary>
        /// <param name="pattern">The pattern to preprocess. Must not be null.</param>
        /// <returns>
        /// An array of the same length as the pattern. Entry 0 is the sentinel -1
        /// ("no fallback, advance in the target"); entry <c>i</c> (i &gt; 0) is the length of the
        /// longest proper prefix of the pattern that is also a suffix of its first <c>i</c> characters.
        /// </returns>
        private static int[] BuildKMPTable(string pattern)
        {
            var kmpTable = new int[pattern.Length];
            if (kmpTable.Length == 0)
            {
                return kmpTable;
            }

            // Entry 0 is always the sentinel; entry 1 (when present) is always 0,
            // which is already the array default.
            kmpTable[0] = -1;

            int tableIndex = 2;   // next table entry to compute
            int prefixLength = 0; // length of the prefix candidate currently being extended

            while (tableIndex < kmpTable.Length)
            {
                if (pattern[tableIndex - 1] == pattern[prefixLength])
                {
                    // The candidate prefix grows by one character.
                    kmpTable[tableIndex++] = ++prefixLength;
                }
                else if (prefixLength != 0)
                {
                    // Retry with the next shorter candidate; the table entry is not written yet.
                    prefixLength = kmpTable[prefixLength];
                }
                else
                {
                    // No candidate left: nothing can be reused at this position.
                    kmpTable[tableIndex++] = 0;
                }
            }

            return kmpTable;
        }

        /// <summary>
        /// Moves the search window forward after a mismatch at <paramref name="patternIndex"/>,
        /// reusing the already matched prefix described by the fallback table.
        /// </summary>
        private static void Shift(int[] kmpTable, ref int matchIndex, ref int patternIndex)
        {
            int fallback = kmpTable[patternIndex];
            if (fallback > -1)
            {
                // Keep the first 'fallback' matched characters and slide the window accordingly.
                matchIndex += patternIndex - fallback;
                patternIndex = fallback;
            }
            else
            {
                // Sentinel: mismatch on the first pattern character, just step in the target.
                matchIndex++;
                patternIndex = 0;
            }
        }

        /// <summary>
        /// Core of the "find all" search. Expects non-null arguments.
        /// </summary>
        /// <returns>A new list with the start positions of all (possibly overlapping) matches, in ascending order.</returns>
        private static List<int> FindAll(string target, string pattern)
        {
            var matches = new List<int>();

            int targetLength = target.Length;
            int patternLength = pattern.Length;

            // An empty pattern is reported as "no match" (indexing pattern[0] would otherwise throw);
            // a pattern longer than the target cannot match, so the table is not even built.
            if (patternLength == 0 || patternLength > targetLength)
            {
                return matches;
            }

            var kmpTable = BuildKMPTable(pattern);
            int matchIndex = 0;   // start of the current candidate match in the target
            int patternIndex = 0; // number of pattern characters matched so far

            while (matchIndex + patternIndex < targetLength)
            {
                if (pattern[patternIndex] == target[matchIndex + patternIndex])
                {
                    patternIndex++;
                    if (patternIndex == patternLength)
                    {
                        matches.Add(matchIndex);

                        // Continue as if the last character had mismatched: this re-examines that
                        // character under the next possible alignment, so overlapping matches are found.
                        patternIndex--;
                        Shift(kmpTable, ref matchIndex, ref patternIndex);
                    }
                }
                else
                {
                    Shift(kmpTable, ref matchIndex, ref patternIndex);
                }
            }

            return matches;
        }

        /// <summary>
        /// Searches for the first occurrence of a pattern in a target <see cref="string"/> using Knuth–Morris–Pratt's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="pattern">The <see cref="string"/> to search for.</param>
        /// <returns>The position of the first occurrence of the pattern, or -1 if it is not found.
        /// An empty pattern is never found, so -1 is returned for it.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="target"/> or <paramref name="pattern"/> is null.</exception>
        public static int KnuthMorrisPrattSearchFirst(string target, string pattern)
        {
            if (target == null) throw new ArgumentNullException(nameof(target));
            if (pattern == null) throw new ArgumentNullException(nameof(pattern));

            int targetLength = target.Length;
            int patternLength = pattern.Length;

            // An empty pattern is reported as "not found" (indexing pattern[0] would otherwise throw);
            // a pattern longer than the target cannot match.
            if (patternLength == 0 || patternLength > targetLength)
            {
                return -1;
            }

            var kmpTable = BuildKMPTable(pattern);
            int matchIndex = 0;   // start of the current candidate match in the target
            int patternIndex = 0; // number of pattern characters matched so far

            while (matchIndex + patternIndex < targetLength)
            {
                if (pattern[patternIndex] == target[matchIndex + patternIndex])
                {
                    patternIndex++;
                    if (patternIndex == patternLength)
                    {
                        return matchIndex;
                    }
                }
                else
                {
                    Shift(kmpTable, ref matchIndex, ref patternIndex);
                }
            }

            // The target was exhausted without a complete match.
            return -1;
        }

        /// <summary>
        /// Searches for all occurrences of a pattern in a target <see cref="string"/> using Knuth–Morris–Pratt's algorithm.
        /// Overlapping occurrences are reported.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="pattern">The <see cref="string"/> to search for.</param>
        /// <returns>An <see cref="IList{T}"/> of <see cref="int"/> positions at which the pattern occurs, in ascending order.
        /// The list is empty if the pattern is not found or is empty.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="target"/> or <paramref name="pattern"/> is null.</exception>
        public static IList<int> KnuthMorrisPrattSearchAll(string target, string pattern)
        {
            if (target == null) throw new ArgumentNullException(nameof(target));
            if (pattern == null) throw new ArgumentNullException(nameof(pattern));

            return FindAll(target, pattern);
        }

        /// <summary>
        /// Searches for the first occurrence of multiple patterns in a target <see cref="string"/> using Knuth–Morris–Pratt's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="patterns">An <see cref="IList{T}"/> of <see cref="string"/> patterns. Duplicate patterns are allowed and reported once.</param>
        /// <returns>A <see cref="Dictionary{TKey, TValue}"/> with the patterns as keys and the position of their first occurrence as values.
        /// If a pattern is not found there is no entry in the dictionary.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="target"/> or <paramref name="patterns"/> is null, or a pattern in the list is null.</exception>
        public static Dictionary<string, int> KnuthMorrisPrattMultipleSearchFirst(string target, IList<string> patterns)
        {
            if (target == null) throw new ArgumentNullException(nameof(target));
            if (patterns == null) throw new ArgumentNullException(nameof(patterns));

            var matches = new Dictionary<string, int>();
            for (int i = 0; i < patterns.Count; i++)
            {
                string pattern = patterns[i];

                // A repeated pattern would make Dictionary.Add throw; its result is already recorded.
                // Null entries fall through so the search reports them via ArgumentNullException.
                if (pattern != null && matches.ContainsKey(pattern))
                {
                    continue;
                }

                int position = KnuthMorrisPrattSearchFirst(target, pattern);
                if (position > -1)
                {
                    matches.Add(pattern, position);
                }
            }

            return matches;
        }

        /// <summary>
        /// Searches for all occurrences of multiple patterns in a target <see cref="string"/> using Knuth–Morris–Pratt's algorithm.
        /// </summary>
        /// <param name="target">The <see cref="string"/> to search in.</param>
        /// <param name="patterns">An <see cref="IList{T}"/> of <see cref="string"/> patterns. Duplicate patterns are allowed and reported once.</param>
        /// <returns>A <see cref="Dictionary{TKey, TValue}"/> with the patterns as keys and a <see cref="List{T}"/> of <see cref="int"/>
        /// positions at which each pattern occurs as values.
        /// If a pattern is not found there is no entry in the dictionary.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="target"/> or <paramref name="patterns"/> is null, or a pattern in the list is null.</exception>
        public static Dictionary<string, List<int>> KnuthMorrisPrattMultipleSearchAll(string target, IList<string> patterns)
        {
            if (target == null) throw new ArgumentNullException(nameof(target));
            if (patterns == null) throw new ArgumentNullException(nameof(patterns));

            var matches = new Dictionary<string, List<int>>();
            for (int i = 0; i < patterns.Count; i++)
            {
                string pattern = patterns[i];
                if (pattern == null) throw new ArgumentNullException(nameof(pattern));

                // A repeated pattern would make Dictionary.Add throw; its result is already recorded.
                if (matches.ContainsKey(pattern))
                {
                    continue;
                }

                List<int> positions = FindAll(target, pattern);
                if (positions.Count > 0)
                {
                    matches.Add(pattern, positions);
                }
            }

            return matches;
        }
    }
}