using System;
using System.Collections.Generic;
namespace ZeroLevel.Services.Semantic.Helpers
{
/// <summary>
/// Substring search helpers based on the Knuth–Morris–Pratt (KMP) algorithm.
/// Characters are compared by ordinal value (plain <c>char</c> equality).
/// </summary>
public static class KnuthMorrisPratt
{
    /// <summary>
    /// Builds the KMP partial-match table for <paramref name="pattern"/>.
    /// </summary>
    /// <param name="pattern">The pattern to analyse. Must not be null.</param>
    /// <returns>
    /// An array with one entry per pattern character. Entry 0 is -1; entry <c>i</c> (for i &gt; 0)
    /// is the length of the longest proper prefix of the pattern that is also a suffix of
    /// <c>pattern[0..i-1]</c>. For an empty pattern the array is empty.
    /// </returns>
    private static int[] BuildKMPTable(string pattern)
    {
        var kmpTable = new int[pattern.Length];
        if (kmpTable.Length == 0)
        {
            return kmpTable;
        }

        // The first two values are fixed: -1 and 0 (the 0 is already the array default).
        kmpTable[0] = -1;

        int tableIndex = 2;     // position in the table currently being computed
        int candidateIndex = 0; // length of the prefix currently being extended

        while (tableIndex < kmpTable.Length)
        {
            if (pattern[tableIndex - 1] == pattern[candidateIndex])
            {
                // The current prefix continues.
                kmpTable[tableIndex++] = ++candidateIndex;
            }
            else if (candidateIndex != 0)
            {
                // It does not, but a shorter prefix may still fit; retry with it.
                candidateIndex = kmpTable[candidateIndex];
            }
            else
            {
                // No candidate prefix is left.
                kmpTable[tableIndex++] = 0;
            }
        }

        return kmpTable;
    }

    /// <summary>
    /// Moves the search window forward using the partial-match table entry for the
    /// current pattern position.
    /// </summary>
    /// <param name="kmpTable">The table produced by <see cref="BuildKMPTable"/>.</param>
    /// <param name="matchIndex">Start of the current candidate match in the target; updated in place.</param>
    /// <param name="patternIndex">Current position in the pattern; updated in place.</param>
    private static void Shift(int[] kmpTable, ref int matchIndex, ref int patternIndex)
    {
        int fallback = kmpTable[patternIndex];
        if (fallback > -1)
        {
            // Keep the already-matched prefix of length 'fallback' and realign the window.
            matchIndex += patternIndex - fallback;
            patternIndex = fallback;
        }
        else
        {
            // At the first pattern character nothing can be reused: slide by one.
            matchIndex++;
            patternIndex = 0;
        }
    }

    /// <summary>
    /// Searches for the first occurrence of a pattern in a target using Knuth–Morris–Pratt's algorithm.
    /// </summary>
    /// <param name="target">The string to search in.</param>
    /// <param name="pattern">The string to search for.</param>
    /// <returns>
    /// The zero-based position of the first occurrence of the pattern, or -1 if it is not found.
    /// An empty pattern is treated as not found.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="target"/> or <paramref name="pattern"/> is null.
    /// </exception>
    public static int KnuthMorrisPrattSearchFirst(string target, string pattern)
    {
        if (target == null) throw new ArgumentNullException(nameof(target));
        if (pattern == null) throw new ArgumentNullException(nameof(pattern));

        int targetLength = target.Length;
        int patternLength = pattern.Length;

        // An empty pattern would index past the end of the pattern below, and a pattern
        // longer than the target can never match; both are reported as "not found".
        if (patternLength == 0 || patternLength > targetLength)
        {
            return -1;
        }

        var kmpTable = BuildKMPTable(pattern);
        int matchIndex = 0;   // start of the current candidate match in the target
        int patternIndex = 0; // position in the pattern

        while (matchIndex + patternIndex < targetLength)
        {
            if (pattern[patternIndex] == target[matchIndex + patternIndex])
            {
                patternIndex++;
                if (patternIndex == patternLength)
                {
                    return matchIndex;
                }
            }
            else
            {
                // Mismatch: realign the pattern without re-reading matched target characters.
                Shift(kmpTable, ref matchIndex, ref patternIndex);
            }
        }

        return -1;
    }

    /// <summary>
    /// Searches for all occurrences of a pattern in a target using Knuth–Morris–Pratt's algorithm.
    /// Overlapping occurrences are reported.
    /// </summary>
    /// <param name="target">The string to search in.</param>
    /// <param name="pattern">The string to search for.</param>
    /// <returns>
    /// A list of the zero-based positions at which the pattern occurs, in ascending order.
    /// The list is empty if there is no occurrence or the pattern is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="target"/> or <paramref name="pattern"/> is null.
    /// </exception>
    public static IList<int> KnuthMorrisPrattSearchAll(string target, string pattern)
    {
        if (target == null) throw new ArgumentNullException(nameof(target));
        if (pattern == null) throw new ArgumentNullException(nameof(pattern));

        var matches = new List<int>();

        int targetLength = target.Length;
        int patternLength = pattern.Length;

        // Same guard as the single-match search: nothing to report for an empty
        // pattern or for a pattern that cannot fit into the target.
        if (patternLength == 0 || patternLength > targetLength)
        {
            return matches;
        }

        var kmpTable = BuildKMPTable(pattern);
        int matchIndex = 0;   // start of the current candidate match in the target
        int patternIndex = 0; // position in the pattern

        while (matchIndex + patternIndex < targetLength)
        {
            if (pattern[patternIndex] == target[matchIndex + patternIndex])
            {
                patternIndex++;
                if (patternIndex == patternLength)
                {
                    matches.Add(matchIndex);

                    // Find where the next match may begin: step back onto the last pattern
                    // character and shift as if it had mismatched. The window always moves
                    // forward by at least one and never jumps past an overlapping occurrence.
                    patternIndex--;
                    Shift(kmpTable, ref matchIndex, ref patternIndex);
                }
            }
            else
            {
                // Mismatch: realign the pattern without re-reading matched target characters.
                Shift(kmpTable, ref matchIndex, ref patternIndex);
            }
        }

        return matches;
    }

    /// <summary>
    /// Searches for the first occurrence of multiple patterns in a target using Knuth–Morris–Pratt's algorithm.
    /// </summary>
    /// <param name="target">The string to search in.</param>
    /// <param name="patterns">A list of patterns to search for.</param>
    /// <returns>
    /// A dictionary whose keys are the patterns and whose values are the positions of their first occurrence.
    /// If a pattern is not found there is no entry for it in the dictionary.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="target"/> or <paramref name="patterns"/> is null, or a pattern in the list is null.
    /// </exception>
    public static Dictionary<string, int> KnuthMorrisPrattMultipleSearchFirst(string target, IList<string> patterns)
    {
        if (target == null) throw new ArgumentNullException(nameof(target));
        if (patterns == null) throw new ArgumentNullException(nameof(patterns));

        var matches = new Dictionary<string, int>();
        for (int i = 0; i < patterns.Count; i++)
        {
            string pattern = patterns[i];
            int position = KnuthMorrisPrattSearchFirst(target, pattern);
            if (position > -1)
            {
                // Indexer assignment instead of Add: a pattern listed twice yields the same
                // position and must not fail with a duplicate-key exception.
                matches[pattern] = position;
            }
        }

        return matches;
    }

    /// <summary>
    /// Searches for all occurrences of multiple patterns in a target using Knuth–Morris–Pratt's algorithm.
    /// </summary>
    /// <param name="target">The string to search in.</param>
    /// <param name="patterns">A list of patterns to search for.</param>
    /// <returns>
    /// A dictionary whose keys are the patterns and whose values are lists of the positions at which
    /// the pattern occurs. If a pattern is not found there is no entry for it in the dictionary.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="target"/> or <paramref name="patterns"/> is null, or a pattern in the list is null.
    /// </exception>
    public static Dictionary<string, List<int>> KnuthMorrisPrattMultipleSearchAll(string target, IList<string> patterns)
    {
        if (target == null) throw new ArgumentNullException(nameof(target));
        if (patterns == null) throw new ArgumentNullException(nameof(patterns));

        var matches = new Dictionary<string, List<int>>();
        for (int i = 0; i < patterns.Count; i++)
        {
            string pattern = patterns[i];
            var positions = new List<int>(KnuthMorrisPrattSearchAll(target, pattern));
            if (positions.Count > 0)
            {
                // Indexer assignment instead of Add: a pattern listed twice yields the same
                // positions and must not fail with a duplicate-key exception.
                matches[pattern] = positions;
            }
        }

        return matches;
    }
}
}