You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Zero/ZeroLevel/Services/Semantic/Search/KnuthMorrisPratt.cs

216 lines
8.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

using System;
using System.Collections.Generic;
namespace ZeroLevel.Services.Semantic.Helpers
{
public static class KnuthMorrisPratt
{
private static int[] BuildKMPTable(string pattern)
{
var kmpTable = new int[pattern.Length];
if (kmpTable.Length < 2)
{
if (kmpTable.Length > 0)
kmpTable[0] = -1;
return kmpTable;
}
int tableIndex = 2; // current position in table for computation
int patSubstrIndex = 0; // index in the pattern of the current substring
// First two values are fixed -1 and 0
kmpTable[0] = -1;
// Build table
while (tableIndex < kmpTable.Length)
{
// If the substring continues
if (pattern[tableIndex - 1] == pattern[patSubstrIndex])
{
kmpTable[tableIndex++] = ++patSubstrIndex;
}
// It does not but we can fall back
else if (patSubstrIndex != 0)
{
patSubstrIndex = kmpTable[patSubstrIndex];
}
// If we ran out of candidates
else
{
kmpTable[tableIndex++] = 0;
}
}
return kmpTable;
}
/// <summary>
/// Searches for the first occurrence of a pattern in a target <see cref="string"/> using KnuthMorrisPratt's algorithm.
/// </summary>
/// <param name="target">The <see cref="string"/> to search in.</param>
/// <param name="pattern">The <see cref="string"/> to search for.</param>
/// <returns>Returns the position of the first occurrence of the pattern. If not found returns -1.</returns>
public static int KnuthMorrisPrattSearchFirst(string target, string pattern)
{
if (target == null) throw new ArgumentNullException(nameof(target));
if (pattern == null) throw new ArgumentNullException(nameof(pattern));
// Build KMP table
var kmpTable = BuildKMPTable(pattern);
int matchIndex = 0; // position of the current match
int patternIndex = 0; // position in the pattern
// Save for faster access
int targetLength = target.Length;
int patternLength = pattern.Length;
while (matchIndex + patternIndex < targetLength)
{
if (pattern[patternIndex] == target[matchIndex + patternIndex])
{
patternIndex++;
if (patternIndex == patternLength)
return matchIndex;
}
else // we are not in the middle of a pattern
{
// if we can backtrack
if (kmpTable[patternIndex] > -1)
{
matchIndex = matchIndex + patternIndex - kmpTable[patternIndex];
patternIndex = kmpTable[patternIndex];
}
else // we can't backtrack (the beginning of the word)
{
matchIndex++;
patternIndex = 0;
}
}
}
// We haven't found anything
return -1;
}
/// <summary>
/// Searches for all occurences of a pattern in a target <see cref="string"/> using KnuthMorrisPratt's algorithm.
/// </summary>
/// <param name="target">The <see cref="string"/> to search in.</param>
/// <param name="pattern">The <see cref="string"/> to search for.</param>
/// <returns>Returns <see cref="IList{T}"/> of <see cref="int"/> values of the positions at which the pattern occurs. <see cref="IList{T}"/> is empty if none found.</returns>
public static IList<int> KnuthMorrisPrattSearchAll(string target, string pattern)
{
if (target == null) throw new ArgumentNullException(nameof(target));
if (pattern == null) throw new ArgumentNullException(nameof(pattern));
// List with matches
var matches = new List<int>();
// Build KMP table
var kmpTable = BuildKMPTable(pattern);
int matchIndex = 0; // position of the current match
int patternIndex = 0; // position in the pattern
// Save for faster access
int targetLength = target.Length;
int patternLength = pattern.Length;
while (matchIndex + patternIndex < targetLength)
{
if (pattern[patternIndex] == target[matchIndex + patternIndex])
{
patternIndex++;
if (patternIndex == patternLength)
{
matches.Add(matchIndex);
// Find where the next match will begin
patternIndex--;
// if we can backtrack
if (kmpTable[patternIndex] > -1)
{
matchIndex = matchIndex + patternIndex - kmpTable[patternIndex];
patternIndex = kmpTable[patternIndex];
}
else // we can't backtrack (the beginning of the word)
{
matchIndex++;
patternIndex = 0;
}
}
}
else // we are not in the middle of a pattern
{
// if we can backtrack
if (kmpTable[patternIndex] > -1)
{
matchIndex = matchIndex + patternIndex - kmpTable[patternIndex];
patternIndex = kmpTable[patternIndex];
}
else // we can't backtrack (the beginning of the word)
{
matchIndex++;
patternIndex = 0;
}
}
}
// We haven't found anything
return matches;
}
/// <summary>
/// Searches for the first occurrence of multiple patterns in a target <see cref="string"/> using KnuthMorrisPratt's algorithm.
/// </summary>
/// <param name="target">The <see cref="string"/> to search in.</param>
/// <param name="patterns">A <see cref="IList{T}"/> of <see cref="string"/> patterns.</param>
/// <returns>Retruns <see cref="Dictionary{TKey, TValue}"/> with <see cref="string"/> keys of the patterns and <see cref="int"/> values of the position of first occurence.
/// If a pattern is not found there is no entry in the dictionary.</returns>
public static Dictionary<string, int> KnuthMorrisPrattMultipleSearchFirst(string target, IList<string> patterns)
{
if (target == null) throw new ArgumentNullException(nameof(target));
if (patterns == null) throw new ArgumentNullException(nameof(patterns));
// Dictionary with matches
var matches = new Dictionary<string, int>();
for (int i = 0; i < patterns.Count; i++)
{
int postition = KnuthMorrisPrattSearchFirst(target, patterns[i]);
if (postition > -1)
matches.Add(patterns[i], postition);
}
return matches;
}
/// <summary>
/// Searches for all occurrences of multiple patterns in a target <see cref="string"/> using KnuthMorrisPratt's algorithm.
/// </summary>
/// <param name="target">The <see cref="string"/> to search in.</param>
/// <param name="patterns">A <see cref="IList{T}"/> of <see cref="string"/> patterns.</param>
/// <returns>Retruns <see cref="Dictionary{TKey, TValue}"/> with <see cref="string"/> keys of the patterns and <see cref="List{T}"/> of <see cref="int"/> values of the positions at which the pattern occurs.
/// If a pattern is not found there is no entry in the dictionary.</returns>
public static Dictionary<string, List<int>> KnuthMorrisPrattMultipleSearchAll(string target, IList<string> patterns)
{
if (target == null) throw new ArgumentNullException(nameof(target));
if (patterns == null) throw new ArgumentNullException(nameof(patterns));
// Dictionary with matches
var matches = new Dictionary<string, List<int>>();
for (int i = 0; i < patterns.Count; i++)
{
var postitions = new List<int>(KnuthMorrisPrattSearchAll(target, patterns[i]));
if (postitions.Count > 0)
matches.Add(patterns[i], postitions);
}
return matches;
}
}
}

Powered by TurnKey Linux.