KMP Algorithm

Knuth-Morris-Pratt (KMP) is an algorithm for pattern matching in a string similar to the Sliding-window technique. The primary difference in KMP is that the algorithm does not backtrack on the text string and instead creates a lookup up index in the pattern string.

To accomplish this, a prefix look up table is generated where all the sub-strings in the pattern string that match the largest repeating prefix are given index values. This index value is used as a point of reference for where to backtrack in the pattern string when a match is not found. This allows pattern matching without the need to back track in the text string.

Complexity

KMP has a time complexity of $O(n + m)$ where $n$ is the text length and $m$ is the pattern length. It has an auxiliary space of $O(m)$.

Examples

python

def search(pattern, text):
    results = []
    lps = setup_lps(pattern)
    j = 0
    for i, ch in enumerate(text):
        # if miss, rewind using the index found in the LPS
        while j and pattern[j] != ch:
            j = lps[j - 1]
        # match
        if pattern[j] == ch:
            # iterate until the match is confirmed
            if j == len(pattern) - 1:
                results.append(i - j)
                j = lps[j]
            else:
                j += 1
    return results

def setup_lps(pattern) -> List[int]:
    lps = [0] * len(pattern)
    prefixPointer = 0
    for i in range(1, len(pattern)):
        # rewind until match or back to the beginning
        while prefixPointer and pattern[i] != pattern[prefixPointer]:
            prefixPointer = lps[prefixPointer - 1]
        # increment the pointer and record the index
        if pattern[prefixPointer] == pattern[i]:
            prefixPointer += 1
            lps[i] = prefixPointer
    return lps

go

func search(pattern, text string) []int {
	results := []int{}
	textLength := len(text)
	patternLength := len(pattern)
	i := 0
	j := 0
	// pre-process the pattern
	lps := setupLPS(pattern)
	for (textLength - i) >= (patternLength - j) {
		if pattern[j] == text[i] {
			i++
			j++
		}
		if j == patternLength {
			results = append(results, i-j)
			j = lps[j-1]
			continue
		}
		if i < textLength && pattern[j] != text[i] {
			if j != 0 {
				j = lps[j-1]
			} else {
				i++
			}
		}
	}
	return results
}

func setupLPS(pattern string) []int {
	lps := make([]int, len(pattern))
	prefixPointer := 0
	i := 1
	for i < len(pattern) {
		// increment the pointer and record the index
		if pattern[i] == pattern[prefixPointer] {
			prefixPointer += 1
			lps[i] = prefixPointer
			i++
		} else {
			if prefixPointer != 0 {
				prefixPointer = lps[prefixPointer-1]
			} else {
				lps[i] = 0
				i++
			}
		}
	}
	return lps
}