# Reconstructed contents of src/byteb4rb1e_utils/string.py from the patch series
#   [PATCH 1/2] feat(string): init kmp string search
#   [PATCH 2/2] feat(string): add circular buffer support for KMP search
# with the attribute/name errors and the non-terminating circular scan fixed.
from typing import TYPE_CHECKING, Iterable, List, Union

if TYPE_CHECKING:
    # Only used in annotations; guarded so importing this module does not
    # require (or cyclically import) the collections module at runtime.
    from byteb4rb1e_utils.collections import CircularBuffer


class KnuthMorrisPratt:
    """Knuth-Morris-Pratt string searching algorithm implemented as a class

    The failure table is computed once at construction, so one instance can
    be reused to search many buffers for the same pattern.

    https://gwern.net/doc/cs/algorithm/1977-knuth.pdf
    """

    def __init__(self, pattern: bytes):
        """precomputes the failure table for ``pattern``

        :param pattern: byte sequence to search for; an empty pattern is
            accepted but never matches
        """
        self._table = KnuthMorrisPratt.build_table(pattern)
        self._pattern = pattern

    @staticmethod
    def build_table(pattern: bytes) -> List[int]:
        """builds the failure table

        ``table[i]`` is the length of the longest proper prefix of
        ``pattern[:i + 1]`` that is also a suffix of it.

        :param pattern: byte sequence to build the table for
        :returns: list with one entry per pattern byte (empty for an empty
            pattern)
        """
        table = [0] * len(pattern)
        j = 0
        for i in range(1, len(pattern)):
            # Fall back to shorter borders until one can be extended.
            while j > 0 and pattern[i] != pattern[j]:
                j = table[j - 1]
            if pattern[i] == pattern[j]:
                j += 1
            table[i] = j
        return table

    def _scan(self, stream: Iterable[int]) -> int:
        """runs the KMP automaton over ``stream``

        :param stream: iterable yielding the byte values to search, in order
        :returns: offset within ``stream`` of the first match or -1 if not
            found
        """
        pattern, table = self._pattern, self._table
        m = len(pattern)
        if m == 0:
            # Nothing to compare against; indexing pattern[0] would fail.
            return -1

        j = 0  # number of pattern bytes currently matched
        for offset, byte in enumerate(stream):
            while j > 0 and byte != pattern[j]:
                j = table[j - 1]
            if byte == pattern[j]:
                j += 1
                if j == m:
                    return offset - m + 1
        return -1

    def match_linear(
        self,
        data: Union[bytes, bytearray, memoryview],
        start: int = 0
    ) -> int:
        """match against a linear fixed-size buffer

        :param data: buffer to search
        :param start: non-negative index in ``data`` at which scanning begins
        :returns: index of the match or -1 if not found
        """
        hit = self._scan(data[i] for i in range(start, len(data)))
        return -1 if hit < 0 else start + hit

    def match_circular(self, data: "CircularBuffer") -> bool:
        """Finds the boundary using KMP, handling circular wraparound.

        Reads ``data.buf``, ``data.start``, ``data.end``, ``data.size`` and
        ``data.filled``. Scanning begins at ``data.start`` (the oldest data
        in the buffer) and visits every stored byte exactly once, so the
        search always terminates and cannot match across the seam between
        the newest and the oldest byte.

        :param data: circular buffer to search
        :returns: True if the pattern occurs in the buffered data, False
            otherwise
        """
        size = data.size
        if not size:
            return False

        head = data.start
        # A full buffer holds `size` bytes; otherwise the bytes between
        # start and end (modulo size) are the valid ones.
        count = size if data.filled else (data.end - head) % size
        buf = data.buf
        stream = (buf[(head + k) % size] for k in range(count))
        return self._scan(stream) >= 0