chore: rename package

This commit is contained in:
Tiara Rodney 2025-06-20 20:33:37 +02:00
parent dd57ecabb9
commit 1fb1e0d0bf
No known key found for this signature in database
GPG key ID: 5F43FAB4FBE5B5EB
11 changed files with 7 additions and 7 deletions

View file

@ -1,91 +0,0 @@
from typing import Optional
class RollingHash:
    """Rabin-Karp rolling hash over a fixed-length window of bytes.

    The hash of a window ``b[0] .. b[n-1]`` is the polynomial
    ``b[0]*base^(n-1) + b[1]*base^(n-2) + ... + b[n-1]`` reduced modulo
    ``mod``. :meth:`roll` slides the window one byte forward in constant
    time instead of recomputing the whole polynomial.
    """

    #: default base
    base: int = 31
    #: default modulus
    mod: int = 10**9 + 7
    #: current computed hash
    _hash: int
    #: base actually used by this instance (e.g., 31)
    _base: int
    #: modulus actually used by this instance (keeps the hash bounded)
    _mod: int
    # Precomputation of ``base^(length-1) % mod`` for removing the old byte
    # when rolling over
    _hbase_factor: int

    def __init__(
        self,
        data: bytes,
        base: Optional[int] = None,
        mod: Optional[int] = None
    ):
        """Initialize the rolling hash from the first window.

        The window length is fixed to ``len(data)`` for the lifetime of the
        object.

        :param data: bytes of the initial window
        :param base: polynomial base; any falsy value (``None`` or ``0``)
            selects the class default :attr:`base`
        :param mod: modulus; any falsy value (``None`` or ``0``) selects the
            class default :attr:`mod`
        """
        self._base = base if base else RollingHash.base
        self._mod = mod if mod else RollingHash.mod
        self._hash = RollingHash.compute_initial_hash(
            data,
            self._base,
            self._mod
        )
        # Weight of the oldest byte in the window: base^(len-1) % mod.
        # NOTE: for empty ``data`` the exponent is -1; ``pow`` then computes
        # a modular inverse (Python 3.8+) or raises ``ValueError``, so a
        # non-empty window is the only meaningful input here.
        self._hbase_factor = pow(self._base, len(data) - 1, self._mod)

    @staticmethod
    def compute_initial_hash(
        data: bytes,
        base: int,
        mod: int,
    ) -> int:
        """Compute the polynomial hash of ``data``.

        Rather use this standalone for computing the hash of the search
        pattern, to avoid the overhead of instantiating an object.

        :param data: data to build hash for
        :param base: polynomial base
        :param mod: modulus applied after every step
        :returns: hash of data (``0`` for empty data)
        """
        hash_ = 0
        for byte in data:
            # Reduce at every step so the intermediate integer stays small
            # instead of growing with the length of the data.
            hash_ = (hash_ * base + byte) % mod
        return hash_

    def roll(self, old_byte: int, new_byte: int) -> int:
        """Efficiently update hash by removing ``old_byte`` and adding
        ``new_byte``.

        The old_byte removal uses a pre-computed value of the highest base
        power used in the polynomial calculation, which avoids recomputing
        it on every step.

        The caller has to supply ``old_byte`` because the object does not
        keep a copy of the data; storing it would double the memory
        consumption, as the data must also live outside of this object. A
        memoryview could avoid the copy, at the cost of a more complex
        implementation.

        :param old_byte: The ordinal of the first byte in buffer to roll over
        :param new_byte: The ordinal of the byte newly appended to the buffer
        :returns: hash of the window after the shift
        """
        # Use the per-instance base/modulus: the stored hash and
        # ``_hbase_factor`` were derived from them, so the class defaults
        # would give wrong results for a custom ``base``/``mod``.
        # Remove old (Python's ``%`` keeps the result non-negative).
        self._hash = (self._hash - old_byte * self._hbase_factor) % self._mod
        # Add new
        self._hash = (self._hash * self._base + new_byte) % self._mod
        return self._hash