chore: rename package

This commit is contained in:
Tiara Rodney 2025-06-20 20:33:37 +02:00
parent dd57ecabb9
commit 1fb1e0d0bf
No known key found for this signature in database
GPG key ID: 5F43FAB4FBE5B5EB
11 changed files with 7 additions and 7 deletions

View file

@ -0,0 +1,37 @@
class CircularBuffer:
    """fixed-capacity ring buffer keeping the most recently streamed bytes
    """

    #: backing storage, allocated once with ``size`` bytes
    buf: bytearray

    #: number of bytes the backing storage holds
    size: int

    #: index of the oldest element in the buffer
    start: int

    #: index the next appended byte is written to
    end: int

    #: set as soon as ``append`` had to advance ``start``
    filled: bool

    def __init__(self, size: int):
        """create an empty buffer backed by ``size`` zeroed bytes

        :param size: maximum number of bytes the buffer can hold
        """
        self.size = size
        self.buf = bytearray(size)
        self.start = self.end = 0
        self.filled = False

    def append(self, data: bytes):
        """write ``data`` byte by byte at the write index, wrapping around

        Whenever the write index catches up with ``start`` after a write,
        ``start`` is moved forward by one position and ``filled`` is set.

        :param data: byte sequence to append to the buffer
        """
        capacity = self.size

        for value in data:
            self.buf[self.end] = value
            self.end = (self.end + 1) % capacity

            if self.end != self.start:
                continue

            # the write index caught up with the read index
            self.start = (self.start + 1) % capacity
            self.filled = True

View file

View file

@ -0,0 +1,129 @@
from dataclasses import dataclass
from http.server import SimpleHTTPRequestHandler
from byteb4rb1e.utils.io import ChunksIO
@dataclass
class HandlerOptions:
    """settings of the HTTP POST method handler
    """

    #: largest accepted size of a single transfer-encoded chunk; defaults to
    #: the limit declared on ``ChunksIO``
    max_chunk_size: int = ChunksIO.max_chunk_size

    #: default size in bytes (512 KiB) of the in-memory sliding buffer that
    #: reads from the pure (unchunked) client read stream
    buffer_size: int = 1024 * 512
@dataclass
class ServerOptions:
    """settings of the HTTP server
    """

    #: options for the request handler (required, has no default)
    handler: HandlerOptions

    #: host name or address to bind to; empty by default
    hostname: str = ''

    #: TCP port to bind to
    port: int = 8_000
class MultipartUploadHandler(SimpleHTTPRequestHandler):
    """Simple, yet compliant HTTP/1.0 MIME Multipart Upload Handler

    Implementation of a RFC1341 & RFC7578 compliant server for handling
    multipart uploads.

    This is meant as a utility for debugging MIME Multipart upload clients

    Support for:

    - client 'Expect' header
    - chunked transfer-encoding
    """

    #: multipart media sub-types accepted in the 'Content-Type' header
    media_subtypes = [
        'mixed',
        'alternative',
        'parallel',
        'digest',
        'form-data',
    ]

    #: maximum number of characters accepted for the 'boundary' parameter
    max_boundary_length = 70

    def do_POST(self):
        """validate the headers of a multipart upload request and respond

        The 'Content-Type' header must announce one of the supported
        ``multipart/*`` media types and carry nothing but a ``boundary``
        parameter of 1 to ``max_boundary_length`` characters. A
        'Transfer-Encoding' header, if present, must be ``chunked``.

        Each validation failure is answered with exactly one error response,
        after which handling of the request stops. If the client sent
        ``Expect: 100-continue``, the interim response is sent once validation
        passed. The request body itself is not read yet; a valid request is
        answered with ``200 OK``.
        """
        h_content_type = self.headers.get('Content-Type')
        h_expect = self.headers.get('Expect')
        h_transfer_encoding = self.headers.get('Transfer-Encoding')

        if h_content_type is None:
            self.send_error(400, 'Missing \'Content-Type\' header')
            return

        content_type_segments = [s.strip() for s in h_content_type.split(';')]

        # ``str.split`` always yields at least one segment, so an absent value
        # shows up as an empty first segment rather than as an IndexError
        if not content_type_segments[0]:
            self.send_error(
                400,
                'no value was supplied for \'Content-Type\' header'
            )
            return

        try:
            media_type, media_subtype = content_type_segments[0].split('/', 1)
        except ValueError:
            self.send_error(
                400,
                'unable to parse media type and subtype from ' +
                'first (semicolon-delimited) segment of \'Content-Type\' ' +
                f'header value: {content_type_segments[0]}'
            )
            return

        # media type and sub-type names are not case sensitive
        media_type = media_type.strip().lower()
        media_subtype = media_subtype.strip().lower()

        if media_type != 'multipart':
            self.send_error(
                400,
                'unsupported media type in \'Content-Type\' header value: ' +
                f'{media_type}'
            )
            return

        if media_subtype not in self.media_subtypes:
            self.send_error(
                400,
                'unsupported media sub-type in \'Content-Type\' header ' +
                f'value: {media_subtype}. Must be one of ' +
                f'{", ".join(self.media_subtypes)}'
            )
            return

        # transfer coding names are not case sensitive either
        if h_transfer_encoding and \
                h_transfer_encoding.strip().lower() != 'chunked':
            self.send_error(
                501,
                f'unable to handle transfer-encoding: {h_transfer_encoding}'
            )
            return

        content_type_params = {}
        for segment in content_type_segments[1:]:
            # tolerate stray or trailing semicolons
            if not segment:
                continue

            name, separator, value = segment.partition('=')
            if not separator:
                self.send_error(
                    400,
                    'malformed parameter in \'Content-Type\' header ' +
                    f'value: {segment}'
                )
                return

            content_type_params[name.strip().lower()] = value.strip()

        boundary = content_type_params.pop('boundary', '')

        # a parameter value may be transmitted as a quoted string
        if len(boundary) >= 2 and boundary[0] == boundary[-1] == '"':
            boundary = boundary[1:-1]

        boundary_len = len(boundary)

        if boundary == '':
            self.send_error(
                400,
                'missing \'boundary\' parameter in \'Content-Type\' ' +
                'header field'
            )
            return

        if boundary_len > self.max_boundary_length:
            self.send_error(
                400,
                '\'boundary\' parameter value in \'Content-Type\' too long. ' +
                f'Is {boundary_len} characters long, must be at most ' +
                f'{self.max_boundary_length}.'
            )
            return

        # 'boundary' has been popped above, anything left is unexpected
        if content_type_params:
            self.send_error(
                400,
                'None other than \'boundary\' parameter in \'Content-Type\' ' +
                'header expected. Also received ' +
                f'{", ".join(content_type_params)}'
            )
            return

        # only send the interim response if the client actually asked for it
        if h_expect is not None and h_expect.strip().lower() == '100-continue':
            if not self.handle_expect_100():
                return

        # read the first 4-bytes of the body to check if it has a preamble
        # indication

        # well great... curl is not RFC 1341 compliant. And RFC 1341 is asking
        # for tolerance towards non-compliant clients...

        self.send_response(200, 'OK')
        self.end_headers()

View file

@ -0,0 +1,88 @@
from argparse import (
ArgumentParser,
ArgumentDefaultsHelpFormatter,
RawDescriptionHelpFormatter
)
from dataclasses import dataclass
from http.server import HTTPServer
from io import BytesIO, IOBase
from typing import Optional, Tuple, List
from byteb4rb1e.utils.http.server import (
HandlerOptions,
MultipartUploadHandler,
ServerOptions,
)
from byteb4rb1e.utils.io import ChunksIO
# module description; it is also handed to the argument parser below as its
# ``description``, so the text shows up verbatim in the ``--help`` output
__doc__ = """tsmuds - Tiara's Simple Multipart Upload Debugging Server
This is a simple standalone implementation of a HTTP/1.x multipart upload server
using the Python 3.9+ standard library - with an interface catered explicitly
towards debugging misbehaved clients.
Examples:
python3 tsmuds.py --port 8000
"""
# author contact, interpolated into the argument parser's epilog
__author__ = "Tiara Rodney <tiara.rodney@administratrix.de>"
class CustomArgparseFormatter(
    ArgumentDefaultsHelpFormatter,
    RawDescriptionHelpFormatter,
):
    """help formatter combining two stock argparse formatters

    Inherits from ``ArgumentDefaultsHelpFormatter``, so argument defaults are
    shown, and from ``RawDescriptionHelpFormatter``, so the description text
    is kept unformatted.
    """
# command line interface of the debugging server
argparser = ArgumentParser(
    prog='byteb4rb1e.http.server',
    formatter_class=CustomArgparseFormatter,
    description=__doc__,
    epilog=f"""(c) 2025, {__author__}
This software is licensed under the Creative Commons Attribution 4.0
International License (CC BY 4.0). For more details, visit:
https://creativecommons.org/licenses/by/4.0/
"""
)

argparser.add_argument(
    '--port',
    type=int,
    default=ServerOptions.port,
    help="bind to this port"
)

argparser.add_argument(
    '-b',
    '--bind',
    type=str,
    default=ServerOptions.hostname,
    help="bind to this address"
)

argparser.add_argument(
    '--max-chunk-size',
    type=int,
    metavar='INT',
    # the option is given in MiB while ChunksIO stores bytes; integer division
    # keeps the default an int, matching ``type=int``
    default=ChunksIO.max_chunk_size // (1024 ** 2),
    help="""maximum allowed size of chunk (in MiB) when RFC 9112 chunk
transfer encoding is requested by client"""
)

args = argparser.parse_args()

server_options = ServerOptions(
    hostname=args.bind,
    port=args.port,
    handler=HandlerOptions(
        # convert the MiB value from the command line back to bytes
        max_chunk_size=args.max_chunk_size * (1024 ** 2)
    ),
)

# NOTE(review): ``server_options.handler`` is assembled here, but nothing in
# this script hands it to MultipartUploadHandler yet.

# HTTPServer expects the listening address as a ``(host, port)`` tuple
with HTTPServer(
    (server_options.hostname, server_options.port),
    MultipartUploadHandler
) as httpd:
    print("serving at port", server_options.port)
    httpd.serve_forever()

View file

@ -0,0 +1,233 @@
from io import BytesIO, IOBase
import math
from typing import Iterator, List, Optional, Tuple
class ChunksIO(IOBase):
    """handler for HTTP/1.1 chunked transfer-encoded (RFC 9112 §7) byte streams

    Compact and predictable implementation of a RFC 9112 stream handler, which
    exposes a common IOBase interface for treating chunked byte streams as
    pure, unencoded byte streams.

    .. notice::

        The implementation is currently only concerned with read operations,
        though the layout is prepared for an easy straightforward
        implementation of write operations.

    .. notice::

        Chunk extensions (RFC 9112 §7.1.1) are not supported; a chunk size
        line carrying one is rejected with a ``ValueError``. Trailer fields
        following the last chunk are consumed and discarded.
    """

    #: maximum allowed size of a chunk
    # MiB by default, just guessing 10 MiB is a sensible limit
    max_chunk_size = int(10 * (1024 ** 2))

    #: maximum number of bytes accepted for the trailer section following the
    # last chunk (including the terminating empty line)
    max_trailer_size = 64 * 1024

    #: optional write-through buffer
    _buffer: Optional[BytesIO]

    #: per chunk: size of the size segment and size of the chunk data
    _chunks_size: List[Tuple[int, int]]

    #: index of current chunk
    _current_chunk: int

    #: cursor position on the underlying stream, as the stream is not expected
    # to implement ``tell()``. Limiting factor of how large the stream may be.
    # Look at ``sys.maxsize`` for more information.
    _cursor: int

    #: whether the terminating zero-sized chunk has been reached
    _eof: bool

    #: number of data bytes of the current chunk that have not been read yet
    _remaining: int

    #: chunk encoded stream
    _stream: BytesIO

    def __init__(
        self,
        stream: BytesIO,
        buffer: Optional[BytesIO] = None,
        max_chunk_size: Optional[int] = None,
    ):
        """initialize the instance

        .. notice::

            The write-through buffer is required to be writable and MUST be
            considered locked during any operation of the ChunksIO
            implementation. The buffer's cursor position does not reflect the
            cursor position of the underlying stream.

        :param stream: a byte-stream to abstract
        :param buffer: write-through buffer for all read operations on the
                       underlying stream. This can be useful, if the data needs
                       to be accessed again later on.
        :param max_chunk_size: the maximum size of a single chunk (excluding
                               its bytes size segment); the class default is
                               used when omitted
        :raises Exception: if ``stream`` is not readable or ``buffer`` is not
                           writable
        """
        if not stream.readable():
            raise Exception('expected readable stream')

        if buffer is not None and not buffer.writable():
            raise Exception('expected writable buffer')

        if max_chunk_size is not None:
            self.max_chunk_size = max_chunk_size

        self._buffer = buffer
        self._chunks_size = []
        self._current_chunk = 0
        self._cursor = 0
        self._eof = False
        self._remaining = 0
        self._stream = stream

        super().__init__()

    @staticmethod
    def get_chunk_size(
        stream: BytesIO,
        max_size: int,
    ) -> Tuple[int, int]:
        """get the size of the next chunk from a RFC 9112 (§7) chunk encoded
        byte stream

        stream cursor position is assumed to be at the start of the size
        segment preceding the chunk data. The size is transmitted as
        hexadecimal digits terminated by CRLF. The number of digits needed to
        express ``max_size`` bounds how many bytes are read, to determine
        early on if a read is feasible and won't cause a denial-of-service.

        :param stream: the stream to read the chunk size from
        :param max_size: the maximum allowed size a chunk can be. I wasn't able
                         to find a definitive limit defined in the RFC so this
                         is guess working and at least curl has a pretty big
                         chunk size of more than 6 MiB.
        :returns: tuple of the size of the bytes size segment (including its
                  terminator) and the data bytes size
        :raises ValueError: if the stream ends early, the terminator is not
                            reached in time, or the size is missing, malformed
                            or larger than ``max_size``
        """
        terminator = b'\r\n'
        hexdigits = b'0123456789abcdefABCDEF'

        # a size segment longer than the hexadecimal representation of
        # max_size can not describe an acceptable chunk
        max_digits = len(f'{max_size:x}')

        line = b''

        # the iteration could be handled with less system calls by reading a
        # larger *chunk* of data and iterating over that in-memory cache.
        # Though, this would come at the expense of unpredictable memory
        # consumption and would require a write-through buffer by default, in
        # addition to making the implementation more complex.
        for _ in range(max_digits + len(terminator)):
            byte = stream.read(1)

            if not byte:
                raise ValueError(
                    'stream ended before the chunk size terminator was ' +
                    'reached'
                )

            line += byte

            if line.endswith(terminator):
                break
        else:
            raise ValueError(
                'unable to reach terminator with a max chunk size of ' +
                f'{max_size / (1024 ** 2)} MiB'
            )

        digits = line[:-len(terminator)]

        if not digits:
            raise ValueError(
                'terminator reached without having parsed ' +
                'any byte size'
            )

        # int() is more lenient than the RFC grammar (sign, prefix,
        # underscores, whitespace), hence the explicit check
        if any(digit not in hexdigits for digit in digits):
            raise ValueError(f'malformed chunk size: {digits!r}')

        size = int(digits, 16)

        if size > max_size:
            raise ValueError(
                f'chunk size of {size} bytes exceeds the maximum of ' +
                f'{max_size} bytes'
            )

        return (len(line), size)

    def _begin_chunk(self) -> None:
        """parse the size segment of the next chunk and make it current"""
        index = len(self._chunks_size)

        try:
            chunk_size = ChunksIO.get_chunk_size(
                self._stream,
                self.max_chunk_size
            )
        except ValueError as e:
            raise ValueError(f'chunk #{index}: {e}') from e

        self._chunks_size.append(chunk_size)
        self._current_chunk = index
        self._cursor += chunk_size[0]
        self._remaining = chunk_size[1]

        # a zero-sized chunk terminates the stream
        if chunk_size[1] == 0:
            self._skip_trailer_section()
            self._eof = True

    def _end_chunk(self) -> None:
        """consume the CRLF that terminates the data of the current chunk"""
        terminator = self._stream.read(2)

        if terminator != b'\r\n':
            raise ValueError(
                f'chunk #{self._current_chunk}: chunk data is not ' +
                'terminated by CRLF'
            )

        self._cursor += len(terminator)

    def _skip_trailer_section(self) -> None:
        """consume trailer fields and the empty line that ends the message"""
        previous = b''
        line_length = 0

        for _ in range(self.max_trailer_size):
            byte = self._stream.read(1)

            # tolerate peers that end the stream right after the last chunk
            if not byte:
                return

            self._cursor += 1

            if previous == b'\r' and byte == b'\n':
                # a line consisting of nothing but CRLF ends the section
                if line_length == 1:
                    return

                previous = b''
                line_length = 0
                continue

            previous = byte
            line_length += 1

        raise ValueError(
            f'trailer section exceeds {self.max_trailer_size} bytes'
        )

    def read(self, size=-1) -> bytes:
        """read an arbitrary amount of decoded data from the underlying stream.

        Chunk boundaries are crossed transparently, as often as needed.

        :param size: maximum number of data bytes to return; ``None`` or a
                     negative value reads until the terminating chunk
        :returns: the decoded data, shorter than ``size`` (possibly empty) only
                  if the terminating chunk has been reached
        :raises ValueError: if the stream is malformed or ends prematurely
        """
        unbounded = size is None or size < 0
        wanted = 0 if unbounded else size
        parts = []

        while not self._eof and (unbounded or wanted > 0):
            if self._remaining == 0:
                self._begin_chunk()
                continue

            if unbounded:
                count = self._remaining
            else:
                count = min(wanted, self._remaining)

            data = self._stream.read(count)

            if len(data) != count:
                raise ValueError(
                    f'chunk #{self._current_chunk}: stream yielded too few bytes'
                )

            parts.append(data)
            self._cursor += count
            self._remaining -= count
            wanted -= 0 if unbounded else count

            if self._remaining == 0:
                self._end_chunk()

        buffer = b''.join(parts)

        if self._buffer is not None:
            self._buffer.write(buffer)

        return buffer

    def readable(self) -> bool:
        """report that the stream supports read operations
        """
        return True

    def readChunk(self) -> bytes:
        """read the data of the next chunk in full

        :returns: the data of the chunk, or empty bytes once the terminating
                  chunk has been reached
        :raises Exception: if the current chunk has already been read
                           partially
        :raises ValueError: if the stream is malformed or ends prematurely
        """
        if self._eof:
            return b''

        if self._remaining != 0:
            raise Exception(
                'cursor not at starting position of a chunk. Mixing ' +
                'read() and readChunk() calls is currently not supported.'
            )

        self._begin_chunk()

        # read() takes care of the write-through buffer and the cursor
        return self.read(self._remaining)

    def readChunks(self) -> Iterator[bytes]:
        """yield all chunks until the terminating 0 byte chunk is reached
        """
        while True:
            chunk = self.readChunk()

            if not chunk:
                return

            yield chunk

    def tell(self) -> int:
        """return the current position on the underlying (encoded) stream
        """
        return self._cursor

View file

@ -0,0 +1,91 @@
from typing import Optional
class RollingHash:
    """implementation of Rabin-Karp rolling hash
    """

    #: default base
    base: int = 31

    #: default modulus
    mod: int = 10**9 + 7

    #: current computed hash
    _hash: int

    #: prime number base (e.g., 31)
    _base: int

    #: large prime modulus (to prevent overflow)
    _mod: int

    # Precomputation of ``base^(length-1) % mod`` for removing the old byte when
    # rolling over
    _hbase_factor: int

    def __init__(
        self,
        data: bytes,
        base: Optional[int] = None,
        mod: Optional[int] = None
    ):
        """Initialize the rolling hash over an initial window of data.

        :param data: initial window; its length is the window length used by
                     all subsequent calls to ``roll``
        :param base: Prime number base (e.g., 31); the class default is used
                     when omitted (or zero)
        :param mod: Large prime modulus to prevent overflow; the class default
                    is used when omitted (or zero)
        """
        self._base = base if base else RollingHash.base
        self._mod = mod if mod else RollingHash.mod
        self._hash = RollingHash.compute_initial_hash(
            data,
            self._base,
            self._mod
        )
        # clamp the exponent so an empty window does not ask for a modular
        # inverse, which need not exist for an arbitrary modulus
        self._hbase_factor = pow(
            self._base,
            max(len(data) - 1, 0),
            self._mod
        )

    @staticmethod
    def compute_initial_hash(
        data: bytes,
        base: int,
        mod: int,
    ) -> int:
        """Compute the polynomial hash of ``data``.

        rather use this standalone for computing the hash of the search pattern,
        to avoid the overhead of instantiating an object.

        :param data: data to build hash for
        :param base: base of the polynomial
        :param mod: modulus applied after every step
        :returns: hash of data
        """
        hash_ = 0

        for byte in data:
            # computing the modulus at each iteration, as to avoid the summed
            # integer to be chunky, as in HUUUUGEE...
            hash_ = (hash_ * base + byte) % mod

        return hash_

    def roll(self, old_byte: int, new_byte: int) -> int:
        """Efficiently update hash by removing ``old_byte`` and adding
        ``new_byte``

        The old_byte removal uses a pre-computed value of the highest base used
        in the polynomial calculation. This speeds things up a bit.

        I was thinking about a way on how to store the old_byte efficiently
        within the class object, but that would require storing the entire data,
        basically doubling the memory consumption as the data must definitely
        also live outside of the class object. A memoryview could solve this
        problem, but at the cost of making the implementation more complex, so
        this will have to do.

        :param old_byte: The ordinal of the first byte in buffer to roll over
        :param new_byte: The ordinal of the byte newly appended to the buffer
        :returns: the hash of the window after rolling
        """
        # the instance's own base and modulus must be used here, otherwise the
        # result disagrees with the initial hash for non-default parameters

        # Remove old
        self._hash = (self._hash - old_byte * self._hbase_factor) % self._mod

        # Add new
        self._hash = (self._hash * self._base + new_byte) % self._mod

        return self._hash