chore: rename package
This commit is contained in:
parent
dd57ecabb9
commit
1fb1e0d0bf
11 changed files with 7 additions and 7 deletions
37
src/byteb4rb1e/utils/collections.py
Normal file
37
src/byteb4rb1e/utils/collections.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
class CircularBuffer:
    """fixed-size circular (ring) buffer for managing streamed bytes

    Bytes are written one at a time at ``end``, which wraps around to index 0
    once it reaches ``size``. Whenever ``end`` catches up with ``start`` the
    oldest byte is given up by advancing ``start``, so at most ``size - 1``
    bytes lie between ``start`` and ``end`` at any time.
    """
    #: internal buffer storage maintaining a fixed size
    buf: bytearray
    #: number of slots allocated in ``buf``
    size: int
    #: index of the oldest element in the buffer
    start: int
    #: index where the next element will be inserted
    end: int
    #: indicates whether the buffer has overwritten (given up) older data
    filled: bool

    def __init__(self, size: int):
        """initializes the circular buffer with a fixed capacity

        :param size: number of byte slots to allocate; must be at least 1

        :raises ValueError: if ``size`` is smaller than 1
        """
        # a zero-sized buffer would make every append() fail with a
        # ZeroDivisionError on the wrap-around modulo, so reject it up front
        if size < 1:
            raise ValueError('size must be a positive integer')

        self.buf = bytearray(size)
        self.size = size
        self.start = 0
        self.end = 0
        self.filled = False

    def append(self, data: bytes) -> None:
        """adds data to the circular buffer, overwriting old data if necessary

        :param data: byte sequence to append to the buffer
        """
        for byte in data:
            self.buf[self.end] = byte
            self.end = (self.end + 1) % self.size

            # the write position caught up with the oldest byte: drop it
            if self.end == self.start:
                self.start = (self.start + 1) % self.size
                self.filled = True
|
||||
|
||||
0
src/byteb4rb1e/utils/http/__init__.py
Normal file
0
src/byteb4rb1e/utils/http/__init__.py
Normal file
129
src/byteb4rb1e/utils/http/server/__init__.py
Normal file
129
src/byteb4rb1e/utils/http/server/__init__.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
from dataclasses import dataclass
|
||||
from http.server import SimpleHTTPRequestHandler
|
||||
|
||||
from byteb4rb1e.utils.io import ChunksIO
|
||||
|
||||
|
||||
@dataclass
class HandlerOptions:
    """settings for the HTTP POST method handler

    """
    #: maximum allowed size of a single chunk; defaults to the limit defined
    #: on ``ChunksIO``
    max_chunk_size: int = ChunksIO.max_chunk_size
    #: size in bytes of the (in memory) sliding buffer reading from the pure
    #: (unchunked) client read stream; 512 KiB by default
    buffer_size: int = 1024 * 512
|
||||
|
||||
|
||||
@dataclass
class ServerOptions:
    """settings for the HTTP server

    """
    #: configuration options of the POST method handler (required)
    handler: HandlerOptions
    #: host name or address; empty by default
    hostname: str = ''
    #: TCP port; 8000 by default
    port: int = 8000
|
||||
|
||||
|
||||
class MultipartUploadHandler(SimpleHTTPRequestHandler):
    """Simple, yet compliant HTTP/1.0 MIME Multipart Upload Handler

    Implementation of a RFC1341 & RFC7578 compliant server for handling
    multipart uploads.

    This is meant as a utility for debugging MIME Multipart upload clients

    Support for:
    - client 'Expect' header
    - chunked transfer-encoding
    """
    #: multipart media sub-types accepted in the 'Content-Type' header
    media_subtypes = [
        'mixed',
        'alternative',
        'parallel',
        'digest',
        'form-data'
    ]

    def do_POST(self):
        """validate the headers of a multipart upload request and answer it

        Every failed validation answers with an error response and stops
        processing the request. Only the request headers are inspected; the
        request body is currently not read.

        Responses sent:

        - 400 if the 'Content-Type' header is missing, empty, malformed, not
          one of the supported multipart sub-types, lacks a valid
          'boundary' parameter or carries any other parameter
        - 501 if a transfer-encoding other than 'chunked' is requested
        - 100 (interim) if the client sent 'Expect: 100-continue'
        - 200 once all checks passed
        """
        h_content_type = self.headers.get('Content-Type')
        h_expect = self.headers.get('Expect')
        h_transfer_encoding = self.headers.get('Transfer-Encoding')

        if h_content_type is None:
            self.send_error(400, 'Missing \'Content-Type\' header')
            return

        content_type_segments = [s.strip() for s in h_content_type.split(';')]

        if not content_type_segments[0]:
            self.send_error(
                400,
                'no value was supplied for \'Content-Type\' header'
            )
            return

        try:
            media_type, media_subtype = content_type_segments[0].split('/', 1)
        except ValueError:
            self.send_error(
                400,
                'unable to parse media type and subtype from ' +
                'first (semicolon-delimited) segment of \'Content-Type\' ' +
                f'header value: {content_type_segments[0]}'
            )
            return

        # media type and sub-type are matched case-insensitively
        media_type = media_type.strip().lower()
        media_subtype = media_subtype.strip().lower()

        if media_type != 'multipart':
            self.send_error(
                400,
                'unsupported media type in \'Content-Type\' header value: ' +
                f'{media_type}'
            )
            return

        if media_subtype not in self.media_subtypes:
            self.send_error(
                400,
                'unsupported media sub-type in \'Content-Type\' header value: ' +
                f'{media_subtype}. Must be one of {", ".join(self.media_subtypes)}'
            )
            return

        # transfer-coding names are matched case-insensitively
        if h_transfer_encoding and \
                h_transfer_encoding.strip().lower() != 'chunked':
            self.send_error(
                501,
                f'unable to handle transfer-encoding: {h_transfer_encoding}'
            )
            return

        content_type_params = {}
        for segment in content_type_segments[1:]:
            # tolerate stray or trailing semicolons
            if not segment:
                continue

            name, separator, value = segment.partition('=')
            if not separator:
                self.send_error(
                    400,
                    'malformed parameter in \'Content-Type\' header value: ' +
                    f'{segment}'
                )
                return

            # parameter names are matched case-insensitively
            content_type_params[name.strip().lower()] = value.strip()

        boundary = content_type_params.pop('boundary', '')
        # the boundary may be transmitted as a quoted string
        if len(boundary) >= 2 and boundary[0] == boundary[-1] == '"':
            boundary = boundary[1:-1]
        boundary_len = len(boundary)

        if boundary == '':
            self.send_error(
                400,
                'missing \'boundary\' parameter in \'Content-Type\' header field'
            )
            return

        if boundary_len > 70:
            self.send_error(
                400,
                '\'boundary\' parameter value in \'Content-Type\' too long. ' +
                f'Is {boundary_len} characters long, must be at most 70.'
            )
            return

        # 'boundary' has been popped, anything left over is unexpected
        if content_type_params:
            self.send_error(
                400,
                'None other than \'boundary\' parameter in \'Content-Type\' ' +
                'header expected. Also received ' +
                f'{", ".join(content_type_params)}'
            )
            return

        # only send the interim response if the client asked for it.
        # NOTE(review): the base class answers 'Expect' on its own when
        # protocol_version is switched to HTTP/1.1 - confirm before doing so
        if h_expect is not None and h_expect.strip().lower() == '100-continue':
            if not self.handle_expect_100():
                return

        # read the first 4-bytes of the body to check if it has a preamble
        # indication

        # well great... curl is not RFC 1341 compliant. And RFC 1341 is asking
        # for tolerance towards non-compliant clients...

        self.send_response(200, 'OK')
        self.end_headers()
|
||||
|
||||
88
src/byteb4rb1e/utils/http/server/__main__.py
Normal file
88
src/byteb4rb1e/utils/http/server/__main__.py
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
from argparse import (
|
||||
ArgumentParser,
|
||||
ArgumentDefaultsHelpFormatter,
|
||||
RawDescriptionHelpFormatter
|
||||
)
|
||||
from dataclasses import dataclass
|
||||
from http.server import HTTPServer
|
||||
from io import BytesIO, IOBase
|
||||
from typing import Optional, Tuple, List
|
||||
|
||||
from byteb4rb1e.utils.http.server import (
|
||||
HandlerOptions,
|
||||
MultipartUploadHandler,
|
||||
ServerOptions,
|
||||
)
|
||||
from byteb4rb1e.utils.io import ChunksIO
|
||||
|
||||
|
||||
__doc__ = """tsmuds - Tiara's Simple Multipart Upload Debugging Server
|
||||
|
||||
This is a simple standalone implementation of a HTTP/1.x multipart upload server
|
||||
using the Python 3.9+ standard library - with an interface catered explicitly
|
||||
towards debugging misbehaved clients.
|
||||
|
||||
Examples:
|
||||
|
||||
python3 tsmuds.py --port 8000
|
||||
"""
|
||||
__author__ = "Tiara Rodney <tiara.rodney@administratrix.de>"
|
||||
|
||||
|
||||
class CustomArgparseFormatter(
    ArgumentDefaultsHelpFormatter,
    RawDescriptionHelpFormatter,
):
    """help formatter combining two stock argparse formatters: argument
    defaults are shown in the help output and the description docstring is
    kept unformatted.
    """
|
||||
|
||||
|
||||
# command line interface of the debugging server
argparser = ArgumentParser(
    prog='byteb4rb1e.http.server',
    formatter_class=CustomArgparseFormatter,
    description=__doc__,
    epilog=f"""(c) 2025, {__author__}

This software is licensed under the Creative Commons Attribution 4.0
International License (CC BY 4.0). For more details, visit:
https://creativecommons.org/licenses/by/4.0/
"""
)

argparser.add_argument(
    '--port',
    type=int,
    default=ServerOptions.port,
    help="bind to this port"
)

argparser.add_argument(
    '-b',
    '--bind',
    type=str,
    default=ServerOptions.hostname,
    help="bind to this address"
)

argparser.add_argument(
    '--max-chunk-size',
    type=int,
    metavar='INT',
    # the option is expressed in MiB; integer division keeps the default an
    # int, matching ``type=int``
    default=(ChunksIO.max_chunk_size // (1024 ** 2)),
    help="""maximum allowed size of chunk (in MiB) when RFC 9112 chunk
transfer encoding is requested by client"""
)

args = argparser.parse_args()

server_options = ServerOptions(
    hostname=args.bind,
    port=args.port,
    handler=HandlerOptions(
        # convert from MiB (command line) to bytes (handler option)
        max_chunk_size=args.max_chunk_size * (1024 ** 2)
    ),
)

# HTTPServer expects the address as a (host, port) tuple.
# NOTE(review): server_options.handler is not handed to the request handler
# anywhere in this script - confirm how it is meant to be consumed
with HTTPServer(
    (server_options.hostname, server_options.port),
    MultipartUploadHandler
) as httpd:
    print("serving at port", server_options.port)
    httpd.serve_forever()
|
||||
233
src/byteb4rb1e/utils/io/__init__.py
Normal file
233
src/byteb4rb1e/utils/io/__init__.py
Normal file
|
|
@ -0,0 +1,233 @@
|
|||
from io import BytesIO, IOBase
|
||||
import math
|
||||
from typing import Optional, Tuple, List
|
||||
|
||||
|
||||
class ChunksIO(IOBase):
    """handler for HTTP/1.1 chunked transfer-encoded (RFC 9112 §7) byte streams

    Compact and predictable implementation of a RFC 9112 stream handler,
    which exposes a common IOBase interface for treating chunked byte streams
    as pure, unencoded byte streams.

    .. notice::

        The implementation is currently only concerned with read operations.
        Chunk extensions are not supported: a chunk size line containing
        anything other than hexadecimal digits is rejected. Trailer fields
        following the terminating chunk are consumed and discarded.
    """
    #: maximum allowed size of a chunk
    # MiB by default, just guessing 10 MiB is a sensible limit
    max_chunk_size = int(10 * (1024 ** 2))
    #: optional write-through buffer
    _buffer: Optional[BytesIO]
    #: sizes of all chunks encountered so far, as returned by
    # ``get_chunk_size()``
    _chunks_size: List[Tuple[int, int]]
    #: index of current chunk
    _current_chunk: int
    #: cursor position on the underlying stream, as the stream is not expected
    # to implement ``tell()``. Counts every byte consumed from the underlying
    # stream, including chunk size lines and terminators.
    _cursor: int
    #: chunk encoded stream
    _stream: BytesIO
    #: number of data bytes of the current chunk that have not been read yet
    _remaining: int
    #: whether the terminating zero-sized chunk has been consumed
    _eof: bool

    #: upper bounds applied while discarding the trailer section
    _max_trailer_line = 65536
    _max_trailer_lines = 100

    def __init__(
        self,
        stream: BytesIO,
        buffer: Optional[BytesIO] = None,
        max_chunk_size: Optional[int] = None,
    ):
        """initialize the instance

        .. notice::

            The write-through buffer is required to be writable and MUST be
            considered locked during any operation of the ChunksIO
            implementation. The buffer's cursor position does not reflect
            the cursor position of the underlying stream.

        :param stream: a byte-stream to abstract
        :param buffer: write-through buffer receiving all decoded data read
                       from the underlying stream. This can be useful, if the
                       data needs to be accessed again later on.
        :param max_chunk_size: the maximum size of a single chunk (excluding
                               its bytes size segment). Falls back to the
                               class-level default if omitted.

        :raises ValueError: if the stream is not readable or the buffer is not
                            writable
        """
        if not stream.readable():
            raise ValueError('expected readable stream')

        if buffer is not None and not buffer.writable():
            raise ValueError('expected writable buffer')

        if max_chunk_size is not None:
            self.max_chunk_size = max_chunk_size

        self._buffer = buffer
        self._chunks_size = []
        self._current_chunk = 0
        self._cursor = 0
        self._stream = stream
        self._remaining = 0
        self._eof = False

        super().__init__()

    @staticmethod
    def get_chunk_size(
        stream: BytesIO,
        max_size: int,
    ) -> Tuple[int, int]:
        """get the size of the next chunk from a RFC 9112 (§7) chunk encoded
        byte stream

        stream cursor position is assumed to be at the start of the preceding
        byte size segment of chunk data. The size is transmitted as
        hexadecimal ASCII digits terminated by CRLF. The number of digits
        read is capped by the number of digits ``max_size`` requires, so that
        a misbehaving client can't make the size segment arbitrarily long.

        :param stream: the stream to read the chunk size from
        :param max_size: the maximum allowed size a chunk can be. I wasn't able
                         to find a definitive limit defined in the RFC so this
                         is guess working and at least curl has a pretty big
                         chunk size of more than 6 MiB.

        :returns: tuple of the size of the bytes size segment (including its
                  terminator) and the data bytes size

        :raises ValueError: if the size segment is missing, malformed, not
                            terminated in time or exceeds ``max_size``
        """
        _terminator = b'\r\n'
        hex_digits = b'0123456789abcdefABCDEF'
        chunk_size = b''
        terminator = b''

        # number of hexadecimal digits the max_size representation requires
        max_digits = max(1, len(f'{max_size:x}'))

        # the iteration could be handled with less system calls by reading a
        # larger *chunk* of data and iterating over that in-memory cache.
        # Though, this would come at the expense of unpredictable memory
        # consumption and would require a write-through buffer by default, in
        # addition to making the implementation more complex.
        for _ in range(max_digits + len(_terminator)):
            buf = stream.read(1)

            if not buf:
                raise ValueError(
                    'stream ended before the chunk size terminator was reached'
                )

            if terminator or buf == _terminator[:1]:
                terminator += buf
                if not _terminator.startswith(terminator):
                    raise ValueError('malformed chunk size terminator')
            else:
                chunk_size += buf

            if terminator == _terminator:
                if not chunk_size:
                    raise ValueError(
                        'terminator reached without having parsed ' +
                        'any byte size'
                    )

                if not all(digit in hex_digits for digit in chunk_size):
                    raise ValueError(
                        f'chunk size is not a hexadecimal number: {chunk_size!r}'
                    )

                data_size = int(chunk_size, 16)
                if data_size > max_size:
                    raise ValueError(
                        f'chunk size of {data_size} bytes exceeds the ' +
                        f'maximum of {max_size} bytes'
                    )

                return (len(chunk_size + terminator), data_size)

        raise ValueError(
            'unable to reach terminator with a max chunk size of ' +
            f'{max_size / (1024 ** 2)} MiB'
        )

    def _next_chunk(self) -> None:
        """advance to the next chunk by consuming its size segment"""
        index = len(self._chunks_size)

        # the data of every chunk is followed by a CRLF, which has to be
        # consumed before the next size segment starts
        if index > 0:
            if self._stream.read(2) != b'\r\n':
                raise ValueError(
                    f'chunk #{self._current_chunk}: chunk data is not ' +
                    'terminated by CRLF'
                )
            self._cursor += 2

        try:
            chunk_size = ChunksIO.get_chunk_size(
                self._stream,
                self.max_chunk_size
            )
        except ValueError as e:
            raise ValueError(f'chunk #{index}: {e}') from e

        self._chunks_size.append(chunk_size)
        self._current_chunk = index
        self._cursor += chunk_size[0]
        self._remaining = chunk_size[1]

        # a zero-sized chunk terminates the stream
        if chunk_size[1] == 0:
            self._skip_trailers()
            self._eof = True

    def _skip_trailers(self) -> None:
        """consume the trailer section following the terminating chunk"""
        for _ in range(self._max_trailer_lines):
            line = self._stream.readline(self._max_trailer_line + 1)
            self._cursor += len(line)

            # an empty line closes the trailer section; a stream ending
            # early is tolerated
            if line in (b'\r\n', b'\n', b''):
                return

        raise ValueError('too many trailer fields after terminating chunk')

    def read(self, size=-1) -> bytes:
        """read an arbitrary amount of decoded data from the underlying stream.

        :param size: maximum number of data bytes to return. If negative or
                     ``None``, everything up to the terminating chunk is read.

        :returns: the decoded data, empty once the terminating chunk has been
                  reached

        :raises ValueError: if the underlying stream is malformed or ends
                            prematurely
        """
        wanted = None if size is None or size < 0 else size
        parts = []

        while not self._eof and (wanted is None or wanted > 0):
            if self._remaining == 0:
                self._next_chunk()
                continue

            count = self._remaining
            if wanted is not None:
                count = min(count, wanted)

            data = self._stream.read(count)

            if len(data) != count:
                raise ValueError(
                    f'chunk #{self._current_chunk}: stream yielded too few bytes'
                )

            self._remaining -= count
            self._cursor += count
            if wanted is not None:
                wanted -= count

            if self._buffer is not None:
                self._buffer.write(data)

            parts.append(data)

        return b''.join(parts)

    def readable(self) -> bool:
        """return whether the stream supports reading, which is always the
        case
        """
        return True

    def readChunk(self) -> bytes:
        """read an entire chunk

        The cursor has to be at the start position of a chunk, i.e. the
        current chunk must not have been partially consumed by ``read()``.

        :returns: the data of the chunk, empty once the terminating chunk has
                  been reached

        :raises RuntimeError: if the current chunk was partially consumed
        """
        if not self._eof:
            if self._remaining == 0:
                self._next_chunk()
            elif self._remaining != self._chunks_size[self._current_chunk][1]:
                raise RuntimeError(
                    'cursor not at starting position of a chunk. Mixing ' +
                    'read() and readChunk() calls is currently not supported.'
                )

        if self._eof:
            return b''

        return self.read(self._remaining)

    def readChunks(self):
        """yield all chunks until the terminating 0 byte chunk is reached

        :returns: generator yielding the data of one chunk at a time
        """
        while True:
            chunk = self.readChunk()
            if not chunk:
                return
            yield chunk

    def tell(self) -> int:
        """return the current position on the underlying (encoded) stream
        """
        return self._cursor
|
||||
91
src/byteb4rb1e/utils/string.py
Normal file
91
src/byteb4rb1e/utils/string.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from typing import Optional
|
||||
|
||||
|
||||
class RollingHash:
    """implementation of Rabin-Karp rolling hash

    The hash of a fixed-length window of bytes is kept up to date while the
    window slides over the data one byte at a time.
    """
    #: default base
    base: int = 31
    #: default modulus
    mod: int = 10**9 + 7
    #: current computed hash
    _hash: int
    #: prime number base (e.g., 31)
    _base: int
    #: large prime modulus (to prevent overflow)
    _mod: int
    # Precomputation of ``base^(length-1) % mod`` for removing the old byte when
    # rolling over
    _hbase_factor: int

    def __init__(
        self,
        data: bytes,
        base: Optional[int] = None,
        mod: Optional[int] = None
    ):
        """Initialize the rolling hash over an initial window of data.

        :param data: the initial window; its length is the window length used
                     for all subsequent ``roll()`` calls
        :param base: Prime number base (e.g., 31). Falls back to the class
                     default if omitted (or falsy)
        :param mod: Large prime modulus to prevent overflow. Falls back to the
                    class default if omitted (or falsy)
        """
        self._base = base if base else RollingHash.base
        self._mod = mod if mod else RollingHash.mod

        self._hash = RollingHash.compute_initial_hash(
            data,
            self._base,
            self._mod
        )

        self._hbase_factor = pow(self._base, len(data) - 1, self._mod)

    @staticmethod
    def compute_initial_hash(
        data: bytes,
        base: int,
        mod: int,
    ) -> int:
        """Compute the polynomial hash of ``data``.

        rather use this standalone for computing the hash of the search pattern,
        to avoid the overhead of instantiating an object.

        :param data: data to build hash for
        :param base: base of the polynomial
        :param mod: modulus applied after each step

        :returns: hash of data
        """
        hash_ = 0
        for byte in data:
            # computing the modulus at each iteration, as to avoid the summed
            # integer to be chunky, as in HUUUUGEE...
            hash_ = (hash_ * base + byte) % mod
        return hash_

    def roll(self, old_byte: int, new_byte: int) -> int:
        """Efficiently update hash by removing ``old_byte`` and adding
        ``new_byte``

        The old_byte removal uses a pre-computed value of the highest base used
        in the polynomial calculation. This speeds things up a bit.

        I was thinking about a way on how to store the old_byte efficiently
        within the class object, but that would require storing the entire data,
        basically doubling the memory consumption as the data must definitely
        also live outside of the class object. A memoryview could solve this
        problem, but at the cost of making the implementation more complex, so
        this will have to do.

        :param old_byte: The ordinal of the first byte in buffer to roll over
        :param new_byte: The ordinal of the byte newly appended to the buffer

        :returns: the hash of the window after rolling
        """
        # the instance's base and modulus have to be used here, not the class
        # defaults, otherwise a custom base/mod yields wrong hashes

        # Remove old
        self._hash = (self._hash - old_byte * self._hbase_factor) % self._mod
        # Add new
        self._hash = (self._hash * self._base + new_byte) % self._mod

        return self._hash
|
||||
Loading…
Add table
Add a link
Reference in a new issue