This commit is contained in:
Rodney, Tiara 2025-05-03 19:26:12 +02:00
commit 29c7308410
No known key found for this signature in database
GPG key ID: 5CD8EC1D46106723
17 changed files with 3755 additions and 0 deletions

13
.gitignore vendored Normal file
View file

@ -0,0 +1,13 @@
/dist/
/build/
**/*.egg-info
**/*.pyc
**/__pycache__/
/.venv/
/autom4te.cache/
/config.log
/config.status
/configure~
*.swo
*.swp

24
Makefile Normal file
View file

@ -0,0 +1,24 @@
# Maintenance entry points. 'chore' regenerates the configure script, the
# Pipfile lock and the pinned development requirements.
# NOTE(review): 'configure' is declared phony although it is also produced as a
# file from configure.ac below, so its recipe presumably runs on every 'chore'
# -- confirm this is intended. 'test-reports' and 'build' are not declared phony.
.PHONY: chore configure
chore: configure Pipfile.lock requirements-dev.txt
# re-lock dependencies using the pipenv installed inside the virtual environment
Pipfile.lock: .venv Pipfile
.venv/bin/pipenv lock
# export the locked development dependencies as a pip requirements file
# NOTE(review): this target depends on .venv while .venv depends on this
# target (see the '.venv' rule), i.e. the prerequisites are circular -- verify
# how make resolves this on a fresh checkout.
requirements-dev.txt: .venv Pipfile.lock
.venv/bin/pipenv requirements --dev-only > requirements-dev.txt
# regenerate the configure script from its autoconf source
configure: configure.ac
autoconf
# create the virtual environment and install the pinned development
# requirements (which include pipenv itself) into it
.venv: requirements-dev.txt
python3 -m venv .venv
.venv/bin/python3 -m pip install --upgrade pip
.venv/bin/pip install -r requirements-dev.txt
# run the unittest discovery with the interpreter of the virtual environment
test-reports:
.venv/bin/python3 -m unittest discover -v
# run the 'build' script defined in the Pipfile
# NOTE(review): no rule in this file creates '.venv/bin/pipenv' explicitly; it
# appears to be a by-product of the '.venv' rule -- confirm.
build: .venv/bin/pipenv
.venv/bin/pipenv run build

20
Pipfile Normal file
View file

@ -0,0 +1,20 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[dev-packages]
mypy = "~=1.15.0"
autopep8 = "~=2.3.2"
setuptools-scm = "~=8.2.0"
pylint = "~=3.3.6"
build = "*"
pipenv = "*"
byteb4rb1e-utils = { editable = true, path = '.'}
[requires]
python_version = "3.11"
[scripts]
"build" = "python3 -m build"

244
Pipfile.lock generated Normal file
View file

@ -0,0 +1,244 @@
{
"_meta": {
"hash": {
"sha256": "4847baa5a13a96f2c3de2a246a0c088806c308426c79d8105387dff1fe1f1e58"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.11"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {},
"develop": {
"astroid": {
"hashes": [
"sha256:622cc8e3048684aa42c820d9d218978021c3c3d174fb03a9f0d615921744f550",
"sha256:d05bfd0acba96a7bd43e222828b7d9bc1e138aaeb0649707908d3702a9831248"
],
"markers": "python_full_version >= '3.9.0'",
"version": "==3.3.9"
},
"autopep8": {
"hashes": [
"sha256:89440a4f969197b69a995e4ce0661b031f455a9f776d2c5ba3dbd83466931758",
"sha256:ce8ad498672c845a0c3de2629c15b635ec2b05ef8177a6e7c91c74f3e9b51128"
],
"index": "pypi",
"markers": "python_version >= '3.9'",
"version": "==2.3.2"
},
"build": {
"hashes": [
"sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5",
"sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7"
],
"index": "pypi",
"markers": "python_version >= '3.8'",
"version": "==1.2.2.post1"
},
"byteb4rb1e-utils": {
"editable": true,
"path": "."
},
"certifi": {
"hashes": [
"sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6",
"sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3"
],
"markers": "python_version >= '3.6'",
"version": "==2025.4.26"
},
"colorama": {
"hashes": [
"sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44",
"sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'",
"version": "==0.4.6"
},
"dill": {
"hashes": [
"sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0",
"sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049"
],
"markers": "python_version >= '3.8'",
"version": "==0.4.0"
},
"distlib": {
"hashes": [
"sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87",
"sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"
],
"version": "==0.3.9"
},
"filelock": {
"hashes": [
"sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2",
"sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"
],
"markers": "python_version >= '3.9'",
"version": "==3.18.0"
},
"isort": {
"hashes": [
"sha256:1cb5df28dfbc742e490c5e41bad6da41b805b0a8be7bc93cd0fb2a8a890ac450",
"sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615"
],
"markers": "python_full_version >= '3.9.0'",
"version": "==6.0.1"
},
"mccabe": {
"hashes": [
"sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325",
"sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"
],
"markers": "python_version >= '3.6'",
"version": "==0.7.0"
},
"mypy": {
"hashes": [
"sha256:1124a18bc11a6a62887e3e137f37f53fbae476dc36c185d549d4f837a2a6a14e",
"sha256:171a9ca9a40cd1843abeca0e405bc1940cd9b305eaeea2dda769ba096932bb22",
"sha256:1905f494bfd7d85a23a88c5d97840888a7bd516545fc5aaedff0267e0bb54e2f",
"sha256:1fbb8da62dc352133d7d7ca90ed2fb0e9d42bb1a32724c287d3c76c58cbaa9c2",
"sha256:2922d42e16d6de288022e5ca321cd0618b238cfc5570e0263e5ba0a77dbef56f",
"sha256:2e2c2e6d3593f6451b18588848e66260ff62ccca522dd231cd4dd59b0160668b",
"sha256:2ee2d57e01a7c35de00f4634ba1bbf015185b219e4dc5909e281016df43f5ee5",
"sha256:2f2147ab812b75e5b5499b01ade1f4a81489a147c01585cda36019102538615f",
"sha256:404534629d51d3efea5c800ee7c42b72a6554d6c400e6a79eafe15d11341fd43",
"sha256:5469affef548bd1895d86d3bf10ce2b44e33d86923c29e4d675b3e323437ea3e",
"sha256:5a95fb17c13e29d2d5195869262f8125dfdb5c134dc8d9a9d0aecf7525b10c2c",
"sha256:6983aae8b2f653e098edb77f893f7b6aca69f6cffb19b2cc7443f23cce5f4828",
"sha256:712e962a6357634fef20412699a3655c610110e01cdaa6180acec7fc9f8513ba",
"sha256:8023ff13985661b50a5928fc7a5ca15f3d1affb41e5f0a9952cb68ef090b31ee",
"sha256:811aeccadfb730024c5d3e326b2fbe9249bb7413553f15499a4050f7c30e801d",
"sha256:8f8722560a14cde92fdb1e31597760dc35f9f5524cce17836c0d22841830fd5b",
"sha256:93faf3fdb04768d44bf28693293f3904bbb555d076b781ad2530214ee53e3445",
"sha256:973500e0774b85d9689715feeffcc980193086551110fd678ebe1f4342fb7c5e",
"sha256:979e4e1a006511dacf628e36fadfecbcc0160a8af6ca7dad2f5025529e082c13",
"sha256:98b7b9b9aedb65fe628c62a6dc57f6d5088ef2dfca37903a7d9ee374d03acca5",
"sha256:aea39e0583d05124836ea645f412e88a5c7d0fd77a6d694b60d9b6b2d9f184fd",
"sha256:b9378e2c00146c44793c98b8d5a61039a048e31f429fb0eb546d93f4b000bedf",
"sha256:baefc32840a9f00babd83251560e0ae1573e2f9d1b067719479bfb0e987c6357",
"sha256:be68172e9fd9ad8fb876c6389f16d1c1b5f100ffa779f77b1fb2176fcc9ab95b",
"sha256:c43a7682e24b4f576d93072216bf56eeff70d9140241f9edec0c104d0c515036",
"sha256:c4bb0e1bd29f7d34efcccd71cf733580191e9a264a2202b0239da95984c5b559",
"sha256:c7be1e46525adfa0d97681432ee9fcd61a3964c2446795714699a998d193f1a3",
"sha256:c9817fa23833ff189db061e6d2eff49b2f3b6ed9856b4a0a73046e41932d744f",
"sha256:ce436f4c6d218a070048ed6a44c0bbb10cd2cc5e272b29e7845f6a2f57ee4464",
"sha256:d10d994b41fb3497719bbf866f227b3489048ea4bbbb5015357db306249f7980",
"sha256:e601a7fa172c2131bff456bb3ee08a88360760d0d2f8cbd7a75a65497e2df078",
"sha256:f95579473af29ab73a10bada2f9722856792a36ec5af5399b653aa28360290a5"
],
"index": "pypi",
"markers": "python_version >= '3.9'",
"version": "==1.15.0"
},
"mypy-extensions": {
"hashes": [
"sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505",
"sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"
],
"markers": "python_version >= '3.8'",
"version": "==1.1.0"
},
"packaging": {
"hashes": [
"sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484",
"sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"
],
"markers": "python_version >= '3.8'",
"version": "==25.0"
},
"pipenv": {
"hashes": [
"sha256:85d42e13da78f27f0213c998dba9a59f3ba6a6fe9e420b75b561acc344f021ad",
"sha256:f26dc0352f3fb167c3897a66a5d8c9ab81dd52a836a48630712e1e5a06840ebf"
],
"index": "pypi",
"markers": "python_version >= '3.9'",
"version": "==2025.0.2"
},
"platformdirs": {
"hashes": [
"sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94",
"sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351"
],
"markers": "python_version >= '3.9'",
"version": "==4.3.7"
},
"pycodestyle": {
"hashes": [
"sha256:35863c5974a271c7a726ed228a14a4f6daf49df369d8c50cd9a6f58a5e143ba9",
"sha256:c8415bf09abe81d9c7f872502a6eee881fbe85d8763dd5b9924bb0a01d67efae"
],
"markers": "python_version >= '3.9'",
"version": "==2.13.0"
},
"pylint": {
"hashes": [
"sha256:8b7c2d3e86ae3f94fb27703d521dd0b9b6b378775991f504d7c3a6275aa0a6a6",
"sha256:b634a041aac33706d56a0d217e6587228c66427e20ec21a019bc4cdee48c040a"
],
"index": "pypi",
"markers": "python_full_version >= '3.9.0'",
"version": "==3.3.6"
},
"pyproject-hooks": {
"hashes": [
"sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8",
"sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913"
],
"markers": "python_version >= '3.7'",
"version": "==1.2.0"
},
"setuptools": {
"hashes": [
"sha256:a65cffc4fb86167e3020b3ef58e08226baad8b29a3b34ce2c9d07e901bac481d",
"sha256:ec8308eb180b2312062b1c5523204acf872cd8b0a9e6c2ae76431b22bc4065d7"
],
"markers": "python_version >= '3.9'",
"version": "==80.3.0"
},
"setuptools-scm": {
"hashes": [
"sha256:136e2b1d393d709d2bcf26f275b8dec06c48b811154167b0fd6bb002aad17d6d",
"sha256:a18396a1bc0219c974d1a74612b11f9dce0d5bd8b1dc55c65f6ac7fd609e8c28"
],
"index": "pypi",
"markers": "python_version >= '3.8'",
"version": "==8.2.0"
},
"tomlkit": {
"hashes": [
"sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde",
"sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"
],
"markers": "python_version >= '3.8'",
"version": "==0.13.2"
},
"typing-extensions": {
"hashes": [
"sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c",
"sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"
],
"markers": "python_version >= '3.8'",
"version": "==4.13.2"
},
"virtualenv": {
"hashes": [
"sha256:800863162bcaa5450a6e4d721049730e7f2dae07720e0902b0e4040bd6f9ada8",
"sha256:e34302959180fca3af42d1800df014b35019490b119eba981af27f2fa486e5d6"
],
"markers": "python_version >= '3.8'",
"version": "==20.30.0"
}
}
}

49
TODO Normal file
View file

@ -0,0 +1,49 @@
# TODO List for byteb4rb1e-utils
This is a poor man's issue tracker. I am not primarily a GitHub user, so I don't
want to commit to their issue tracking feature, but my primary VCS hosting
provider (Bitbucket) only offers paid integration into their issue tracker
(Jira). I don't have the time (or patience) at the moment to analyze the best
approach, so this file will have to suffice.
It's a very simple concept: Track any issues (features, bugfixes, hotfixes) in
here, assign a sequential number to it and use that number when branching.
I will try to develop a format so that I can parse the file later on, should I
decide to migrate to a real issue tracker. It's probably going to be Bugzilla,
but for that my html-theme-ref project needs to stabilize first.
## Format Specification
The file uses Markdown conventions for formatting headers and other text block
entities, but SHOULD NOT be considered a Markdown file. That's why it has no
definitive file extension.
Each issue entry follows a structured format for easier parsing and future
migration. Issues MUST be **appended** to this file and never moved, to
preserve Git diffing.
### Issue Format
```
ID: [ISSUE-NUMBER]
Type: [feature/bugfix/hotfix]
Title: [Short title]
Status: [open/in-progress/done]
Priority: [low/medium/high]
Created: [YYYY-MM-DD]
Description: [Detailed explanation]
---
```
- ISSUE-NUMBERs must be sequential
- wrapped lines of the description must be indented so that every line starts
at the same column
- issues must be started with two LF
- issues must be terminated with two LF, then `---`
- issues may have a free-text field (epilog), which must be started with two LF.
## Issues

2663
configure vendored Normal file

File diff suppressed because it is too large Load diff

27
configure.ac Normal file
View file

@ -0,0 +1,27 @@
AC_INIT

# GNU Make is optional: without it the build targets have to be run by hand.
# NOTE: the comparisons below use '=' instead of '==', because '==' is a
# bash extension that is not understood by every POSIX 'test' implementation.
AC_CHECK_PROGS([MAKE], [make], [no])
AS_IF([test "$MAKE" = "no"],
[AC_MSG_NOTICE([without GNU Make, you have to inspect 'Makefile' and deduce build targets yourself.])])

AC_CHECK_PROGS([GIT], [git], [no])
AS_IF([test "$GIT" = "no"],
[AC_MSG_ERROR([install Git, before continuing.])])

AC_CHECK_PROGS([PYTHON3], [python3], [no])
AS_IF([test "$PYTHON3" = "no"],
[AC_MSG_ERROR([install Python 3, before continuing.])])

# required in Makefile to ensure proper path resolution during preprocessing
# realpath is not available on macOS
AC_CHECK_PROGS([REALPATH], [realpath], [no])
AS_IF([test "$REALPATH" = "no"],
[AC_MSG_ERROR([set a persistent alias for 'realpath', before continuing, e.g.
alias realpath='python3 -c "import pathlib,sys;print(pathlib.Path(sys.argv[[1]]).resolve())"'
])])

# bootstrap the virtual environment through the Makefile, but only if the
# make program detected above is actually available
AS_IF([test "$MAKE" != "no"],
[AC_MSG_NOTICE([initializing python3 venv...])
$MAKE .venv],
[AC_MSG_WARN([skipping python3 venv initialization, because make is not available.])])

AC_OUTPUT

54
pyproject.toml Normal file
View file

@ -0,0 +1,54 @@
[build-system]
requires = [
"setuptools",
"wheel",
"setuptools-scm[toml]"
]
build-backend = "setuptools.build_meta"
[project]
name = "byteb4rb1e-utils"
description = "personal utilities and helpers"
authors = [
{ name = "Tiara Rodney", email = "tiara.rodney@administratrix.de" }
]
license = { file = "LICENSE" }
readme = "README.md"
classifiers = [
"Development Status :: 1 - Planning",
"Environment :: Web Environment",
"Framework :: Sphinx",
"Framework :: Sphinx :: Theme",
"Intended Audience :: Developers",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Documentation",
"Topic :: Software Development :: Documentation",
]
dependencies = []
dynamic = ["version"]
requires-python = ">=3.8"
[project.urls]
Bitbucket = "https://bitbucket.org/byteb4rb1e/py-utils"
[tool.setuptools.packages.find]
where = ["src"]
namespaces = true
[tool.mypy]
strict = true
[tool.autopep8]
max_line_length = 80
aggressive = 3
recursive = true
in-place = true
[tool.setuptools_scm]

25
requirements-dev.txt Normal file
View file

@ -0,0 +1,25 @@
-i https://pypi.org/simple
astroid==3.3.9; python_full_version >= '3.9.0'
autopep8==2.3.2; python_version >= '3.9'
build==1.2.2.post1; python_version >= '3.8'
-e .
certifi==2025.4.26; python_version >= '3.6'
colorama==0.4.6; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
dill==0.4.0; python_version >= '3.8'
distlib==0.3.9
filelock==3.18.0; python_version >= '3.9'
isort==6.0.1; python_full_version >= '3.9.0'
mccabe==0.7.0; python_version >= '3.6'
mypy==1.15.0; python_version >= '3.9'
mypy-extensions==1.1.0; python_version >= '3.8'
packaging==25.0; python_version >= '3.8'
pipenv==2025.0.2; python_version >= '3.9'
platformdirs==4.3.7; python_version >= '3.9'
pycodestyle==2.13.0; python_version >= '3.9'
pylint==3.3.6; python_full_version >= '3.9.0'
pyproject-hooks==1.2.0; python_version >= '3.7'
setuptools==80.3.0; python_version >= '3.9'
setuptools-scm==8.2.0; python_version >= '3.8'
tomlkit==0.13.2; python_version >= '3.8'
typing-extensions==4.13.2; python_version >= '3.8'
virtualenv==20.30.0; python_version >= '3.8'

View file

View file

@ -0,0 +1,129 @@
from dataclasses import dataclass
from http.server import SimpleHTTPRequestHandler
from byteb4rb1e_utils.io import ChunksIO
@dataclass
class HandlerOptions:
    """Tunables of the handler serving the HTTP POST method."""

    # upper bound for the size of a single chunk; defaults to whatever the
    # ChunksIO class uses as its own limit
    max_chunk_size: int = ChunksIO.max_chunk_size

    # size in bytes (expressed as a multiple of KiB) of the in-memory sliding
    # buffer that reads from the pure (unchunked) client read stream
    buffer_size: int = 1024 * 512
@dataclass
class ServerOptions:
    """Settings of the HTTP server itself."""

    # options forwarded to the request handler (mandatory, has no default)
    handler: HandlerOptions

    # address to bind to; the empty string is the default
    hostname: str = ""

    # TCP port to bind to
    port: int = 8000
class MultipartUploadHandler(SimpleHTTPRequestHandler):
    """Simple, yet compliant HTTP/1.0 MIME Multipart Upload Handler

    Implementation of a RFC1341 & RFC7578 compliant server for handling
    multipart uploads.

    This is meant as a utility for debugging MIME Multipart upload clients

    Support for:

    - client 'Expect' header
    - chunked transfer-encoding
    """

    #: multipart sub-types accepted in the 'Content-Type' request header
    media_subtypes = [
        'mixed',
        'alternative',
        'parallel',
        'digest',
        'form-data'
    ]

    def do_POST(self):
        """validate the headers of a multipart upload request

        Every validation failure is answered with exactly one error response,
        after which handling of the request stops. If all checks pass, an
        interim ``100 Continue`` is sent when the client asked for it through
        the 'Expect' header, followed by a ``200 OK`` response. The request
        body itself is not consumed yet.
        """
        h_content_type = self.headers.get('Content-Type')
        h_expect = self.headers.get('Expect')
        h_transfer_encoding = self.headers.get('Transfer-Encoding')

        if h_content_type is None:
            self.send_error(400, 'Missing \'Content-Type\' header')
            return

        content_type_segments = [s.strip() for s in h_content_type.split(';')]

        # str.split() always yields at least one element, so an empty header
        # value has to be detected explicitly
        if not content_type_segments[0]:
            self.send_error(
                400,
                'no value was supplied for \'Content-Type\' header'
            )
            return

        try:
            media_type, media_subtype = content_type_segments[0].split('/', 1)
        except ValueError:
            self.send_error(
                400,
                'unable to parse media type and subtype from ' +
                'first (semicolon-delimited) segment of \'Content-Type\' ' +
                f'header value: {content_type_segments[0]}'
            )
            return

        # media type and subtype are matched case-insensitively
        media_type = media_type.strip().lower()
        media_subtype = media_subtype.strip().lower()

        if media_type != 'multipart':
            self.send_error(
                400,
                'unsupported media type in \'Content-Type\' header value: ' +
                f'{media_type}'
            )
            return

        if media_subtype not in self.media_subtypes:
            self.send_error(
                400,
                'unsupported media sub-type in \'Content-Type\' header ' +
                f'value: {media_subtype}. Must be one of ' +
                f'{", ".join(self.media_subtypes)}'
            )
            return

        if h_transfer_encoding:
            if h_transfer_encoding.strip().lower() != 'chunked':
                self.send_error(
                    501,
                    'unable to handle transfer-encoding: ' +
                    f'{h_transfer_encoding}'
                )
                return

        content_type_params = {}
        for segment in content_type_segments[1:]:
            # tolerate empty segments, e.g. caused by a trailing semicolon
            if not segment:
                continue
            name, separator, value = segment.partition('=')
            if not separator:
                self.send_error(
                    400,
                    'malformed parameter in \'Content-Type\' header ' +
                    f'value: {segment}'
                )
                return
            content_type_params[name.strip().lower()] = value.strip()

        boundary = content_type_params.pop('boundary', '')
        # the boundary may be transmitted as a quoted string. Only the
        # enclosing quotes are removed, escape sequences are not interpreted.
        if len(boundary) >= 2 and boundary[0] == boundary[-1] == '"':
            boundary = boundary[1:-1]
        boundary_len = len(boundary)

        if boundary == '':
            self.send_error(
                400,
                'missing \'boundary\' parameter in \'Content-Type\' header ' +
                'field'
            )
            return

        if boundary_len > 70:
            self.send_error(
                400,
                '\'boundary\' parameter value in \'Content-Type\' too long. ' +
                f'Is {boundary_len} characters long, must be at most 70.'
            )
            return

        if content_type_params:
            self.send_error(
                400,
                'None other than \'boundary\' parameter in \'Content-Type\' ' +
                'header expected. Also received ' +
                ', '.join(content_type_params)
            )
            return

        # only acknowledge with an interim response if the client asked for it
        if h_expect is not None and h_expect.strip().lower() == '100-continue':
            if not self.handle_expect_100():
                return

        # TODO: read the first 4-bytes of the body to check if it has a
        # preamble indication
        # well great... curl is not RFC 1341 compliant. And RFC 1341 is asking
        # for tolerance towards non-compliant clients...

        self.send_response(200, 'OK')
        self.end_headers()

View file

@ -0,0 +1,88 @@
from argparse import (
ArgumentParser,
ArgumentDefaultsHelpFormatter,
RawDescriptionHelpFormatter
)
from dataclasses import dataclass
from http.server import HTTPServer
from io import BytesIO, IOBase
from typing import Optional, Tuple, List
from byteb4rb1e_utils.http.server import (
HandlerOptions,
MultipartUploadHandler,
ServerOptions,
)
from byteb4rb1e_utils.io import ChunksIO
# The module documentation is assigned explicitly, because it is reused further
# down as the description text of the command-line argument parser.
__doc__ = """tsmuds - Tiara's Simple Multipart Upload Debugging Server
This is a simple standalone implementation of a HTTP/1.x multipart upload server
using the Python 3.9+ standard library - with an interface catered explicitly
towards debugging misbehaved clients.
Examples:
python3 tsmuds.py --port 8000
"""
# author string, interpolated into the epilog of the command-line help
__author__ = "Tiara Rodney <tiara.rodney@administratrix.de>"
class CustomArgparseFormatter(
        ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter):
    """Help formatter combining two stock argparse formatters.

    Argument defaults are appended to each help text, while the description
    and epilog are emitted verbatim instead of being re-wrapped.
    """
# command-line interface of the debugging server
argparser = ArgumentParser(
    prog='byteb4rb1e.http.server',
    formatter_class=CustomArgparseFormatter,
    description=__doc__,
    epilog=f"""(c) 2025, {__author__}
This software is licensed under the Creative Commons Attribution 4.0
International License (CC BY 4.0). For more details, visit:
https://creativecommons.org/licenses/by/4.0/
"""
)
argparser.add_argument(
    '--port',
    type=int,
    default=ServerOptions.port,
    help="bind to this port"
)
argparser.add_argument(
    '-b',
    '--bind',
    type=str,
    default=ServerOptions.hostname,
    help="bind to this address"
)
argparser.add_argument(
    '--max-chunk-size',
    type=int,
    metavar='INT',
    # integer division keeps the default an int, matching ``type=int``
    default=(ChunksIO.max_chunk_size // (1024 ** 2)),
    help="""maximum allowed size of chunk (in MiB) when RFC 9112 chunk
transfer encoding is requested by client"""
)

# only parse arguments and start serving when executed as a program, so that
# importing this module does not block
if __name__ == '__main__':
    args = argparser.parse_args()

    server_options = ServerOptions(
        hostname=args.bind,
        port=args.port,
        handler=HandlerOptions(
            # the option is given in MiB, the handler expects bytes
            max_chunk_size=args.max_chunk_size * (1024 ** 2)
        ),
    )

    # HTTPServer expects the address as a (host, port) tuple
    server_address = (server_options.hostname, server_options.port)

    with HTTPServer(server_address, MultipartUploadHandler) as httpd:
        print("serving at port", server_options.port)
        httpd.serve_forever()

View file

@ -0,0 +1,233 @@
import math
from io import BytesIO, IOBase
from typing import Iterator, List, Optional, Tuple
class ChunksIO(IOBase):
    """handler for HTTP/1.1 chunked transfer-encoded (RFC 9112 §7) byte streams

    Compact and predictable implementation of a stream handler, which exposes
    a common IOBase interface for treating chunked byte streams as pure,
    unencoded byte streams.

    .. notice::

        The implementation is currently only concerned with read operations,
        though the layout is prepared for an easy straightforward
        implementation of write operations.

    NOTE(review): the size segment of a chunk is decoded as a raw big-endian
    integer and chunk data is expected to be followed directly by the next
    size segment. RFC 9112 transmits the size as hexadecimal text and
    terminates chunk data with CRLF - confirm which wire format is intended.
    """

    #: maximum allowed size of a chunk in bytes
    # 10 MiB by default, just guessing 10 MiB is a sensible limit
    max_chunk_size = int(10 * (1024 ** 2))

    #: optional write-through buffer
    _buffer: Optional[BytesIO]

    #: (size segment length, data length) of every chunk encountered so far
    _chunks_size: List[Tuple[int, int]]

    #: index of current chunk
    _current_chunk: int

    #: cursor position on the underlying stream, as the stream is not expected
    # to implement ``tell()``. Limiting factor of how large the stream may be.
    # Look at ``sys.maxsize`` for more information.
    _cursor: int

    #: position on the underlying stream at which the data of the current
    # chunk ends
    _chunk_end: int

    #: chunk encoded stream
    _stream: BytesIO

    def __init__(
        self,
        stream: BytesIO,
        buffer: Optional[BytesIO] = None,
        max_chunk_size: Optional[int] = None,
    ):
        """initialize the instance

        .. notice::

            The write-through buffer is required to be seekable, writable and
            readable and MUST be considered locked during any operation of the
            ChunksIO implementation. The buffer's cursor position does not
            reflect the cursor position of the underlying stream.

        :param stream: a byte-stream to abstract
        :param buffer: write-through buffer for all read operations on the
                       underlying stream. This can be useful, if the data
                       needs to be accessed again later on.
        :param max_chunk_size: the maximum size of a single chunk (excluding
                               its bytes size segment). Falls back to the
                               class default if omitted.
        :raises Exception: if the stream is not readable or the buffer is not
                           writable
        """
        if not stream.readable():
            raise Exception('expected readable stream')

        if buffer is not None and not buffer.writable():
            raise Exception('expected writable buffer')

        super().__init__()

        # always assigned, so that read operations can test for its presence
        self._buffer = buffer
        self._chunks_size = []
        self._current_chunk = 0
        self._cursor = 0
        self._chunk_end = 0
        self._stream = stream

        if max_chunk_size is not None:
            self.max_chunk_size = max_chunk_size

    @staticmethod
    def _parse_chunk_size(
        stream: BytesIO,
        max_size: int,
        head: bytes = b'',
    ) -> Tuple[int, int]:
        """parse a bytes size segment, optionally continuing from bytes that
        were already taken off the stream (``head``)
        """
        terminator = b'\r\n'

        # calculate the number of bytes the max_size byte representation
        # requires. This is a precaution so that the size segment can't be
        # arbitrarily long.
        limit = math.ceil(max_size.bit_length() / 8) + len(terminator)

        # the iteration could be handled with less system calls by reading a
        # larger *chunk* of data and iterating over that in-memory cache.
        # Though, this would come at the expense of unpredictable memory
        # consumption and would require a write-through buffer by default, in
        # addition to making the implementation more complex.
        raw = head
        for _ in range(limit - len(head)):
            byte = stream.read(1)
            if not byte:
                # underlying stream is exhausted
                break
            raw += byte

            # matching on the tail (instead of classifying single bytes)
            # keeps size bytes which happen to equal CR or LF intact
            if raw.endswith(terminator):
                size_bytes = raw[:-len(terminator)]
                if not size_bytes:
                    raise ValueError(
                        'terminator reached without having parsed ' +
                        'any byte size'
                    )
                size = int.from_bytes(size_bytes, byteorder='big')
                if size > max_size:
                    raise ValueError(
                        f'chunk size of {size} bytes exceeds the maximum ' +
                        f'of {max_size} bytes'
                    )
                return (len(raw), size)

        raise ValueError(
            'unable to reach terminator with a max chunk size of ' +
            f'{max_size / (1024 ** 2)} MiB'
        )

    @staticmethod
    def get_chunk_size(
        stream: BytesIO,
        max_size: int,
    ) -> Tuple[int, int]:
        """get the size of the next chunk from a chunk encoded byte stream

        stream cursor position is assumed to be at the start of the preceding
        byte size segment of chunk data. The max_size parameter is converted
        to its bytes representation, to determine early on if a read is
        feasible and won't cause a denial-of-service.

        :param stream: the stream to read the chunk size from
        :param max_size: the maximum allowed size a chunk can be. I wasn't
                         able to find a definitive limit defined in the RFC so
                         this is guess working and at least curl has a pretty
                         big chunk size of more than 6 MiB.
        :returns: tuple of the size of the bytes size segment and the data
                  bytes size, whose sum is the total size of the chunk
        :raises ValueError: if no terminator is found within the permitted
                            number of bytes, if the terminator is not preceded
                            by any size byte or if the size exceeds max_size
        """
        return ChunksIO._parse_chunk_size(stream, max_size)

    def _terminated(self) -> bool:
        """whether the terminating (zero sized) chunk has been reached"""
        return bool(self._chunks_size) and self._chunks_size[-1][1] == 0

    def _load_next_chunk(self, allow_eof: bool) -> bool:
        """parse the size segment of the next chunk and make it current

        :param allow_eof: if set, an underlying stream that is exhausted
                          exactly at the chunk boundary is not an error
        :returns: False if allow_eof is set and the stream was exhausted,
                  True otherwise
        :raises ValueError: if the size segment can't be parsed
        """
        index = len(self._chunks_size)

        head = b''
        if allow_eof:
            head = self._stream.read(1)
            if not head:
                return False

        try:
            header = ChunksIO._parse_chunk_size(
                self._stream,
                self.max_chunk_size,
                head
            )
        except ValueError as e:
            raise ValueError(f'chunk #{index}: {e}') from e

        self._chunks_size.append(header)
        self._current_chunk = index
        self._cursor += header[0]
        self._chunk_end = self._cursor + header[1]

        return True

    def read(self, size: Optional[int] = -1) -> bytes:
        """read an arbitrary amount of data from the underlying stream.

        The read may span any number of chunks. Once the terminating chunk
        has been reached, nothing further is read from the underlying stream
        and empty bytes are returned.

        :param size: number of decoded bytes to read. If negative or None,
                     everything up to the terminating chunk is read. In that
                     mode a stream that ends exactly at a chunk boundary is
                     tolerated, as long as at least one chunk was parsed.
        :returns: the decoded bytes
        :raises ValueError: if a size segment can't be parsed or the stream
                            yields fewer bytes than a chunk announced
        """
        read_all = size is None or size < 0
        wanted = 0 if size is None or size < 0 else size
        pieces: List[bytes] = []

        while (read_all or wanted > 0) and not self._terminated():
            available = self._chunk_end - self._cursor

            # nothing parsed yet or current chunk exhausted
            if available <= 0:
                tolerate_eof = read_all and bool(self._chunks_size)
                if not self._load_next_chunk(allow_eof=tolerate_eof):
                    break
                continue

            take = available if read_all else min(wanted, available)
            data = self._stream.read(take)
            self._cursor += len(data)

            if len(data) != take:
                raise ValueError(
                    f'chunk #{self._current_chunk}: stream yielded too few ' +
                    'bytes'
                )

            # every piece is written through exactly once
            if self._buffer is not None:
                self._buffer.write(data)

            pieces.append(data)
            wanted -= take

        return b''.join(pieces)

    def readable(self) -> bool:
        """report that the stream supports read operations"""
        return True

    def readChunk(self) -> bytes:
        """read until the end of a chunk

        The cursor has to be at the start position of a chunk (or at the end
        of the previous one). Seeking backwards on a buffered instance is
        currently not implemented.

        :returns: the data of the chunk, empty bytes for the terminating chunk
        :raises Exception: if the cursor is in the middle of a chunk
        :raises ValueError: if the size segment of the chunk can't be parsed
        """
        if self._terminated():
            return b''

        if self._cursor >= self._chunk_end:
            self._load_next_chunk(allow_eof=False)

        size = self._chunks_size[self._current_chunk][1]

        if self._cursor != self._chunk_end - size:
            raise Exception(
                'cursor not at starting position of a chunk. Mixing ' +
                'read() and readChunk() calls is currently not supported.'
            )

        # read() takes care of the write-through buffer and the cursor
        return self.read(size)

    def readChunks(self) -> Iterator[bytes]:
        """yield all chunks until the terminating 0 byte chunk is reached
        """
        while True:
            chunk = self.readChunk()
            if not chunk:
                return
            yield chunk

    def tell(self) -> int:
        """return the current position on the underlying (encoded) stream
        """
        return self._cursor

0
tests/__init__.py Normal file
View file

View file

View file

View file

@ -0,0 +1,186 @@
from io import BytesIO, IOBase
import unittest
from byteb4rb1e_utils.io import ChunksIO
class TestGetChunkSize(unittest.TestCase):
    """parsing of the bytes size segment that precedes the data of a chunk

    Samples are built with ``bytes([...])`` instead of ``int.to_bytes()``
    without a length argument, as the latter requires Python 3.11.
    """

    def test_default(self):
        """a single size byte followed by the terminator is parsed"""
        sample = bytes([100]) + b'\r\n'

        self.assertEqual(
            ChunksIO.get_chunk_size(BytesIO(sample), ChunksIO.max_chunk_size),
            (3, 100)
        )

    def test_oversized(self):
        """four size bytes exceed the two bytes required to represent the
        maximum chunk size of 512
        """
        chunk_size = 512
        sample = bytes([1]) * 4 + b'\r\n'

        with self.assertRaises(ValueError) as result:
            ChunksIO.get_chunk_size(BytesIO(sample), max_size=chunk_size)

        self.assertIn('unable to reach terminator', str(result.exception))

    def test_missing_terminator(self):
        """the stream never yields a terminator"""
        chunk_size = 512
        sample = bytes([9]) * chunk_size

        with self.assertRaises(ValueError) as result:
            ChunksIO.get_chunk_size(BytesIO(sample), max_size=chunk_size)

        self.assertIn('unable to reach terminator', str(result.exception))

    def test_missing_byte_size(self):
        """the terminator is not preceded by any size byte"""
        chunk_size = 512
        sample = b'\r\n'

        with self.assertRaises(ValueError) as result:
            ChunksIO.get_chunk_size(BytesIO(sample), max_size=chunk_size)

        self.assertIn(
            'without having parsed any byte size',
            str(result.exception)
        )
class TestRead(unittest.TestCase):
    """read operations on chunk encoded streams"""

    @staticmethod
    def _chunk(data: bytes) -> bytes:
        """encode data as a chunk with a single size byte (data must be
        shorter than 256 bytes)

        ``bytes([...])`` is used instead of ``int.to_bytes()`` without a
        length argument, as the latter requires Python 3.11.
        """
        return bytes([len(data)]) + b'\r\n' + data

    def test_default(self):
        """reading without a size yields the data of a single chunk"""
        chunk = self._chunk(b'Foobar')

        self.assertEqual(ChunksIO(BytesIO(chunk)).read(), b'Foobar')

    def test_perfect_multiple(self):
        """read operations match sizes of chunks

          chunk 1    chunk 2
        |----------|---------|
        >----------|
                   ^ first requested read ends here
                   >---------|
                             ^ second requested read ends here
        """
        stream = self._chunk(b'Foobar') + self._chunk(b'RTFM')

        handler = ChunksIO(BytesIO(stream))

        self.assertEqual(handler.read(6), b'Foobar')
        self.assertEqual(handler.read(4), b'RTFM')

    def test_imperfect_multiple_first(self):
        """first read operation exceeds the size of the first chunk

          chunk 1    chunk 2
        |----------|---------|
        >--------------|
                       ^ first requested read ends here
                       >-----|
                             ^ second requested read ends here
        """
        stream = self._chunk(b'Foo') + self._chunk(b'barRTFM')

        handler = ChunksIO(BytesIO(stream))

        self.assertEqual(handler.read(6), b'Foobar')
        self.assertEqual(handler.read(4), b'RTFM')

    def test_imperfect_multiple_second(self):
        """first read operation ends before the end of the first chunk, the
        second one crosses the chunk boundary

          chunk 1    chunk 2
        |----------|---------|
        >------|
               ^ first requested read ends here
               >-------------|
                             ^ second requested read ends here
        """
        stream = self._chunk(b'FoobarRT') + self._chunk(b'FM')

        handler = ChunksIO(BytesIO(stream))

        self.assertEqual(handler.read(6), b'Foobar')
        self.assertEqual(handler.read(4), b'RTFM')

    def test_properly_terminated(self):
        """a proper termination chunk is emitted, resulting in no further
        attempts to retrieve chunks
        """
        stream = self._chunk(b'Foobar') + self._chunk(b'')

        handler = ChunksIO(BytesIO(stream))

        self.assertEqual(handler.read(6), b'Foobar')
        self.assertEqual(handler.read(4), b'')
        self.assertEqual(handler.read(4), b'')

    def test_not_properly_terminated(self):
        """no termination chunk is emitted, so the attempt to retrieve a
        further chunk fails, because its size segment can't be parsed
        """
        handler = ChunksIO(BytesIO(self._chunk(b'Foobar')))

        self.assertEqual(handler.read(6), b'Foobar')

        with self.assertRaises(ValueError) as context:
            handler.read(4)

        self.assertIn('unable to reach terminator', str(context.exception))