Merge branch 'feature/20' into develop

This commit is contained in:
Tiara Rodney 2026-06-06 14:55:06 +02:00
commit b707325c69
3 changed files with 316 additions and 7 deletions

2
TODO
View file

@ -267,7 +267,7 @@ Content-Type: application/issue
ID: 20
Type: feature
Title: cookie-persisting HTTP session client
Status: in-progress
Status: done
Priority: medium
Created: 2026-06-06
Relationships:

View file

@ -5,6 +5,8 @@ Thin urllib wrapper with retry-on-rate-limit. No domain knowledge —
GitHub, Bitbucket, etc. are handled by higher-level modules.
"""
from dataclasses import dataclass
import http.cookiejar
import json
import time
from typing import Any, Dict, Optional
@ -13,17 +15,107 @@ import urllib.parse
from warnings import warn
@dataclass(frozen=True)
class HttpResponse:
    """Immutable result of a single HTTP exchange.

    The attributes are dataclass fields, so the generated initializer
    accepts them positionally in the order declared below.  No
    hand-written ``__init__`` is defined: assigning attributes inside one
    would raise ``FrozenInstanceError`` on a frozen dataclass.
    """

    # HTTP status code of the response.
    status_code: int
    # Response headers as a name -> value mapping.
    headers: dict[str, str]
    # Raw, undecoded response body.
    data: bytes
    # Reason phrase; ``None`` when the producer does not supply one.
    reason: Optional[str] = None

    def json(self) -> Any:
        """Parse the body as UTF-8 encoded JSON and return the result.

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
            json.JSONDecodeError: If the decoded text is not valid JSON.
        """
        return json.loads(self.data.decode("utf-8"))

    @property
    def text(self) -> str:
        """Body decoded as UTF-8; undecodable bytes become U+FFFD."""
        return self.data.decode("utf-8", errors="replace")
class HttpSession:
    """HTTP client that persists cookies across requests.

    Suitable for sites that require login followed by
    cookie-authenticated page fetches.  Responses with an HTTP error
    status are returned as ordinary ``HttpResponse`` objects rather than
    raised as ``urllib.error.HTTPError``.
    """

    def __init__(
        self,
        default_headers: dict[str, str] | None = None,
        timeout: int = 30,
    ) -> None:
        """Create a session backed by its own cookie jar.

        Args:
            default_headers: Headers sent with every request; per-request
                headers override entries with the same key.
            timeout: Timeout handed to the opener for every request.
        """
        self._timeout = timeout
        self._default_headers = default_headers or {}
        self._jar = http.cookiejar.CookieJar()
        self._opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(self._jar),
        )

    def get(
        self,
        url: str,
        params: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
    ) -> HttpResponse:
        """Send a GET request.

        Args:
            url: Target URL; may already carry a query string or fragment.
            params: Extra query parameters, form-encoded and appended to
                any query string already present in ``url``.
            headers: Per-request headers layered over the defaults.

        Returns:
            The response, including for HTTP error statuses.
        """
        if params:
            # Splice into the existing query (and keep the fragment last)
            # rather than blindly appending a second "?".
            parts = urllib.parse.urlsplit(url)
            encoded = urllib.parse.urlencode(params)
            query = f"{parts.query}&{encoded}" if parts.query else encoded
            url = urllib.parse.urlunsplit(parts._replace(query=query))
        req = urllib.request.Request(
            url,
            headers=self._merged_headers(headers),
            method="GET",
        )
        return self._send(req)

    def post(
        self,
        url: str,
        data: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
    ) -> HttpResponse:
        """Send a POST request with an optional form-encoded body.

        Args:
            url: Target URL.
            data: Form fields; when empty or ``None`` no body is sent.
            headers: Per-request headers layered over the defaults.

        Returns:
            The response, including for HTTP error statuses.
        """
        body = urllib.parse.urlencode(data).encode() if data else None
        merged = self._merged_headers(headers)
        # Request normalises header-name case, so a caller-supplied
        # "content-type" must count as present; otherwise the default
        # added here would silently replace it.
        has_content_type = any(
            name.lower() == "content-type" for name in merged
        )
        if body is not None and not has_content_type:
            merged["Content-Type"] = "application/x-www-form-urlencoded"
        req = urllib.request.Request(
            url,
            data=body,
            headers=merged,
            method="POST",
        )
        return self._send(req)

    def _send(self, req: urllib.request.Request) -> HttpResponse:
        """Dispatch ``req`` through the cookie-aware opener.

        ``urllib.error.HTTPError`` is converted into an ``HttpResponse``;
        any other exception raised by the opener propagates unchanged.
        """
        try:
            with self._opener.open(req, timeout=self._timeout) as resp:
                return HttpResponse(
                    status_code=resp.getcode(),
                    headers=dict(resp.getheaders()),
                    data=resp.read(),
                    # Not every response-like object exposes ``reason``.
                    reason=getattr(resp, "reason", None),
                )
        except urllib.error.HTTPError as e:
            # HTTPError is itself a file-like response object; close it
            # once the body has been read so it is not left open.
            try:
                return HttpResponse(
                    status_code=e.code,
                    headers=(
                        dict(e.headers.items())
                        if e.headers is not None
                        else {}
                    ),
                    data=e.read(),
                    reason=e.reason,
                )
            finally:
                e.close()

    def _merged_headers(
        self, extra: dict[str, str] | None
    ) -> dict[str, str]:
        """Return a new dict of default headers overlaid with ``extra``."""
        merged = dict(self._default_headers)
        if extra:
            merged.update(extra)
        return merged
def _request(
url: str,

View file

@ -0,0 +1,217 @@
"""Tests for the generic HTTP client."""
import email.message
import io
import urllib.error
import urllib.parse
import urllib.request
from types import TracebackType
from typing import Dict, List, Optional, Tuple, Type, Union
import pytest
from byteb4rb1e.utils.http.client import HttpResponse, HttpSession
class _FakeRawResponse:
"""Stands in for the object returned by OpenerDirector.open()."""
def __init__(
self,
status: int = 200,
headers: Optional[Dict[str, str]] = None,
data: bytes = b"",
) -> None:
self._status = status
self._headers = headers or {}
self._data = data
def getcode(self) -> int:
return self._status
def getheaders(self) -> List[Tuple[str, str]]:
return list(self._headers.items())
def read(self) -> bytes:
return self._data
def __enter__(self) -> "_FakeRawResponse":
return self
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc: Optional[BaseException],
tb: Optional[TracebackType],
) -> None:
return None
class _FakeOpener:
"""Records requests and replays canned responses."""
def __init__(
self,
responses: Optional[
List[Union[_FakeRawResponse, Exception]]
] = None,
) -> None:
self.requests: List[urllib.request.Request] = []
self._responses = list(responses or [_FakeRawResponse()])
def open(
self,
req: urllib.request.Request,
timeout: Optional[int] = None,
) -> _FakeRawResponse:
self.requests.append(req)
response = self._responses.pop(0)
if isinstance(response, Exception):
raise response
return response
def _http_error(
code: int = 404,
data: bytes = b"",
headers: Optional[Dict[str, str]] = None,
) -> urllib.error.HTTPError:
hdrs = email.message.Message()
for key, value in (headers or {}).items():
hdrs[key] = value
return urllib.error.HTTPError(
"http://testserver/", code, "error", hdrs, io.BytesIO(data),
)
class TestHttpResponse:
    """Behaviour of the immutable ``HttpResponse`` value object."""

    def test_json(self) -> None:
        resp = HttpResponse(200, {}, b'{"a": 1}')
        assert resp.json() == {"a": 1}

    def test_text(self) -> None:
        resp = HttpResponse(200, {}, b"hello")
        assert resp.text == "hello"

    def test_text_replaces_invalid_utf8(self) -> None:
        resp = HttpResponse(200, {}, b"\xff\xfe")
        # U+FFFD is written as an escape so the expectation cannot be
        # corrupted by tools that mangle non-ASCII source characters.
        assert "\ufffd" in resp.text

    def test_reason_defaults_to_none(self) -> None:
        resp = HttpResponse(200, {}, b"")
        assert resp.reason is None

    def test_frozen(self) -> None:
        # Imported locally to keep this test self-contained.
        from dataclasses import FrozenInstanceError

        resp = HttpResponse(200, {}, b"")
        # Expect the specific error so unrelated failures are not masked.
        with pytest.raises(FrozenInstanceError):
            resp.status_code = 500
class TestHttpSession:
    """Request construction and response mapping of ``HttpSession``."""

    @staticmethod
    def _session_using(opener: _FakeOpener, **kwargs) -> HttpSession:
        """Build a session whose opener is replaced by ``opener``."""
        session = HttpSession(**kwargs)
        session._opener = opener
        return session

    def test_opener_has_cookie_processor(self) -> None:
        session = HttpSession()
        cookie_handlers = []
        for handler in session._opener.handlers:
            if isinstance(handler, urllib.request.HTTPCookieProcessor):
                cookie_handlers.append(handler)
        assert len(cookie_handlers) == 1
        assert cookie_handlers[0].cookiejar is session._jar

    def test_get(self) -> None:
        canned = _FakeRawResponse(200, {"X-Foo": "bar"}, b"body")
        opener = _FakeOpener([canned])
        session = self._session_using(opener)

        resp = session.get("http://testserver/page")

        assert resp.status_code == 200
        assert resp.data == b"body"
        assert resp.headers == {"X-Foo": "bar"}
        sent = opener.requests[0]
        assert sent.get_method() == "GET"
        assert sent.full_url == "http://testserver/page"

    def test_get_with_params(self) -> None:
        opener = _FakeOpener()
        session = self._session_using(opener)

        session.get("http://testserver/page", params={"a": "1", "b": "x y"})

        sent = opener.requests[0]
        assert sent.full_url == "http://testserver/page?a=1&b=x+y"

    def test_default_headers_sent(self) -> None:
        opener = _FakeOpener()
        session = self._session_using(
            opener, default_headers={"User-Agent": "test"}
        )

        session.get("http://testserver/")

        assert opener.requests[0].get_header("User-agent") == "test"

    def test_request_headers_override_defaults(self) -> None:
        opener = _FakeOpener()
        session = self._session_using(
            opener, default_headers={"X-Token": "default"}
        )

        session.get("http://testserver/", headers={"X-Token": "override"})

        assert opener.requests[0].get_header("X-token") == "override"

    def test_post_form_encodes_data(self) -> None:
        opener = _FakeOpener()
        session = self._session_using(opener)

        session.post("http://testserver/login", data={"user": "u", "pass": "p"})

        sent = opener.requests[0]
        assert sent.get_method() == "POST"
        assert isinstance(sent.data, bytes)
        decoded_fields = dict(urllib.parse.parse_qsl(sent.data.decode()))
        assert decoded_fields == {"user": "u", "pass": "p"}
        content_type = sent.get_header("Content-type")
        assert content_type == "application/x-www-form-urlencoded"

    def test_post_keeps_explicit_content_type(self) -> None:
        opener = _FakeOpener()
        session = self._session_using(opener)

        session.post(
            "http://testserver/",
            data={"a": "1"},
            headers={"Content-Type": "text/plain"},
        )

        assert opener.requests[0].get_header("Content-type") == "text/plain"

    def test_post_without_data(self) -> None:
        opener = _FakeOpener()
        session = self._session_using(opener)

        session.post("http://testserver/")

        assert opener.requests[0].data is None

    def test_http_error_returned_as_response(self) -> None:
        failure = _http_error(404, b"missing", {"X-Err": "yes"})
        opener = _FakeOpener([failure])
        session = self._session_using(opener)

        resp = session.get("http://testserver/nope")

        assert resp.status_code == 404
        assert resp.data == b"missing"
        assert resp.headers["X-Err"] == "yes"