Merge branch 'feature/20' into develop
This commit is contained in:
commit
b707325c69
3 changed files with 316 additions and 7 deletions
2
TODO
2
TODO
|
|
@@ -267,7 +267,7 @@ Content-Type: application/issue
|
||||||
ID: 20
|
ID: 20
|
||||||
Type: feature
|
Type: feature
|
||||||
Title: cookie-persisting HTTP session client
|
Title: cookie-persisting HTTP session client
|
||||||
Status: in-progress
|
Status: done
|
||||||
Priority: medium
|
Priority: medium
|
||||||
Created: 2026-06-06
|
Created: 2026-06-06
|
||||||
Relationships:
|
Relationships:
|
||||||
|
|
|
||||||
|
|
@@ -5,6 +5,8 @@ Thin urllib wrapper with retry-on-rate-limit. No domain knowledge —
|
||||||
GitHub, Bitbucket, etc. are handled by higher-level modules.
|
GitHub, Bitbucket, etc. are handled by higher-level modules.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import http.cookiejar
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
@@ -13,17 +15,107 @@ import urllib.parse
|
||||||
from warnings import warn
|
from warnings import warn
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
class HttpResponse:
|
class HttpResponse:
|
||||||
def __init__(self, status: int, headers: dict, data: bytes, reason: str):
|
status_code: int
|
||||||
self.status_code = status
|
headers: dict[str, str]
|
||||||
self.headers = headers
|
data: bytes
|
||||||
self.data = data
|
reason: Optional[str] = None
|
||||||
self.reason = reason
|
|
||||||
self.text = data.decode("utf-8", errors="replace")
|
|
||||||
|
|
||||||
def json(self):
|
def json(self):
|
||||||
return json.loads(self.data.decode("utf-8"))
|
return json.loads(self.data.decode("utf-8"))
|
||||||
|
|
||||||
|
@property
|
||||||
|
def text(self) -> str:
|
||||||
|
return self.data.decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
|
||||||
|
class HttpSession:
|
||||||
|
"""HTTP client that persists cookies across requests.
|
||||||
|
|
||||||
|
Suitable for sites that require login followed by
|
||||||
|
cookie-authenticated page fetches.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
default_headers: dict[str, str] | None = None,
|
||||||
|
timeout: int = 30,
|
||||||
|
) -> None:
|
||||||
|
self._timeout = timeout
|
||||||
|
self._default_headers = default_headers or {}
|
||||||
|
self._jar = http.cookiejar.CookieJar()
|
||||||
|
self._opener = urllib.request.build_opener(
|
||||||
|
urllib.request.HTTPCookieProcessor(self._jar),
|
||||||
|
)
|
||||||
|
|
||||||
|
def get(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
params: dict[str, str] | None = None,
|
||||||
|
headers: dict[str, str] | None = None,
|
||||||
|
) -> HttpResponse:
|
||||||
|
if params:
|
||||||
|
query = urllib.parse.urlencode(params)
|
||||||
|
url = f"{url}?{query}"
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
url,
|
||||||
|
headers=self._merged_headers(headers),
|
||||||
|
method="GET",
|
||||||
|
)
|
||||||
|
return self._send(req)
|
||||||
|
|
||||||
|
def post(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
data: dict[str, str] | None = None,
|
||||||
|
headers: dict[str, str] | None = None,
|
||||||
|
) -> HttpResponse:
|
||||||
|
body = (
|
||||||
|
urllib.parse.urlencode(data).encode()
|
||||||
|
if data else None
|
||||||
|
)
|
||||||
|
|
||||||
|
merged = self._merged_headers(headers)
|
||||||
|
if data and "Content-Type" not in merged:
|
||||||
|
merged["Content-Type"] = (
|
||||||
|
"application/x-www-form-urlencoded"
|
||||||
|
)
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
url,
|
||||||
|
data=body,
|
||||||
|
headers=merged,
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
|
return self._send(req)
|
||||||
|
|
||||||
|
def _send(self, req: urllib.request.Request) -> HttpResponse:
|
||||||
|
try:
|
||||||
|
with self._opener.open(
|
||||||
|
req, timeout=self._timeout
|
||||||
|
) as resp:
|
||||||
|
return HttpResponse(
|
||||||
|
status_code=resp.getcode(),
|
||||||
|
headers=dict(resp.getheaders()),
|
||||||
|
data=resp.read(),
|
||||||
|
)
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
return HttpResponse(
|
||||||
|
status_code=e.code,
|
||||||
|
headers=dict(e.headers.items()),
|
||||||
|
data=e.read(),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _merged_headers(
|
||||||
|
self, extra: dict[str, str] | None
|
||||||
|
) -> dict[str, str]:
|
||||||
|
merged = dict(self._default_headers)
|
||||||
|
if extra:
|
||||||
|
merged.update(extra)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
|
||||||
def _request(
|
def _request(
|
||||||
url: str,
|
url: str,
|
||||||
|
|
|
||||||
217
tests/unit/byteb4rb1e/utils/http/test_client.py
Normal file
217
tests/unit/byteb4rb1e/utils/http/test_client.py
Normal file
|
|
@@ -0,0 +1,217 @@
|
||||||
|
"""Tests for the generic HTTP client."""
|
||||||
|
|
||||||
|
import email.message
|
||||||
|
import io
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
from types import TracebackType
|
||||||
|
from typing import Dict, List, Optional, Tuple, Type, Union
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from byteb4rb1e.utils.http.client import HttpResponse, HttpSession
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeRawResponse:
|
||||||
|
"""Stands in for the object returned by OpenerDirector.open()."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
status: int = 200,
|
||||||
|
headers: Optional[Dict[str, str]] = None,
|
||||||
|
data: bytes = b"",
|
||||||
|
) -> None:
|
||||||
|
self._status = status
|
||||||
|
self._headers = headers or {}
|
||||||
|
self._data = data
|
||||||
|
|
||||||
|
def getcode(self) -> int:
|
||||||
|
return self._status
|
||||||
|
|
||||||
|
def getheaders(self) -> List[Tuple[str, str]]:
|
||||||
|
return list(self._headers.items())
|
||||||
|
|
||||||
|
def read(self) -> bytes:
|
||||||
|
return self._data
|
||||||
|
|
||||||
|
def __enter__(self) -> "_FakeRawResponse":
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(
|
||||||
|
self,
|
||||||
|
exc_type: Optional[Type[BaseException]],
|
||||||
|
exc: Optional[BaseException],
|
||||||
|
tb: Optional[TracebackType],
|
||||||
|
) -> None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeOpener:
|
||||||
|
"""Records requests and replays canned responses."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
responses: Optional[
|
||||||
|
List[Union[_FakeRawResponse, Exception]]
|
||||||
|
] = None,
|
||||||
|
) -> None:
|
||||||
|
self.requests: List[urllib.request.Request] = []
|
||||||
|
self._responses = list(responses or [_FakeRawResponse()])
|
||||||
|
|
||||||
|
def open(
|
||||||
|
self,
|
||||||
|
req: urllib.request.Request,
|
||||||
|
timeout: Optional[int] = None,
|
||||||
|
) -> _FakeRawResponse:
|
||||||
|
self.requests.append(req)
|
||||||
|
response = self._responses.pop(0)
|
||||||
|
if isinstance(response, Exception):
|
||||||
|
raise response
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def _http_error(
|
||||||
|
code: int = 404,
|
||||||
|
data: bytes = b"",
|
||||||
|
headers: Optional[Dict[str, str]] = None,
|
||||||
|
) -> urllib.error.HTTPError:
|
||||||
|
hdrs = email.message.Message()
|
||||||
|
for key, value in (headers or {}).items():
|
||||||
|
hdrs[key] = value
|
||||||
|
return urllib.error.HTTPError(
|
||||||
|
"http://testserver/", code, "error", hdrs, io.BytesIO(data),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestHttpResponse:
|
||||||
|
|
||||||
|
def test_json(self) -> None:
|
||||||
|
resp = HttpResponse(200, {}, b'{"a": 1}')
|
||||||
|
assert resp.json() == {"a": 1}
|
||||||
|
|
||||||
|
def test_text(self) -> None:
|
||||||
|
resp = HttpResponse(200, {}, b"hello")
|
||||||
|
assert resp.text == "hello"
|
||||||
|
|
||||||
|
def test_text_replaces_invalid_utf8(self) -> None:
|
||||||
|
resp = HttpResponse(200, {}, b"\xff\xfe")
|
||||||
|
assert "\ufffd" in resp.text
|
||||||
|
|
||||||
|
def test_reason_defaults_to_none(self) -> None:
|
||||||
|
resp = HttpResponse(200, {}, b"")
|
||||||
|
assert resp.reason is None
|
||||||
|
|
||||||
|
def test_frozen(self) -> None:
|
||||||
|
resp = HttpResponse(200, {}, b"")
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
resp.status_code = 500
|
||||||
|
|
||||||
|
|
||||||
|
class TestHttpSession:
|
||||||
|
|
||||||
|
def test_opener_has_cookie_processor(self) -> None:
|
||||||
|
session = HttpSession()
|
||||||
|
processors = [
|
||||||
|
h for h in session._opener.handlers
|
||||||
|
if isinstance(h, urllib.request.HTTPCookieProcessor)
|
||||||
|
]
|
||||||
|
assert len(processors) == 1
|
||||||
|
assert processors[0].cookiejar is session._jar
|
||||||
|
|
||||||
|
def test_get(self) -> None:
|
||||||
|
opener = _FakeOpener([
|
||||||
|
_FakeRawResponse(200, {"X-Foo": "bar"}, b"body"),
|
||||||
|
])
|
||||||
|
session = HttpSession()
|
||||||
|
session._opener = opener
|
||||||
|
|
||||||
|
resp = session.get("http://testserver/page")
|
||||||
|
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.data == b"body"
|
||||||
|
assert resp.headers == {"X-Foo": "bar"}
|
||||||
|
assert opener.requests[0].get_method() == "GET"
|
||||||
|
assert opener.requests[0].full_url == "http://testserver/page"
|
||||||
|
|
||||||
|
def test_get_with_params(self) -> None:
|
||||||
|
opener = _FakeOpener()
|
||||||
|
session = HttpSession()
|
||||||
|
session._opener = opener
|
||||||
|
|
||||||
|
session.get("http://testserver/page", params={"a": "1", "b": "x y"})
|
||||||
|
|
||||||
|
assert opener.requests[0].full_url == (
|
||||||
|
"http://testserver/page?a=1&b=x+y"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_default_headers_sent(self) -> None:
|
||||||
|
opener = _FakeOpener()
|
||||||
|
session = HttpSession(default_headers={"User-Agent": "test"})
|
||||||
|
session._opener = opener
|
||||||
|
|
||||||
|
session.get("http://testserver/")
|
||||||
|
|
||||||
|
assert opener.requests[0].get_header("User-agent") == "test"
|
||||||
|
|
||||||
|
def test_request_headers_override_defaults(self) -> None:
|
||||||
|
opener = _FakeOpener()
|
||||||
|
session = HttpSession(default_headers={"X-Token": "default"})
|
||||||
|
session._opener = opener
|
||||||
|
|
||||||
|
session.get("http://testserver/", headers={"X-Token": "override"})
|
||||||
|
|
||||||
|
assert opener.requests[0].get_header("X-token") == "override"
|
||||||
|
|
||||||
|
def test_post_form_encodes_data(self) -> None:
|
||||||
|
opener = _FakeOpener()
|
||||||
|
session = HttpSession()
|
||||||
|
session._opener = opener
|
||||||
|
|
||||||
|
session.post("http://testserver/login", data={"user": "u", "pass": "p"})
|
||||||
|
|
||||||
|
req = opener.requests[0]
|
||||||
|
assert req.get_method() == "POST"
|
||||||
|
assert isinstance(req.data, bytes)
|
||||||
|
assert dict(urllib.parse.parse_qsl(req.data.decode())) == {
|
||||||
|
"user": "u",
|
||||||
|
"pass": "p",
|
||||||
|
}
|
||||||
|
assert req.get_header("Content-type") == (
|
||||||
|
"application/x-www-form-urlencoded"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_post_keeps_explicit_content_type(self) -> None:
|
||||||
|
opener = _FakeOpener()
|
||||||
|
session = HttpSession()
|
||||||
|
session._opener = opener
|
||||||
|
|
||||||
|
session.post(
|
||||||
|
"http://testserver/",
|
||||||
|
data={"a": "1"},
|
||||||
|
headers={"Content-Type": "text/plain"},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert opener.requests[0].get_header("Content-type") == "text/plain"
|
||||||
|
|
||||||
|
def test_post_without_data(self) -> None:
|
||||||
|
opener = _FakeOpener()
|
||||||
|
session = HttpSession()
|
||||||
|
session._opener = opener
|
||||||
|
|
||||||
|
session.post("http://testserver/")
|
||||||
|
|
||||||
|
assert opener.requests[0].data is None
|
||||||
|
|
||||||
|
def test_http_error_returned_as_response(self) -> None:
|
||||||
|
opener = _FakeOpener([
|
||||||
|
_http_error(404, b"missing", {"X-Err": "yes"}),
|
||||||
|
])
|
||||||
|
session = HttpSession()
|
||||||
|
session._opener = opener
|
||||||
|
|
||||||
|
resp = session.get("http://testserver/nope")
|
||||||
|
|
||||||
|
assert resp.status_code == 404
|
||||||
|
assert resp.data == b"missing"
|
||||||
|
assert resp.headers["X-Err"] == "yes"
|
||||||
Loading…
Add table
Add a link
Reference in a new issue