Merge branch 'feature/20' into develop
This commit is contained in:
commit
b707325c69
3 changed files with 316 additions and 7 deletions
2
TODO
2
TODO
|
|
@ -267,7 +267,7 @@ Content-Type: application/issue
|
|||
ID: 20
|
||||
Type: feature
|
||||
Title: cookie-persisting HTTP session client
|
||||
Status: in-progress
|
||||
Status: done
|
||||
Priority: medium
|
||||
Created: 2026-06-06
|
||||
Relationships:
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ Thin urllib wrapper with retry-on-rate-limit. No domain knowledge —
|
|||
GitHub, Bitbucket, etc. are handled by higher-level modules.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
import http.cookiejar
|
||||
import json
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
|
@ -13,17 +15,107 @@ import urllib.parse
|
|||
from warnings import warn
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class HttpResponse:
    """Immutable snapshot of a single HTTP response.

    Fields may be given positionally in the order ``status_code``,
    ``headers``, ``data``, ``reason`` or by keyword. Instances are frozen:
    assigning to a field raises ``dataclasses.FrozenInstanceError``.
    """

    # Numeric HTTP status code.
    status_code: int
    # Response headers as a name -> value mapping.
    headers: dict[str, str]
    # Raw, undecoded response body.
    data: bytes
    # Status reason phrase; None when the creator does not supply one.
    reason: Optional[str] = None

    def json(self):
        """Parse the body as JSON after strict UTF-8 decoding.

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
            json.JSONDecodeError: If the decoded body is not valid JSON.
        """
        return json.loads(self.data.decode("utf-8"))

    @property
    def text(self) -> str:
        """Body decoded as UTF-8; undecodable bytes become U+FFFD."""
        return self.data.decode("utf-8", errors="replace")
|
||||
|
||||
|
||||
class HttpSession:
|
||||
"""HTTP client that persists cookies across requests.
|
||||
|
||||
Suitable for sites that require login followed by
|
||||
cookie-authenticated page fetches.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
default_headers: dict[str, str] | None = None,
|
||||
timeout: int = 30,
|
||||
) -> None:
|
||||
self._timeout = timeout
|
||||
self._default_headers = default_headers or {}
|
||||
self._jar = http.cookiejar.CookieJar()
|
||||
self._opener = urllib.request.build_opener(
|
||||
urllib.request.HTTPCookieProcessor(self._jar),
|
||||
)
|
||||
|
||||
def get(
|
||||
self,
|
||||
url: str,
|
||||
params: dict[str, str] | None = None,
|
||||
headers: dict[str, str] | None = None,
|
||||
) -> HttpResponse:
|
||||
if params:
|
||||
query = urllib.parse.urlencode(params)
|
||||
url = f"{url}?{query}"
|
||||
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers=self._merged_headers(headers),
|
||||
method="GET",
|
||||
)
|
||||
return self._send(req)
|
||||
|
||||
def post(
|
||||
self,
|
||||
url: str,
|
||||
data: dict[str, str] | None = None,
|
||||
headers: dict[str, str] | None = None,
|
||||
) -> HttpResponse:
|
||||
body = (
|
||||
urllib.parse.urlencode(data).encode()
|
||||
if data else None
|
||||
)
|
||||
|
||||
merged = self._merged_headers(headers)
|
||||
if data and "Content-Type" not in merged:
|
||||
merged["Content-Type"] = (
|
||||
"application/x-www-form-urlencoded"
|
||||
)
|
||||
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
data=body,
|
||||
headers=merged,
|
||||
method="POST",
|
||||
)
|
||||
return self._send(req)
|
||||
|
||||
def _send(self, req: urllib.request.Request) -> HttpResponse:
|
||||
try:
|
||||
with self._opener.open(
|
||||
req, timeout=self._timeout
|
||||
) as resp:
|
||||
return HttpResponse(
|
||||
status_code=resp.getcode(),
|
||||
headers=dict(resp.getheaders()),
|
||||
data=resp.read(),
|
||||
)
|
||||
except urllib.error.HTTPError as e:
|
||||
return HttpResponse(
|
||||
status_code=e.code,
|
||||
headers=dict(e.headers.items()),
|
||||
data=e.read(),
|
||||
)
|
||||
|
||||
def _merged_headers(
|
||||
self, extra: dict[str, str] | None
|
||||
) -> dict[str, str]:
|
||||
merged = dict(self._default_headers)
|
||||
if extra:
|
||||
merged.update(extra)
|
||||
return merged
|
||||
|
||||
|
||||
def _request(
|
||||
url: str,
|
||||
|
|
|
|||
217
tests/unit/byteb4rb1e/utils/http/test_client.py
Normal file
217
tests/unit/byteb4rb1e/utils/http/test_client.py
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
"""Tests for the generic HTTP client."""
|
||||
|
||||
import email.message
|
||||
import io
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from types import TracebackType
|
||||
from typing import Dict, List, Optional, Tuple, Type, Union
|
||||
|
||||
import pytest
|
||||
|
||||
from byteb4rb1e.utils.http.client import HttpResponse, HttpSession
|
||||
|
||||
|
||||
class _FakeRawResponse:
|
||||
"""Stands in for the object returned by OpenerDirector.open()."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
status: int = 200,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
data: bytes = b"",
|
||||
) -> None:
|
||||
self._status = status
|
||||
self._headers = headers or {}
|
||||
self._data = data
|
||||
|
||||
def getcode(self) -> int:
|
||||
return self._status
|
||||
|
||||
def getheaders(self) -> List[Tuple[str, str]]:
|
||||
return list(self._headers.items())
|
||||
|
||||
def read(self) -> bytes:
|
||||
return self._data
|
||||
|
||||
def __enter__(self) -> "_FakeRawResponse":
|
||||
return self
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: Optional[Type[BaseException]],
|
||||
exc: Optional[BaseException],
|
||||
tb: Optional[TracebackType],
|
||||
) -> None:
|
||||
return None
|
||||
|
||||
|
||||
class _FakeOpener:
    """Opener test double: logs each request and serves queued replies.

    Replies are consumed in order. A queued exception is raised instead of
    returned. With no replies given, a single default ``_FakeRawResponse``
    is queued.
    """

    def __init__(
        self,
        responses: Optional[
            List[Union[_FakeRawResponse, Exception]]
        ] = None,
    ) -> None:
        self.requests: List[urllib.request.Request] = []
        queued = responses or [_FakeRawResponse()]
        # Copy so that popping does not mutate the caller's list.
        self._responses = list(queued)

    def open(
        self,
        req: urllib.request.Request,
        timeout: Optional[int] = None,
    ) -> _FakeRawResponse:
        """Record ``req`` and return (or raise) the next queued reply."""
        self.requests.append(req)
        head = self._responses.pop(0)
        if not isinstance(head, Exception):
            return head
        raise head
|
||||
|
||||
|
||||
def _http_error(
|
||||
code: int = 404,
|
||||
data: bytes = b"",
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> urllib.error.HTTPError:
|
||||
hdrs = email.message.Message()
|
||||
for key, value in (headers or {}).items():
|
||||
hdrs[key] = value
|
||||
return urllib.error.HTTPError(
|
||||
"http://testserver/", code, "error", hdrs, io.BytesIO(data),
|
||||
)
|
||||
|
||||
|
||||
class TestHttpResponse:
    """Unit tests for the HttpResponse value object."""

    def test_json(self) -> None:
        resp = HttpResponse(200, {}, b'{"a": 1}')
        assert resp.json() == {"a": 1}

    def test_text(self) -> None:
        resp = HttpResponse(200, {}, b"hello")
        assert resp.text == "hello"

    def test_text_replaces_invalid_utf8(self) -> None:
        resp = HttpResponse(200, {}, b"\xff\xfe")
        # Escape form of U+FFFD REPLACEMENT CHARACTER, so the expectation
        # survives any re-encoding of this source file.
        assert "\ufffd" in resp.text

    def test_reason_defaults_to_none(self) -> None:
        resp = HttpResponse(200, {}, b"")
        assert resp.reason is None

    def test_frozen(self) -> None:
        # Imported locally so the test names the exact error that frozen
        # dataclasses raise instead of accepting any exception.
        from dataclasses import FrozenInstanceError

        resp = HttpResponse(200, {}, b"")
        with pytest.raises(FrozenInstanceError):
            resp.status_code = 500  # type: ignore[misc]
|
||||
|
||||
|
||||
class TestHttpSession:
    """Behavioral tests for HttpSession, driven through a fake opener."""

    @staticmethod
    def _wire(opener: _FakeOpener, **session_kwargs) -> HttpSession:
        """Return an HttpSession whose opener is replaced by ``opener``."""
        session = HttpSession(**session_kwargs)
        session._opener = opener
        return session

    def test_opener_has_cookie_processor(self) -> None:
        session = HttpSession()
        processors = list(
            filter(
                lambda h: isinstance(h, urllib.request.HTTPCookieProcessor),
                session._opener.handlers,
            )
        )
        assert len(processors) == 1
        assert processors[0].cookiejar is session._jar

    def test_get(self) -> None:
        canned = _FakeRawResponse(200, {"X-Foo": "bar"}, b"body")
        opener = _FakeOpener([canned])
        session = self._wire(opener)

        resp = session.get("http://testserver/page")

        assert resp.status_code == 200
        assert resp.data == b"body"
        assert resp.headers == {"X-Foo": "bar"}
        sent = opener.requests[0]
        assert sent.get_method() == "GET"
        assert sent.full_url == "http://testserver/page"

    def test_get_with_params(self) -> None:
        opener = _FakeOpener()
        session = self._wire(opener)

        session.get("http://testserver/page", params={"a": "1", "b": "x y"})

        expected_url = "http://testserver/page?a=1&b=x+y"
        assert opener.requests[0].full_url == expected_url

    def test_default_headers_sent(self) -> None:
        opener = _FakeOpener()
        session = self._wire(opener, default_headers={"User-Agent": "test"})

        session.get("http://testserver/")

        # urllib stores header names capitalized, hence "User-agent".
        assert opener.requests[0].get_header("User-agent") == "test"

    def test_request_headers_override_defaults(self) -> None:
        opener = _FakeOpener()
        session = self._wire(opener, default_headers={"X-Token": "default"})

        session.get("http://testserver/", headers={"X-Token": "override"})

        assert opener.requests[0].get_header("X-token") == "override"

    def test_post_form_encodes_data(self) -> None:
        opener = _FakeOpener()
        session = self._wire(opener)

        session.post("http://testserver/login", data={"user": "u", "pass": "p"})

        req = opener.requests[0]
        assert req.get_method() == "POST"
        assert isinstance(req.data, bytes)
        decoded_fields = dict(urllib.parse.parse_qsl(req.data.decode()))
        assert decoded_fields == {
            "user": "u",
            "pass": "p",
        }
        content_type = req.get_header("Content-type")
        assert content_type == "application/x-www-form-urlencoded"

    def test_post_keeps_explicit_content_type(self) -> None:
        opener = _FakeOpener()
        session = self._wire(opener)

        session.post(
            "http://testserver/",
            data={"a": "1"},
            headers={"Content-Type": "text/plain"},
        )

        assert opener.requests[0].get_header("Content-type") == "text/plain"

    def test_post_without_data(self) -> None:
        opener = _FakeOpener()
        session = self._wire(opener)

        session.post("http://testserver/")

        assert opener.requests[0].data is None

    def test_http_error_returned_as_response(self) -> None:
        failure = _http_error(404, b"missing", {"X-Err": "yes"})
        opener = _FakeOpener([failure])
        session = self._wire(opener)

        resp = session.get("http://testserver/nope")

        assert resp.status_code == 404
        assert resp.data == b"missing"
        assert resp.headers["X-Err"] == "yes"
|
||||
Loading…
Add table
Add a link
Reference in a new issue