feat: add cookie-persisting HttpSession

HTTP client that persists cookies across requests via
http.cookiejar, for sites requiring login followed by
cookie-authenticated fetches. Supports GET with query params,
form-encoded POST, default/per-request header merging, and
HTTPError-to-response conversion.
This commit is contained in:
Tiara Rodney 2026-06-06 14:36:18 +02:00
parent 9a4d2041f9
commit bdd3892c5c

View file

@ -6,6 +6,7 @@ GitHub, Bitbucket, etc. are handled by higher-level modules.
""" """
from dataclasses import dataclass from dataclasses import dataclass
import http.cookiejar
import json import json
import time import time
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
@ -29,6 +30,93 @@ class HttpResponse:
return self.data.decode("utf-8", errors="replace") return self.data.decode("utf-8", errors="replace")
class HttpSession:
"""HTTP client that persists cookies across requests.
Suitable for sites that require login followed by
cookie-authenticated page fetches.
"""
def __init__(
self,
default_headers: dict[str, str] | None = None,
timeout: int = 30,
) -> None:
self._timeout = timeout
self._default_headers = default_headers or {}
self._jar = http.cookiejar.CookieJar()
self._opener = urllib.request.build_opener(
urllib.request.HTTPCookieProcessor(self._jar),
)
def get(
self,
url: str,
params: dict[str, str] | None = None,
headers: dict[str, str] | None = None,
) -> HttpResponse:
if params:
query = urllib.parse.urlencode(params)
url = f"{url}?{query}"
req = urllib.request.Request(
url,
headers=self._merged_headers(headers),
method="GET",
)
return self._send(req)
def post(
self,
url: str,
data: dict[str, str] | None = None,
headers: dict[str, str] | None = None,
) -> HttpResponse:
body = (
urllib.parse.urlencode(data).encode()
if data else None
)
merged = self._merged_headers(headers)
if data and "Content-Type" not in merged:
merged["Content-Type"] = (
"application/x-www-form-urlencoded"
)
req = urllib.request.Request(
url,
data=body,
headers=merged,
method="POST",
)
return self._send(req)
def _send(self, req: urllib.request.Request) -> HttpResponse:
try:
with self._opener.open(
req, timeout=self._timeout
) as resp:
return HttpResponse(
status_code=resp.getcode(),
headers=dict(resp.getheaders()),
data=resp.read(),
)
except urllib.error.HTTPError as e:
return HttpResponse(
status_code=e.code,
headers=dict(e.headers.items()),
data=e.read(),
)
def _merged_headers(
self, extra: dict[str, str] | None
) -> dict[str, str]:
merged = dict(self._default_headers)
if extra:
merged.update(extra)
return merged
def _request( def _request(
url: str, url: str,
method: str = "GET", method: str = "GET",