feat: add cookie-persisting HttpSession

HTTP client that persists cookies across requests via
http.cookiejar, for sites requiring login followed by
cookie-authenticated fetches. Supports GET with query params,
form-encoded POST, default/per-request header merging, and
HTTPError-to-response conversion.
This commit is contained in:
Tiara Rodney 2026-06-06 14:36:18 +02:00
parent 9a4d2041f9
commit bdd3892c5c

View file

@ -6,6 +6,7 @@ GitHub, Bitbucket, etc. are handled by higher-level modules.
"""
from dataclasses import dataclass
import http.cookiejar
import json
import time
from typing import Any, Dict, Optional
@ -29,6 +30,93 @@ class HttpResponse:
return self.data.decode("utf-8", errors="replace")
class HttpSession:
    """HTTP client that persists cookies across requests.

    Suitable for sites that require login followed by
    cookie-authenticated page fetches.

    Cookies are held in an in-memory ``http.cookiejar.CookieJar`` that
    is shared by every request made through the same instance.
    HTTP error statuses are returned as ``HttpResponse`` objects
    instead of being raised.
    """

    def __init__(
        self,
        default_headers: dict[str, str] | None = None,
        timeout: int = 30,
    ) -> None:
        """Create a session with its own cookie jar.

        Args:
            default_headers: Headers sent with every request.
                Per-request headers take precedence over these.
            timeout: Timeout, in seconds, applied to each request.
        """
        self._timeout = timeout
        self._default_headers = default_headers or {}
        self._jar = http.cookiejar.CookieJar()
        self._opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(self._jar),
        )

    def get(
        self,
        url: str,
        params: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
    ) -> HttpResponse:
        """Send a GET request.

        Args:
            url: Target URL. It may already carry a query string
                and/or a fragment.
            params: Query parameters to URL-encode and add to *url*.
            headers: Extra headers for this request only.

        Returns:
            The response, including for HTTP error statuses.
        """
        if params:
            url = self._with_query(url, urllib.parse.urlencode(params))
        req = urllib.request.Request(
            url,
            headers=self._merged_headers(headers),
            method="GET",
        )
        return self._send(req)

    def post(
        self,
        url: str,
        data: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
    ) -> HttpResponse:
        """Send a POST request with an optional form-encoded body.

        Args:
            url: Target URL.
            data: Form fields to URL-encode as the request body.
                When empty or ``None`` no body is sent.
            headers: Extra headers for this request only.

        Returns:
            The response, including for HTTP error statuses.
        """
        body = urllib.parse.urlencode(data).encode() if data else None
        merged = self._merged_headers(headers)
        # urllib normalises header names with str.capitalize(), so a
        # caller-supplied "content-type" would be clobbered by a
        # "Content-Type" key added here; compare case-insensitively.
        has_content_type = any(
            name.lower() == "content-type" for name in merged
        )
        if body is not None and not has_content_type:
            merged["Content-Type"] = "application/x-www-form-urlencoded"
        req = urllib.request.Request(
            url,
            data=body,
            headers=merged,
            method="POST",
        )
        return self._send(req)

    def _send(self, req: urllib.request.Request) -> HttpResponse:
        """Open *req* through the cookie-aware opener.

        An ``urllib.error.HTTPError`` is converted into an
        ``HttpResponse`` carrying the error status, headers and body.
        Other exceptions raised by the opener propagate unchanged.
        """
        try:
            with self._opener.open(
                req, timeout=self._timeout
            ) as resp:
                return HttpResponse(
                    status_code=resp.status,
                    headers=dict(resp.headers.items()),
                    data=resp.read(),
                )
        except urllib.error.HTTPError as err:
            # HTTPError wraps the open response; close it once the
            # body has been read so the connection is not leaked.
            try:
                return HttpResponse(
                    status_code=err.code,
                    headers=dict(err.headers.items()),
                    data=err.read(),
                )
            finally:
                err.close()

    def _merged_headers(
        self, extra: dict[str, str] | None
    ) -> dict[str, str]:
        """Return a new dict of default headers overlaid with *extra*."""
        merged = dict(self._default_headers)
        if extra:
            merged.update(extra)
        return merged

    @staticmethod
    def _with_query(url: str, query: str) -> str:
        """Add an already-encoded *query* to *url*.

        Extends an existing query string with ``&`` and keeps any
        ``#fragment`` at the end of the URL.
        """
        base, hash_mark, fragment = url.partition("#")
        if "?" not in base:
            joiner = "?"
        elif base.endswith(("?", "&")):
            # Query delimiter already present; nothing to insert.
            joiner = ""
        else:
            joiner = "&"
        return f"{base}{joiner}{query}{hash_mark}{fragment}"
def _request(
url: str,
method: str = "GET",