feat: add cookie-persisting HttpSession
HTTP client that persists cookies across requests via http.cookiejar, for sites requiring login followed by cookie-authenticated fetches. Supports GET with query params, form-encoded POST, default/per-request header merging, and HTTPError-to-response conversion.
This commit is contained in:
parent
9a4d2041f9
commit
bdd3892c5c
1 changed files with 88 additions and 0 deletions
|
|
@ -6,6 +6,7 @@ GitHub, Bitbucket, etc. are handled by higher-level modules.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
import http.cookiejar
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
@ -29,6 +30,93 @@ class HttpResponse:
|
||||||
return self.data.decode("utf-8", errors="replace")
|
return self.data.decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
|
||||||
|
class HttpSession:
|
||||||
|
"""HTTP client that persists cookies across requests.
|
||||||
|
|
||||||
|
Suitable for sites that require login followed by
|
||||||
|
cookie-authenticated page fetches.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
default_headers: dict[str, str] | None = None,
|
||||||
|
timeout: int = 30,
|
||||||
|
) -> None:
|
||||||
|
self._timeout = timeout
|
||||||
|
self._default_headers = default_headers or {}
|
||||||
|
self._jar = http.cookiejar.CookieJar()
|
||||||
|
self._opener = urllib.request.build_opener(
|
||||||
|
urllib.request.HTTPCookieProcessor(self._jar),
|
||||||
|
)
|
||||||
|
|
||||||
|
def get(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
params: dict[str, str] | None = None,
|
||||||
|
headers: dict[str, str] | None = None,
|
||||||
|
) -> HttpResponse:
|
||||||
|
if params:
|
||||||
|
query = urllib.parse.urlencode(params)
|
||||||
|
url = f"{url}?{query}"
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
url,
|
||||||
|
headers=self._merged_headers(headers),
|
||||||
|
method="GET",
|
||||||
|
)
|
||||||
|
return self._send(req)
|
||||||
|
|
||||||
|
def post(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
data: dict[str, str] | None = None,
|
||||||
|
headers: dict[str, str] | None = None,
|
||||||
|
) -> HttpResponse:
|
||||||
|
body = (
|
||||||
|
urllib.parse.urlencode(data).encode()
|
||||||
|
if data else None
|
||||||
|
)
|
||||||
|
|
||||||
|
merged = self._merged_headers(headers)
|
||||||
|
if data and "Content-Type" not in merged:
|
||||||
|
merged["Content-Type"] = (
|
||||||
|
"application/x-www-form-urlencoded"
|
||||||
|
)
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
url,
|
||||||
|
data=body,
|
||||||
|
headers=merged,
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
|
return self._send(req)
|
||||||
|
|
||||||
|
def _send(self, req: urllib.request.Request) -> HttpResponse:
|
||||||
|
try:
|
||||||
|
with self._opener.open(
|
||||||
|
req, timeout=self._timeout
|
||||||
|
) as resp:
|
||||||
|
return HttpResponse(
|
||||||
|
status_code=resp.getcode(),
|
||||||
|
headers=dict(resp.getheaders()),
|
||||||
|
data=resp.read(),
|
||||||
|
)
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
return HttpResponse(
|
||||||
|
status_code=e.code,
|
||||||
|
headers=dict(e.headers.items()),
|
||||||
|
data=e.read(),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _merged_headers(
|
||||||
|
self, extra: dict[str, str] | None
|
||||||
|
) -> dict[str, str]:
|
||||||
|
merged = dict(self._default_headers)
|
||||||
|
if extra:
|
||||||
|
merged.update(extra)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
|
||||||
def _request(
|
def _request(
|
||||||
url: str,
|
url: str,
|
||||||
method: str = "GET",
|
method: str = "GET",
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue