feat: add cookie-persisting HttpSession
HTTP client that persists cookies across requests via http.cookiejar, for sites requiring login followed by cookie-authenticated fetches. Supports GET with query params, form-encoded POST, default/per-request header merging, and HTTPError-to-response conversion.
This commit is contained in:
parent
9a4d2041f9
commit
bdd3892c5c
1 changed files with 88 additions and 0 deletions
|
|
@ -6,6 +6,7 @@ GitHub, Bitbucket, etc. are handled by higher-level modules.
|
|||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
import http.cookiejar
|
||||
import json
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
|
@ -29,6 +30,93 @@ class HttpResponse:
|
|||
return self.data.decode("utf-8", errors="replace")
|
||||
|
||||
|
||||
class HttpSession:
|
||||
"""HTTP client that persists cookies across requests.
|
||||
|
||||
Suitable for sites that require login followed by
|
||||
cookie-authenticated page fetches.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
default_headers: dict[str, str] | None = None,
|
||||
timeout: int = 30,
|
||||
) -> None:
|
||||
self._timeout = timeout
|
||||
self._default_headers = default_headers or {}
|
||||
self._jar = http.cookiejar.CookieJar()
|
||||
self._opener = urllib.request.build_opener(
|
||||
urllib.request.HTTPCookieProcessor(self._jar),
|
||||
)
|
||||
|
||||
def get(
|
||||
self,
|
||||
url: str,
|
||||
params: dict[str, str] | None = None,
|
||||
headers: dict[str, str] | None = None,
|
||||
) -> HttpResponse:
|
||||
if params:
|
||||
query = urllib.parse.urlencode(params)
|
||||
url = f"{url}?{query}"
|
||||
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers=self._merged_headers(headers),
|
||||
method="GET",
|
||||
)
|
||||
return self._send(req)
|
||||
|
||||
def post(
|
||||
self,
|
||||
url: str,
|
||||
data: dict[str, str] | None = None,
|
||||
headers: dict[str, str] | None = None,
|
||||
) -> HttpResponse:
|
||||
body = (
|
||||
urllib.parse.urlencode(data).encode()
|
||||
if data else None
|
||||
)
|
||||
|
||||
merged = self._merged_headers(headers)
|
||||
if data and "Content-Type" not in merged:
|
||||
merged["Content-Type"] = (
|
||||
"application/x-www-form-urlencoded"
|
||||
)
|
||||
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
data=body,
|
||||
headers=merged,
|
||||
method="POST",
|
||||
)
|
||||
return self._send(req)
|
||||
|
||||
def _send(self, req: urllib.request.Request) -> HttpResponse:
|
||||
try:
|
||||
with self._opener.open(
|
||||
req, timeout=self._timeout
|
||||
) as resp:
|
||||
return HttpResponse(
|
||||
status_code=resp.getcode(),
|
||||
headers=dict(resp.getheaders()),
|
||||
data=resp.read(),
|
||||
)
|
||||
except urllib.error.HTTPError as e:
|
||||
return HttpResponse(
|
||||
status_code=e.code,
|
||||
headers=dict(e.headers.items()),
|
||||
data=e.read(),
|
||||
)
|
||||
|
||||
def _merged_headers(
|
||||
self, extra: dict[str, str] | None
|
||||
) -> dict[str, str]:
|
||||
merged = dict(self._default_headers)
|
||||
if extra:
|
||||
merged.update(extra)
|
||||
return merged
|
||||
|
||||
|
||||
def _request(
|
||||
url: str,
|
||||
method: str = "GET",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue