-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcurl_wrapper.py
More file actions
96 lines (82 loc) · 2.59 KB
/
curl_wrapper.py
File metadata and controls
96 lines (82 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import contextlib
import os
import re
import subprocess
import time
from typing import Optional
# Default User-Agent header passed to curl. Per the original author's note it
# is used to avoid 403 responses; callers can override it per CurlWrapper
# instance through the ``user_agent`` keyword argument.
DEFAULT_USER_AGENT = "Googlebot/2.1 (+http://www.google.com/bot.html)"
class CurlExitCodes:
    """
    Symbolic names for selected curl exit statuses.

    Reference: https://curl.se/libcurl/c/libcurl-errors.html
    """

    # The transfer completed without error.
    OK = 0
    # The host name in the URL could not be resolved.
    COULDNT_RESOLVE_HOST = 6
    # The server answered with an HTTP error status while curl was asked to
    # fail on such responses (``-f`` / ``--fail``).
    HTTP_RETURNED_ERROR = 22
class CurlWrapper:
    """
    Run a single ``curl`` request as a child process and expose its outcome.

    The process is started by the constructor and runs in the background.
    Poll ``running`` (or watch ``age``) and, once curl has exited, read
    ``ret_code`` and ``stderr``. The response body is never kept; only the
    exit status and curl's error output are available.
    """

    # Matches the message curl prints together with exit code 22 and captures
    # the HTTP status as the ``http_code`` group.
    CURL_STDERR_HTTP_RE = re.compile(
        r"^curl: \(22\) The requested URL returned error: (?P<http_code>\d+)"
    )

    def __init__(
        self,
        url: str,
        *,
        user_agent: str = DEFAULT_USER_AGENT,
        connect_timeout: int = 5,
        max_time: int = 10,
        max_redirects: int = 3,
    ) -> None:
        """
        Start curl for ``url`` immediately.

        Args:
            url: The URL to request.
            user_agent: Value passed to curl's ``--user-agent``.
            connect_timeout: Value passed to ``--connect-timeout`` (seconds).
            max_time: Value passed to ``--max-time`` (seconds).
            max_redirects: Value passed to ``--max-redirs``.

        Raises:
            OSError: If the ``curl`` executable cannot be started.
        """
        self.url = url
        # Cache for the decoded stderr text; filled on first ``stderr`` access.
        self._stderr: Optional[str] = None
        # Monotonic clock so that ``age`` is immune to wall-clock adjustments.
        self._started = time.monotonic()
        self._proc = subprocess.Popen(
            [
                "curl",
                "-sSf",  # silent, still show errors, fail on HTTP error status
                "-L",  # follow redirects
                "--max-redirs",
                f"{max_redirects}",  # limit number of redirects
                # "--proto", "=https",  # (optional) only allow https for the initial URL
                "--proto-redir",
                # Only allow https after redirects; http will fail. The former
                # value "=all,https" permitted *every* protocol, because
                # "=all" enables all of them and ",https" merely adds https.
                "=https",
                "--output",
                "-",  # write the body to stdout, which is discarded below
                "--connect-timeout",
                f"{connect_timeout}",
                "--max-time",
                f"{max_time}",
                "--user-agent",
                f"{user_agent}",
                self.url,
            ],
            # DEVNULL instead of open(os.devnull): no file object left unclosed.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        )

    @property
    def running(self) -> bool:
        """Whether the curl process has not exited yet."""
        return self._proc.poll() is None

    @property
    def ret_code(self) -> int:
        """
        Exit status of curl (compare with ``CurlExitCodes``).

        Must only be read once ``running`` is False.
        """
        assert not self.running, "ret_code is only available after curl exited"
        return self._proc.returncode

    @property
    def stderr(self) -> str:
        """
        Everything curl wrote to stderr, decoded as text.

        Must only be read once ``running`` is False. The pipe is read and
        closed on first access; later accesses return the cached text.
        """
        assert not self.running, "stderr is only available after curl exited"
        if self._stderr is None:
            assert self._proc.stderr is not None  # guaranteed by stderr=PIPE
            # Closing the pipe after reading releases the file descriptor.
            with self._proc.stderr as pipe:
                # errors="replace": undecodable bytes must not raise here.
                self._stderr = pipe.read().decode(errors="replace")
        return self._stderr

    @property
    def age(self) -> float:
        """Seconds elapsed since this wrapper started curl."""
        return time.monotonic() - self._started

    def terminate(self, timeout: float | None = None) -> None:
        """
        Ask curl to terminate; best effort, never raises for a failed attempt.

        Args:
            timeout: If given, wait up to this many seconds for the process
                to exit. If it does not exit in time the call returns anyway;
                follow up with ``kill()`` if needed.
        """
        with contextlib.suppress(OSError, subprocess.TimeoutExpired):
            self._proc.terminate()
            if timeout is not None:
                self._proc.wait(timeout=timeout)

    def kill(self) -> None:
        """Forcefully kill curl; best effort, OS-level failures are ignored."""
        with contextlib.suppress(OSError):
            self._proc.kill()