Spaces:
Sleeping
Sleeping
:boom: [Fix] WebpageFetcher: raise timeout when requests.get hangs
Browse files
- networks/webpage_fetcher.py +16 -7
networks/webpage_fetcher.py
CHANGED
@@ -22,18 +22,27 @@ class WebpageFetcher:
|
|
22 |
return False
|
23 |
|
24 |
def send_request(self):
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
def save_response(self):
|
32 |
if not self.output_path.exists():
|
33 |
self.output_path.parent.mkdir(parents=True, exist_ok=True)
|
34 |
logger.success(f"Saving to: [{self.output_path}]")
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
37 |
|
38 |
def fetch(self, url, overwrite=False, output_parent=None):
|
39 |
self.url = url
|
|
|
22 |
return False
|
23 |
|
24 |
def send_request(self, timeout=15):
    """Fetch ``self.url`` with an HTTP GET and store the result on the instance.

    On success the response object is stored in ``self.request_response``.
    If the request fails (timeout, connection error, invalid URL, proxy
    error, ...), a warning is logged and ``self.request_response`` is set
    to ``None`` so that callers can detect the failure.

    Args:
        timeout: Value passed to ``requests.get(timeout=...)``. Defaults to
            15, the value that was previously hard-coded, so a hanging
            server cannot block the caller forever.
    """
    try:
        self.request_response = requests.get(
            url=self.url,
            headers=REQUESTS_HEADERS,
            # presumably a requests-style proxies mapping; verify against enver
            proxies=self.enver.requests_proxies,
            timeout=timeout,
        )
    except requests.exceptions.RequestException:
        # Only request-level failures are treated as "fetch failed".
        # A bare ``except:`` would also swallow KeyboardInterrupt/SystemExit,
        # making a user's Ctrl-C look like a failed fetch.
        logger.warn(f"Failed to fetch: [{self.url}]")
        self.request_response = None
|
35 |
|
36 |
def save_response(self):
    """Write the body of the last response to ``self.output_path``.

    Does nothing when ``self.request_response`` is ``None``: no directory
    is created and no "Saving to" message is logged, so a failed fetch
    does not leave empty directories or a misleading success log behind.
    Otherwise the parent directory is created if needed and the raw
    response bytes (``self.request_response.content``) are written.
    """
    # Guard first: there is nothing to save without a response.
    if self.request_response is None:
        return

    # exist_ok=True makes a separate existence check unnecessary.
    self.output_path.parent.mkdir(parents=True, exist_ok=True)
    logger.success(f"Saving to: [{self.output_path}]")

    with open(self.output_path, "wb") as wf:
        wf.write(self.request_response.content)
|
46 |
|
47 |
def fetch(self, url, overwrite=False, output_parent=None):
|
48 |
self.url = url
|