2014-10-12 21:56:44 +02:00
|
|
|
import time
|
|
|
|
import requests
|
|
|
|
import html.parser
|
|
|
|
|
2014-10-31 23:38:21 +01:00
|
|
|
def safe_request(session, url, method="GET", *args, **kwargs):
    """Issue an HTTP request, retrying on connection errors and non-200 replies.

    The request is attempted up to five times, sleeping one second between
    attempts.  Connection failures and unwanted status codes share the same
    attempt counter.  Only a response whose status code equals
    ``requests.codes.ok`` is ever returned.

    Args:
        session: Object exposing ``request(method, url, *args, **kwargs)``
            (e.g. a ``requests.Session``).
        url: Address to request.
        method: HTTP verb handed to ``session.request``.
        *args: Extra positional arguments forwarded to ``session.request``.
        **kwargs: Extra keyword arguments forwarded to ``session.request``.

    Returns:
        The response of the first attempt that answered with status OK.

    Raises:
        requests.exceptions.ConnectionError: The last allowed attempt still
            failed to connect.
        requests.exceptions.HTTPError: The last allowed attempt still
            answered with a status other than OK.
    """
    max_tries = 5
    retry_delay = 1  # seconds to wait before the next attempt

    tries = 0
    while True:
        # try to connect to remote source
        try:
            r = session.request(method, url, *args, **kwargs)
        except requests.exceptions.ConnectionError:
            tries += 1
            if tries >= max_tries:
                # Out of attempts: propagate the original error right away
                # instead of sleeping first.
                raise
            time.sleep(retry_delay)
            continue

        # everything ok -- hand the response to the caller
        if r.status_code == requests.codes.ok:
            return r

        # reject every other status code
        tries += 1
        if tries >= max_tries:
            r.raise_for_status()
            # raise_for_status() only raises for 4xx/5xx.  Without an
            # explicit raise here, a persistent non-error status such as
            # 204 would keep this loop spinning forever.
            raise requests.exceptions.HTTPError(
                "unexpected status code %s for url: %s" % (r.status_code, url),
                response=r,
            )
        time.sleep(retry_delay)
|
|
|
|
|
|
|
|
def filename_from_url(url):
    """Return the part of *url* that follows its last "/".

    A url without any "/" is returned unchanged; a url that ends in "/"
    yields the empty string.  Anything after the last slash (including a
    query string, if present) is kept as-is.
    """
    _head, _sep, tail = url.rpartition("/")
    return tail
|
|
|
|
|
|
|
|
# Callable that converts HTML character references (e.g. "&amp;") back to
# the characters they stand for.
# HTMLParser.unescape was deprecated in Python 3.4 and removed in 3.9, so
# prefer html.unescape and only fall back to the legacy method on old
# interpreters that lack it.
try:
    unescape = html.unescape
except AttributeError:
    unescape = html.parser.HTMLParser().unescape
|