1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

[bunkr] send proper Referer headers for file downloads (#6319)

This commit is contained in:
Mike Fährmann 2024-10-14 16:28:24 +02:00
parent 6506afb257
commit c148e200a4
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -64,30 +64,32 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"album_name" : text.unescape(info[0]), "album_name" : text.unescape(info[0]),
"album_size" : size[1:-1], "album_size" : size[1:-1],
"count" : len(urls), "count" : len(urls),
"_http_validate": self._validate,
} }
def _extract_files(self, urls): def _extract_files(self, urls):
for url in urls: for url in urls:
try: try:
url = self._extract_file(text.unescape(url)) yield self._extract_file(text.unescape(url))
except Exception as exc: except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc) self.log.error("%s: %s", exc.__class__.__name__, exc)
continue
yield {"file": text.unescape(url)}
def _extract_file(self, url): def _extract_file(self, webpage_url):
page = self.request(url).text response = self.request(webpage_url)
url = (text.extr(page, '<source src="', '"') or page = response.text
text.extr(page, '<img src="', '"')) file_url = (text.extr(page, '<source src="', '"') or
text.extr(page, '<img src="', '"'))
if not url: if not file_url:
url_download = text.rextract( webpage_url = text.unescape(text.rextract(
page, ' href="', '"', page.rindex("Download"))[0] page, ' href="', '"', page.rindex("Download"))[0])
page = self.request(text.unescape(url_download)).text response = self.request(webpage_url)
url = text.unescape(text.rextract(page, ' href="', '"')[0]) file_url = text.rextract(response.text, ' href="', '"')[0]
return url return {
"file" : text.unescape(file_url),
"_http_headers" : {"Referer": response.url},
"_http_validate": self._validate,
}
def _validate(self, response): def _validate(self, response):
if response.history and response.url.endswith("/maintenance-vid.mp4"): if response.history and response.url.endswith("/maintenance-vid.mp4"):
@ -105,16 +107,15 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
def fetch_album(self, album_id): def fetch_album(self, album_id):
try: try:
url = self._extract_file(self.root + self.album_id) file = self._extract_file(self.root + album_id)
except Exception as exc: except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc) self.log.error("%s: %s", exc.__class__.__name__, exc)
return (), {} return (), {}
return ({"file": text.unescape(url)},), { return (file,), {
"album_id" : "", "album_id" : "",
"album_name" : "", "album_name" : "",
"album_size" : -1, "album_size" : -1,
"description": "", "description": "",
"count" : 1, "count" : 1,
"_http_validate": self._validate,
} }