1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

[bunkr] try different domain when encountering a CF challenge page

(#6344, #6352, #6368)
This commit is contained in:
Mike Fährmann 2024-10-23 21:17:01 +02:00
parent 75674944f0
commit bce3c4b424
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -8,9 +8,10 @@
"""Extractors for https://bunkr.si/""" """Extractors for https://bunkr.si/"""
from .common import Extractor
from .lolisafe import LolisafeAlbumExtractor from .lolisafe import LolisafeAlbumExtractor
from .. import text, config from .. import text, config, exception
import random
if config.get(("extractor", "bunkr"), "tlds"): if config.get(("extractor", "bunkr"), "tlds"):
BASE_PATTERN = ( BASE_PATTERN = (
@ -21,11 +22,27 @@ else:
BASE_PATTERN = ( BASE_PATTERN = (
r"(?:bunkr:(?:https?://)?([^/?#]+)|" r"(?:bunkr:(?:https?://)?([^/?#]+)|"
r"(?:https?://)?(?:app\.)?(bunkr+" r"(?:https?://)?(?:app\.)?(bunkr+"
r"\.(?:s[kiu]|[cf]i|p[ks]|ru|la|is|to|a[cx]" r"\.(?:s[kiu]|[cf]i|p[hks]|ru|la|is|to|a[cx]"
r"|black|cat|media|red|site|ws|org)))" r"|black|cat|media|red|site|ws|org)))"
) )
# Candidate domains used as fallbacks by BunkrAlbumExtractor.request().
# This is deliberately a mutable list: request() removes a domain from it
# once that domain answered with a 403 (treated as a CF challenge), and
# picks the next candidate with random.choice(), which needs a sequence.
DOMAINS = [
"bunkr.ac",
"bunkr.ci",
"bunkr.fi",
"bunkr.ph",
"bunkr.pk",
"bunkr.ps",
"bunkr.si",
"bunkr.sk",
"bunkr.ws",
"bunkr.black",
"bunkr.red",
"bunkr.media",
"bunkr.site",
]
LEGACY_DOMAINS = { LEGACY_DOMAINS = {
"bunkr.ax",
"bunkr.cat", "bunkr.cat",
"bunkr.ru", "bunkr.ru",
"bunkrr.ru", "bunkrr.ru",
@ -35,6 +52,7 @@ LEGACY_DOMAINS = {
"bunkr.is", "bunkr.is",
"bunkr.to", "bunkr.to",
} }
# Roots (scheme + host, e.g. "https://bunkr.si") for which a request failed
# with HTTP 403; filled at runtime by BunkrAlbumExtractor.request() and
# consulted there to avoid following redirects to such roots.
CF_DOMAINS = set()
class BunkrAlbumExtractor(LolisafeAlbumExtractor): class BunkrAlbumExtractor(LolisafeAlbumExtractor):
@ -50,6 +68,46 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
if domain not in LEGACY_DOMAINS: if domain not in LEGACY_DOMAINS:
self.root = "https://" + domain self.root = "https://" + domain
def request(self, url, **kwargs):
    """Send a request and fall back to other domains on CF challenges.

    Redirects are followed manually ('allow_redirects' is always forced
    to False) so that every redirect target can be checked against the
    set of roots already known to answer with a CF challenge.

    A response with a status code below 300 is returned as is.
    For a redirect, the 'Location' target is resolved against the current
    URL; it is followed unless its root is in CF_DOMAINS, in which case a
    random entry from DOMAINS is tried instead.
    An HTTP 403 error is treated as a CF challenge: the root of the
    current URL is added to CF_DOMAINS, its host is removed from DOMAINS,
    and the same path is retried on a random remaining domain.

    Raises:
        exception.HttpError: for HTTP errors other than 403.
        exception.StopExtraction: when no fallback domain is left.
    """
    # local import: only needed to resolve relative redirect targets
    from urllib.parse import urljoin

    kwargs["allow_redirects"] = False

    while True:
        try:
            response = Extractor.request(self, url, **kwargs)
            if response.status_code < 300:
                return response

            # redirect
            # 'Location' may be relative (e.g. "/a/xyz"); resolve it
            # against the URL that was just requested so that _split()
            # always receives an absolute URL
            url = urljoin(url, response.headers["Location"])
            root, path = self._split(url)
            if root not in CF_DOMAINS:
                continue
            self.log.debug("Redirect to known CF challenge domain '%s'",
                           root)

        except exception.HttpError as exc:
            if exc.status != 403:
                raise

            # CF challenge
            root, path = self._split(url)
            CF_DOMAINS.add(root)
            self.log.debug("Added '%s' to CF challenge domains", root)

            try:
                # strip the scheme: DOMAINS holds bare host names
                DOMAINS.remove(root.rpartition("/")[2])
            except ValueError:
                # host is not one of the fallback candidates
                pass

        # Checked on every path that needs a fallback, not only right
        # after a successful removal: DOMAINS can already be empty here,
        # and random.choice() would then fail with an IndexError.
        if not DOMAINS:
            raise exception.StopExtraction(
                "All Bunkr domains require solving a CF challenge")

        # select alternative domain
        root = "https://" + random.choice(DOMAINS)
        self.log.debug("Trying '%s' as fallback", root)
        url = root + path
def fetch_album(self, album_id): def fetch_album(self, album_id):
# album metadata # album metadata
page = self.request(self.root + "/a/" + self.album_id).text page = self.request(self.root + "/a/" + self.album_id).text
@ -77,8 +135,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
info[-1], "%H:%M:%S %d/%m/%Y") info[-1], "%H:%M:%S %d/%m/%Y")
yield file yield file
except exception.StopExtraction:
raise
except Exception as exc: except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc) self.log.error("%s: %s", exc.__class__.__name__, exc)
self.log.debug("", exc_info=exc)
def _extract_file(self, webpage_url): def _extract_file(self, webpage_url):
response = self.request(webpage_url) response = self.request(webpage_url)
@ -104,6 +165,10 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
return False return False
return True return True
def _split(self, url):
    """Split an absolute URL into its root and the remainder.

    Returns a (root, path) tuple where 'root' is everything before the
    first "/" found at or after offset 8 (i.e. scheme and host) and
    'path' is the rest, so that root + path == url always holds.
    For a URL without any path ("https://bunkr.si"), 'path' is "".
    """
    # offset 8 skips the "//" of both "http://" and "https://"
    pos = url.find("/", 8)
    if pos < 0:
        # no path component; str.index() would raise ValueError here
        return url, ""
    return url[:pos], url[pos:]
class BunkrMediaExtractor(BunkrAlbumExtractor): class BunkrMediaExtractor(BunkrAlbumExtractor):
"""Extractor for bunkr.si media links""" """Extractor for bunkr.si media links"""