mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-25 04:02:32 +01:00
[bunkr] try different domain when encountering a CF challenge page
(#6344, #6352, #6368)
This commit is contained in:
parent
75674944f0
commit
bce3c4b424
@ -8,9 +8,10 @@
|
|||||||
|
|
||||||
"""Extractors for https://bunkr.si/"""
|
"""Extractors for https://bunkr.si/"""
|
||||||
|
|
||||||
|
from .common import Extractor
|
||||||
from .lolisafe import LolisafeAlbumExtractor
|
from .lolisafe import LolisafeAlbumExtractor
|
||||||
from .. import text, config
|
from .. import text, config, exception
|
||||||
|
import random
|
||||||
|
|
||||||
if config.get(("extractor", "bunkr"), "tlds"):
|
if config.get(("extractor", "bunkr"), "tlds"):
|
||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
@ -21,11 +22,27 @@ else:
|
|||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
r"(?:bunkr:(?:https?://)?([^/?#]+)|"
|
r"(?:bunkr:(?:https?://)?([^/?#]+)|"
|
||||||
r"(?:https?://)?(?:app\.)?(bunkr+"
|
r"(?:https?://)?(?:app\.)?(bunkr+"
|
||||||
r"\.(?:s[kiu]|[cf]i|p[ks]|ru|la|is|to|a[cx]"
|
r"\.(?:s[kiu]|[cf]i|p[hks]|ru|la|is|to|a[cx]"
|
||||||
r"|black|cat|media|red|site|ws|org)))"
|
r"|black|cat|media|red|site|ws|org)))"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Candidate domains used as fallbacks by BunkrAlbumExtractor.request().
# This must stay a mutable list: a domain is removed from it at runtime
# once it has answered with a CF challenge.
DOMAINS = [
    "bunkr.ac", "bunkr.ci", "bunkr.fi",
    "bunkr.ph", "bunkr.pk", "bunkr.ps",
    "bunkr.si", "bunkr.sk", "bunkr.ws",
    "bunkr.black", "bunkr.red", "bunkr.media", "bunkr.site",
]
|
||||||
LEGACY_DOMAINS = {
|
LEGACY_DOMAINS = {
|
||||||
|
"bunkr.ax",
|
||||||
"bunkr.cat",
|
"bunkr.cat",
|
||||||
"bunkr.ru",
|
"bunkr.ru",
|
||||||
"bunkrr.ru",
|
"bunkrr.ru",
|
||||||
@ -35,6 +52,7 @@ LEGACY_DOMAINS = {
|
|||||||
"bunkr.is",
|
"bunkr.is",
|
||||||
"bunkr.to",
|
"bunkr.to",
|
||||||
}
|
}
|
||||||
|
# Roots ("https://<domain>") that responded with HTTP 403 and are
# therefore treated as requiring a CF challenge; filled in at runtime
# by BunkrAlbumExtractor.request().
CF_DOMAINS = set()
|
||||||
|
|
||||||
|
|
||||||
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||||
@ -50,6 +68,46 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
|||||||
if domain not in LEGACY_DOMAINS:
|
if domain not in LEGACY_DOMAINS:
|
||||||
self.root = "https://" + domain
|
self.root = "https://" + domain
|
||||||
|
|
||||||
|
def request(self, url, **kwargs):
    """Send a request, switching to another Bunkr domain on CF challenges.

    Redirects are followed manually ('allow_redirects' is forced to
    False) so that a redirect target which is already known to serve
    a CF challenge can be swapped for a different domain before it is
    requested.

    An HTTP 403 error is treated as a CF challenge: the root of the
    failing URL is added to CF_DOMAINS, its domain is removed from
    DOMAINS, and the same path is retried on a randomly chosen
    remaining domain.

    Returns:
        The first response with a status code below 300.

    Raises:
        exception.HttpError: for any HTTP error other than 403.
        exception.StopExtraction: when no fallback domain is left.
    """
    kwargs["allow_redirects"] = False

    while True:
        try:
            response = Extractor.request(self, url, **kwargs)
            if response.status_code < 300:
                return response

            # redirect
            url = response.headers["Location"]
            root, path = self._split(url)
            if root not in CF_DOMAINS:
                continue
            self.log.debug("Redirect to known CF challenge domain '%s'",
                           root)

        except exception.HttpError as exc:
            if exc.status != 403:
                raise

            # CF challenge
            root, path = self._split(url)
            CF_DOMAINS.add(root)
            self.log.debug("Added '%s' to CF challenge domains", root)

            try:
                DOMAINS.remove(root.rpartition("/")[2])
            except ValueError:
                # root is not one of the fallback candidates
                # (or has already been removed); nothing to do
                pass

        # Check on every path that reaches the fallback selection, not
        # only after a successful removal: random.choice() raises
        # IndexError on an empty list.
        if not DOMAINS:
            raise exception.StopExtraction(
                "All Bunkr domains require solving a CF challenge")

        # select alternative domain
        root = "https://" + random.choice(DOMAINS)
        self.log.debug("Trying '%s' as fallback", root)
        url = root + path
|
||||||
|
|
||||||
def fetch_album(self, album_id):
|
def fetch_album(self, album_id):
|
||||||
# album metadata
|
# album metadata
|
||||||
page = self.request(self.root + "/a/" + self.album_id).text
|
page = self.request(self.root + "/a/" + self.album_id).text
|
||||||
@ -77,8 +135,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
|||||||
info[-1], "%H:%M:%S %d/%m/%Y")
|
info[-1], "%H:%M:%S %d/%m/%Y")
|
||||||
|
|
||||||
yield file
|
yield file
|
||||||
|
except exception.StopExtraction:
|
||||||
|
raise
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.log.error("%s: %s", exc.__class__.__name__, exc)
|
self.log.error("%s: %s", exc.__class__.__name__, exc)
|
||||||
|
self.log.debug("", exc_info=exc)
|
||||||
|
|
||||||
def _extract_file(self, webpage_url):
|
def _extract_file(self, webpage_url):
|
||||||
response = self.request(webpage_url)
|
response = self.request(webpage_url)
|
||||||
@ -104,6 +165,10 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def _split(self, url):
|
||||||
|
pos = url.index("/", 8)
|
||||||
|
return url[:pos], url[pos:]
|
||||||
|
|
||||||
|
|
||||||
class BunkrMediaExtractor(BunkrAlbumExtractor):
|
class BunkrMediaExtractor(BunkrAlbumExtractor):
|
||||||
"""Extractor for bunkr.si media links"""
|
"""Extractor for bunkr.si media links"""
|
||||||
|
Loading…
Reference in New Issue
Block a user