[bunkr] update domain (#3391)

and improve bunkr/app.bunkr handling
2024-11-22 02:32:33 +01:00 · 2022-12-11 17:34:34 +01:00 · 2022-12-11 17:34:34 +01:00 · 6b6f886dcf
commit 6b6f886dcf
parent eb94568e1f
5 changed files with 35 additions and 53 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -971,7 +971,7 @@ extractor.cyberdrop.domain
 Type
    ``string``
 Default
-    ``"auto"``
+    ``null``
 Example
    ``"cyberdrop.to"``
 Description
@ -1717,7 +1717,7 @@ extractor.lolisafe.domain
 Type
    ``string``
 Default
-    ``"auto"``
+    ``null``
 Description
    Specifies the domain used by a ``lolisafe`` extractor
    regardless of input URL.
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@ -55,7 +55,7 @@
        },
        "cyberdrop":
        {
-            "domain": "auto"
+            "domain": null
        },
        "danbooru":
        {
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -111,7 +111,7 @@ Consider all sites to be NSFW unless otherwise known.
 </tr>
 <tr>
    <td>Bunkr</td>
-    <td>https://bunkr.is/</td>
+    <td>https://bunkr.ru/</td>
    <td>Albums</td>
    <td></td>
 </tr>
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extractors for https://bunkr.is/"""
+"""Extractors for https://bunkr.ru/"""

 from .lolisafe import LolisafeAlbumExtractor
 from .. import text
@ -14,13 +14,13 @@ import json


 class BunkrAlbumExtractor(LolisafeAlbumExtractor):
-    """Extractor for bunkr.is albums"""
+    """Extractor for bunkr.ru albums"""
    category = "bunkr"
-    root = "https://bunkr.is"
-    pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)"
+    root = "https://bunkr.ru"
+    pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:ru|is|to)/a/([^/?#]+)"
    test = (
-        ("https://bunkr.is/a/Lktg9Keq", {
-            "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
+        ("https://bunkr.ru/a/Lktg9Keq", {
+            "pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",
            "content": "0c8768055e4e20e7c7259608b67799171b691140",
            "keyword": {
                "album_id": "Lktg9Keq",
@ -34,64 +34,46 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
        }),
        # mp4 (#2239)
        ("https://app.bunkr.is/a/ptRHaCn2", {
-            "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
+            "pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",
            "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
        }),
        # cdn4
        ("https://bunkr.is/a/iXTTc1o2", {
-            "pattern": r"https://(cdn|media-files)4\.bunkr\.is/",
+            "pattern": r"https://(cdn|media-files)4\.bunkr\.ru/",
            "content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",
        }),
        ("https://bunkr.to/a/Lktg9Keq"),
    )

    def fetch_album(self, album_id):
-        if "//app." in self.root:
-            return self._fetch_album_api(album_id)
-        else:
-            return self._fetch_album_site(album_id)
-
-    def _fetch_album_api(self, album_id):
-        files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
-
-        for file in files:
-            url = file["file"]
-            if url.endswith(".mp4"):
-                file["file"] = url.replace(
-                    "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
-            else:
-                file["_fallback"] = (url.replace("//cdn.", "//cdn3.", 1),)
-
-        return files, data
-
-    def _fetch_album_site(self, album_id):
-        url = self.root + "/a/" + self.album_id
+        root = self.root

        try:
            data = json.loads(text.extr(
-                self.request(url).text,
+                self.request(root + "/a/" + self.album_id).text,
                'id="__NEXT_DATA__" type="application/json">', '<'))
            album = data["props"]["pageProps"]["album"]
            files = album["files"]
        except Exception as exc:
-            self.log.debug(exc.__class__.__name__, exc)
-            self.root = self.root.replace("bunkr", "app.bunkr", 1)
-            return self._fetch_album_api(album_id)
-
-        headers = {"Referer": "https://stream.bunkr.is/"}
+            self.log.debug("%s: %s", exc.__class__.__name__, exc)
+            self.root = root.replace("://", "://app.", 1)
+            files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
+        else:
+            for file in files:
+                file["file"] = file["cdn"] + "/" + file["name"]
+            data = {
+                "album_id"   : self.album_id,
+                "album_name" : text.unescape(album["name"]),
+                "description": text.unescape(album["description"]),
+                "count"      : len(files),
+            }

+        headers = {"Referer": root.replace("://", "://stream.", 1)}
        for file in files:
-            name = file["name"]
-            cdn = file["cdn"]
-            if name.endswith((".mp4", ".m4v", ".mov", ".webm",
-                              ".zip", ".rar", ".7z")):
-                cdn = cdn.replace("//cdn", "//media-files", 1)
+            if file["file"].endswith(
+                    (".mp4", ".m4v", ".mov", ".webm", ".zip", ".rar", ".7z")):
                file["_http_headers"] = headers
-            file["file"] = cdn + "/" + name
+                file["file"] = file["file"].replace(
+                    "//cdn", "//media-files", 1)

-        return files, {
-            "album_id"   : self.album_id,
-            "album_name" : text.unescape(album["name"]),
-            "description": text.unescape(album["description"]),
-            "count"      : len(files),
-        }
+        return files, data
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@ -23,7 +23,7 @@ BASE_PATTERN = LolisafeExtractor.update({
    "xbunkr": {
        "root": "https://xbunkr.com",
        "pattern": r"xbunkr\.com",
-    }
+    },
 })


@ -47,9 +47,9 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
        self.album_id = match.group(match.lastindex)

        domain = self.config("domain")
-        if domain is None or domain == "auto":
+        if domain == "auto":
            self.root = text.root_from_url(match.group(0))
-        else:
+        elif domain:
            self.root = text.ensure_http_scheme(domain)

    def items(self):