[bunkr] fix extraction (#2732)

move bunkr.is code to its own module
2024-11-22 10:42:34 +01:00 · 2022-07-15 12:38:30 +02:00 · 2022-07-15 12:38:30 +02:00 · 46f11a3118
commit 46f11a3118
parent baf3815ebd
4 changed files with 97 additions and 38 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -103,6 +103,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Blogs, Posts, Search Results</td>
    <td></td>
 </tr>
+<tr>
+    <td>Bunkr</td>
+    <td>https://bunkr.is/</td>
+    <td>Albums</td>
+    <td></td>
+</tr>
 <tr>
    <td>Comic Vine</td>
    <td>https://comicvine.gamespot.com/</td>
@@ -1261,12 +1267,6 @@ Consider all sites to be NSFW unless otherwise known.
 <tr>
    <td colspan="4"><strong>lolisafe and chibisafe</strong></td>
 </tr>
-<tr>
-    <td>Bunkr</td>
-    <td>https://app.bunkr.is/</td>
-    <td>Albums</td>
-    <td></td>
-</tr>
 <tr>
    <td>ZzZz</td>
    <td>https://zz.ht/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -25,6 +25,7 @@ modules = [
    "bcy",
    "behance",
    "blogger",
+    "bunkr",
    "comicvine",
    "cyberdrop",
    "danbooru",
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bunkr.is/"""
+
+from .lolisafe import LolisafeAlbumExtractor
+from .. import text
+import json
+
+
+class BunkrAlbumExtractor(LolisafeAlbumExtractor):
+    """Extractor for bunkr.is albums"""
+    category = "bunkr"
+    root = "https://app.bunkr.is"
+    pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)"
+    test = (
+        ("https://app.bunkr.is/a/Lktg9Keq", {
+            "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
+            "content": "0c8768055e4e20e7c7259608b67799171b691140",
+            "keyword": {
+                "album_id": "Lktg9Keq",
+                "album_name": 'test テスト "&>',
+                "count": 1,
+                "filename": 'test-テスト-"&>-QjgneIQv',
+                "id": "QjgneIQv",
+                "name": 'test-テスト-"&>',
+                "num": int,
+            },
+        }),
+        # mp4 (#2239)
+        ("https://bunkr.is/a/ptRHaCn2", {
+            "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
+            "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
+        }),
+        ("https://bunkr.to/a/Lktg9Keq"),
+    )
+
+    def fetch_album(self, album_id):
+        if "//app." in self.root:
+            return self._fetch_album_api(album_id)
+        else:
+            return self._fetch_album_site(album_id)
+
+    def _fetch_album_api(self, album_id):
+        files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
+
+        for file in files:
+            url = file["file"]
+            if url.endswith(".mp4"):
+                file["file"] = url.replace(
+                    "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
+            else:
+                file["_fallback"] = (url.replace("//cdn.", "//cdn3.", 1),)
+
+        return files, data
+
+    def _fetch_album_site(self, album_id):
+        url = self.root + "/a/" + self.album_id
+
+        try:
+            data = json.loads(text.extract(
+                self.request(url).text,
+                'id="__NEXT_DATA__" type="application/json">', '<')[0])
+            props = data["props"]["pageProps"]
+            album = props["album"]
+            files = props["files"]
+        except Exception as exc:
+            self.log.debug(exc)
+            self.root = self.root.replace("bunkr", "app.bunkr", 1)
+            return self._fetch_album_api(album_id)
+
+        for file in files:
+            name = file["name"]
+            if name.endswith(".mp4"):
+                file["file"] = "https://media-files.bunkr.is/" + name
+            else:
+                file["file"] = file["cdn"] + "/" + name
+
+        return files, {
+            "album_id"   : self.album_id,
+            "album_name" : text.unescape(album["name"]),
+            "description": text.unescape(album["description"]),
+            "count"      : len(files),
+        }
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -20,10 +20,6 @@ class LolisafeExtractor(BaseExtractor):


 BASE_PATTERN = LolisafeExtractor.update({
-    "bunkr": {
-        "root": "https://app.bunkr.is",
-        "pattern": r"(?:app\.)?bunkr\.(?:is|to)",
-    },
    "zzzz" : {
        "root": "https://zz.ht",
        "pattern": r"zz\.(?:ht|fo)",
@@ -35,25 +31,6 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
    subcategory = "album"
    pattern = BASE_PATTERN + "/a/([^/?#]+)"
    test = (
-        ("https://app.bunkr.is/a/Lktg9Keq", {
-            "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
-            "content": "0c8768055e4e20e7c7259608b67799171b691140",
-            "keyword": {
-                "album_id": "Lktg9Keq",
-                "album_name": 'test テスト "&>',
-                "count": 1,
-                "filename": 'test-テスト-"&>-QjgneIQv',
-                "id": "QjgneIQv",
-                "name": 'test-テスト-"&>',
-                "num": int,
-            },
-        }),
-        # mp4 (#2239)
-        ("https://bunkr.is/a/ptRHaCn2", {
-            "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
-            "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
-        }),
-        ("https://bunkr.to/a/Lktg9Keq"),
        ("https://zz.ht/a/lop7W6EZ", {
            "pattern": r"https://z\.zz\.fo/(4anuY|ih560)\.png",
            "count": 2,
@@ -71,11 +48,7 @@ class LolisafeAlbumExtractor(LolisafeExtractor):

        domain = self.config("domain")
        if domain is None or domain == "auto":
-            if self.category == "bunkr":
-                self.root = "https://app.bunkr.is"
-            else:
-                self.root = text.root_from_url(match.group(0))
-
+            self.root = text.root_from_url(match.group(0))
        else:
            self.root = text.ensure_http_scheme(domain)

@@ -89,10 +62,6 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
                data["_fallback"] = file["_fallback"]
            text.nameext_from_url(url, data)
            data["name"], sep, data["id"] = data["filename"].rpartition("-")
-
-            if data["extension"] == "mp4":
-                url = url.replace(
-                    "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
            yield Message.Url, url, data

    def fetch_album(self, album_id):