gallery-dl/gallery_dl/extractor/bunkr.py

# -*- coding: utf-8 -*-

# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://bunkr.la/"""

from .lolisafe import LolisafeAlbumExtractor
from .. import text


class BunkrAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for bunkr.la albums"""
    category = "bunkr"
    # base URL used for album pages and download pages
    root = "https://bunkr.la"
    # matches album URLs on all supported bunkr domains (optionally with
    # an 'app.' subdomain); the single capture group is the album ID
    pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:la|[sr]u|is|to)/a/([^/?#]+)"
    test = (
        ("https://bunkr.la/a/Lktg9Keq", {
            "pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",
            "content": "0c8768055e4e20e7c7259608b67799171b691140",
            "keyword": {
                "album_id": "Lktg9Keq",
                "album_name": 'test テスト "&>',
                "count": 1,
                "filename": 'test-テスト-"&>-QjgneIQv',
                "id": "QjgneIQv",
                "name": 'test-テスト-"&>',
                "num": int,
            },
        }),
        # mp4 (#2239)
        ("https://app.bunkr.ru/a/ptRHaCn2", {
            "pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",
            "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
        }),
        # cdn4
        ("https://bunkr.is/a/iXTTc1o2", {
            "pattern": r"https://(cdn|media-files)4\.bunkr\.ru/",
            "content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",
            "keyword": {
                "album_id": "iXTTc1o2",
                "album_name": "test2",
                "album_size": "691.1 KB",
                "count": 2,
                "description": "072022",
                "filename": "re:video-wFO9FtxG|image-sZrQUeOx",
                "id": "re:wFO9FtxG|sZrQUeOx",
                "name": "re:video|image",
                "num": int,
            },
        }),
        ("https://bunkr.la/a/Lktg9Keq"),
        ("https://bunkr.su/a/Lktg9Keq"),
        ("https://bunkr.ru/a/Lktg9Keq"),
        ("https://bunkr.is/a/Lktg9Keq"),
        ("https://bunkr.to/a/Lktg9Keq"),
    )

    def fetch_album(self, album_id):
        """Scrape the album page and collect its files and metadata.

        The 'album_id' argument is not used; the album page is requested
        with 'self.album_id'.

        Returns a '(files, metadata)' tuple. 'files' is a list of dicts
        with a "file" URL each; entries whose URL ends in one of the
        video/archive extensions (compared case-insensitively) have
        their host rewritten from 'cdn' to 'media-files' and carry an
        additional "_http_headers" dict. 'metadata' holds "album_id",
        "album_name", "album_size", "description", and "count".

        Raises ValueError when the 'class="grid-images' marker is missing
        from the page, when the second header fragment does not split
        into three whitespace-separated parts, or when no '/' follows
        the scheme in the CDN link taken from the download page.
        Raises IndexError when the header yields fewer than two
        text fragments.
        """
        # album metadata
        page = self.request(self.root + "/a/" + self.album_id).text
        info = text.split_html(text.extr(
            page, "<h1", "</div>").partition(">")[2])
        # only the third part of info[1] is needed (presumably
        # '<count> files (<size>)' - verify against the live page);
        # the 3-way unpack still rejects unexpected layouts
        _, _, size = info[1].split(None, 2)

        # files
        cdn = None
        files = []
        append = files.append
        # Referer pointing at the 'stream.' subdomain of 'root'
        headers = {"Referer": self.root.replace("://", "://stream.", 1) + "/"}
        # extensions whose files get requested from a 'media-files' host
        media_exts = (".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts",
                      ".zip", ".rar", ".7z")

        pos = page.index('class="grid-images')
        for url in text.extract_iter(page, '<a href="', '"', pos):
            if url.startswith("/"):
                if not cdn:
                    # fetch cdn root from download page
                    # (url[3:] drops the first three characters of the
                    #  relative link, presumably a '/<letter>/' prefix)
                    durl = "{}/d/{}".format(self.root, url[3:])
                    cdn = text.extr(self.request(
                        durl).text, 'link.href = "', '"')
                    # keep everything before the first '/' found at
                    # index 8 or later, i.e. past the scheme's '//'
                    cdn = cdn[:cdn.index("/", 8)]
                # url[2:] keeps the slash in front of the file name
                url = cdn + url[2:]

            url = text.unescape(url)
            # compare a lowercased copy so that names like 'CLIP.MP4'
            # are treated the same as 'clip.mp4'; the URL itself is
            # left untouched
            if url.lower().endswith(media_exts):
                append({"file": url.replace("://cdn", "://media-files", 1),
                        "_http_headers": headers})
            else:
                append({"file": url})

        return files, {
            "album_id"   : self.album_id,
            "album_name" : text.unescape(info[0]),
            # strip the first and last character of the size fragment
            "album_size" : size[1:-1],
            "description": text.unescape(info[2]) if len(info) > 2 else "",
            "count"      : len(files),
        }
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00			`# -- coding: utf-8 --`

[bunkr] fix URLs returned by API (#3481) 2023-01-01 14:07:15 +01:00			`# Copyright 2022-2023 Mike Fährmann`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

[bunkr] update domain to 'bunkr.la' 2023-03-28 20:10:36 +02:00			`"""Extractors for https://bunkr.la/"""`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00
			`from .lolisafe import LolisafeAlbumExtractor`
[bunkr] fix extraction (#3636, #3655) 2023-02-15 15:42:32 +01:00			`from .. import text`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00

			`class BunkrAlbumExtractor(LolisafeAlbumExtractor):`
[bunkr] update domain to 'bunkr.la' 2023-03-28 20:10:36 +02:00			`"""Extractor for bunkr.la albums"""`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00			`category = "bunkr"`
[bunkr] update domain to 'bunkr.la' 2023-03-28 20:10:36 +02:00			`root = "https://bunkr.la"`
			`pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:la\|[sr]u\|is\|to)/a/([^/?#]+)"`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00			`test = (`
[bunkr] update domain to 'bunkr.la' 2023-03-28 20:10:36 +02:00			`("https://bunkr.la/a/Lktg9Keq", {`
[bunkr] fix extraction (#3636, #3655) 2023-02-15 15:42:32 +01:00			`"pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00			`"content": "0c8768055e4e20e7c7259608b67799171b691140",`
			`"keyword": {`
			`"album_id": "Lktg9Keq",`
			`"album_name": 'test テスト "&>',`
			`"count": 1,`
			`"filename": 'test-テスト-"&>-QjgneIQv',`
			`"id": "QjgneIQv",`
			`"name": 'test-テスト-"&>',`
			`"num": int,`
			`},`
			`}),`
			`# mp4 (#2239)`
[bunkr] update domain (#3636) 2023-02-09 19:28:32 +01:00			`("https://app.bunkr.ru/a/ptRHaCn2", {`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2022-12-11 17:34:34 +01:00			`"pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00			`"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",`
			`}),`
[bunkr] fix extraction (#2903) 2022-09-09 11:44:27 +02:00			`# cdn4`
			`("https://bunkr.is/a/iXTTc1o2", {`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2022-12-11 17:34:34 +01:00			`"pattern": r"https://(cdn\|media-files)4\.bunkr\.ru/",`
[bunkr] fix extraction (#2903) 2022-09-09 11:44:27 +02:00			`"content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",`
[bunkr] fix extraction (#3636, #3655) 2023-02-15 15:42:32 +01:00			`"keyword": {`
			`"album_id": "iXTTc1o2",`
			`"album_name": "test2",`
			`"album_size": "691.1 KB",`
			`"count": 2,`
			`"description": "072022",`
			`"filename": "re:video-wFO9FtxG\|image-sZrQUeOx",`
			`"id": "re:wFO9FtxG\|sZrQUeOx",`
			`"name": "re:video\|image",`
			`"num": int,`
			`},`
[bunkr] fix extraction (#2903) 2022-09-09 11:44:27 +02:00			`}),`
[bunkr] update domain to 'bunkr.la' 2023-03-28 20:10:36 +02:00			`("https://bunkr.la/a/Lktg9Keq"),`
			`("https://bunkr.su/a/Lktg9Keq"),`
			`("https://bunkr.ru/a/Lktg9Keq"),`
			`("https://bunkr.is/a/Lktg9Keq"),`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00			`("https://bunkr.to/a/Lktg9Keq"),`
			`)`

			`def fetch_album(self, album_id):`
[bunkr] fix extraction (#3636, #3655) 2023-02-15 15:42:32 +01:00			`# album metadata`
			`page = self.request(self.root + "/a/" + self.album_id).text`
			`info = text.split_html(text.extr(`
			`page, "<h1", "</div>").partition(">")[2])`
			`count, _, size = info[1].split(None, 2)`

			`# files`
			`cdn = None`
			`files = []`
			`append = files.append`
			`headers = {"Referer": self.root.replace("://", "://stream.", 1) + "/"}`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00
[bunkr] fix extraction (#3636, #3655) 2023-02-15 15:42:32 +01:00			`pos = page.index('class="grid-images')`
			`for url in text.extract_iter(page, '<a href="', '"', pos):`
			`if url.startswith("/"):`
			`if not cdn:`
			`# fetch cdn root from download page`
			`durl = "{}/d/{}".format(self.root, url[3:])`
			`cdn = text.extr(self.request(`
			`durl).text, 'link.href = "', '"')`
			`cdn = cdn[:cdn.index("/", 8)]`
			`url = cdn + url[2:]`
[bunkr] use 'media-files' servers for more file types 2022-12-01 18:02:32 +01:00
[bunkr] fix extraction (#3636, #3655) 2023-02-15 15:42:32 +01:00			`url = text.unescape(url)`
			`if url.endswith((".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts",`
			`".zip", ".rar", ".7z")):`
			`append({"file": url.replace("://cdn", "://media-files", 1),`
			`"_http_headers": headers})`
			`else:`
			`append({"file": url})`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2022-07-15 12:38:30 +02:00
[bunkr] fix extraction (#3636, #3655) 2023-02-15 15:42:32 +01:00			`return files, {`
			`"album_id" : self.album_id,`
			`"album_name" : text.unescape(info[0]),`
			`"album_size" : size[1:-1],`
			`"description": text.unescape(info[2]) if len(info) > 2 else "",`
			`"count" : len(files),`
			`}`