diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 4f9e1a1f..398cd113 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -787,6 +787,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Posts, Tag Searches</td>
     <td></td>
 </tr>
+<tr>
+    <td>Saint</td>
+    <td>https://saint2.su/</td>
+    <td>Albums, Media Files</td>
+    <td></td>
+</tr>
 <tr>
     <td>Sankaku Channel</td>
     <td>https://sankaku.app/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 4e9fa506..b3c812bf 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -139,6 +139,7 @@ modules = [
     "reddit",
     "redgifs",
     "rule34us",
+    "saint",
     "sankaku",
     "sankakucomplex",
     "scrolller",
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 044f4f51..295b9c49 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -46,12 +46,17 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
         for data["num"], file in enumerate(files, 1):
             url = file["file"]
             file.update(data)
-            text.nameext_from_url(url, file)
+
+            if "extension" not in file:
+                text.nameext_from_url(url, file)
 
             if "name" in file:
                 name = file["name"]
                 file["name"] = name.rpartition(".")[0] or name
                 file["id"] = file["filename"].rpartition("-")[2]
+            elif "id" in file:
+                file["name"] = file["filename"]
+                file["filename"] = "{}-{}".format(file["name"], file["id"])
             else:
                 file["name"], sep, file["id"] = \
                     file["filename"].rpartition("-")
diff --git a/gallery_dl/extractor/saint.py b/gallery_dl/extractor/saint.py
new file mode 100644
index 00000000..784cdc03
--- /dev/null
+++ b/gallery_dl/extractor/saint.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://saint2.su/"""
+
+from .lolisafe import LolisafeAlbumExtractor
+from .. import text
+
+# optional scheme, "saint" + optional digits, and a .su/.pk/.to TLD
+BASE_PATTERN = r"(?:https?://)?saint\d*\.(?:su|pk|to)"
+
+
+class SaintAlbumExtractor(LolisafeAlbumExtractor):
+    """Extractor for saint albums"""
+    category = "saint"
+    root = "https://saint2.su"
+    pattern = BASE_PATTERN + r"/a/([^/?#]+)"
+    example = "https://saint2.su/a/ID"
+
+    def fetch_album(self, album_id):
+        """Fetch the album page; return (files, album metadata)"""
+        # album metadata
+        response = self.request(self.root + "/a/" + album_id)
+        extr = text.extract_from(response.text)
+
+        title = extr("<title>", "<")
+        descr = extr('name="description" content="', '"')
+        files = []
+
+        # collect one entry per "/thumbs/" marker until none is left
+        while True:
+            id2 = extr("/thumbs/", "-")
+            if not id2:
+                break
+            files.append({
+                "id2"  : id2,
+                "date" : text.parse_timestamp(extr("", ".")),
+                "id"   : extr("/embed/", '"'),
+                "size" : text.parse_int(extr('data="', '"')),
+                "file" : text.unescape(extr(
+                    "onclick=\"play(", ")").strip("\"'")),
+                "id_dl": extr("/d/", ")").rstrip("\"'"),
+            })
+
+        return files, {
+            "album_id"     : album_id,
+            "album_name"   : text.unescape(title.rpartition(" - ")[0]),
+            "album_size"   : sum(file["size"] for file in files),
+            "description"  : text.unescape(descr),
+            "count"        : len(files),
+            "_http_headers": {"Referer": response.url}
+        }
+
+
+class SaintMediaExtractor(SaintAlbumExtractor):
+    """Extractor for saint media links"""
+    subcategory = "media"
+    directory_fmt = ("{category}",)
+    pattern = BASE_PATTERN + r"(/(embe)?d/([^/?#]+))"
+    example = "https://saint2.su/embed/ID"
+
+    def fetch_album(self, album_id):
+        """Fetch one /embed/ or /d/ page; return it as a 1-file album"""
+        try:
+            path, embed, _ = self.groups
+
+            url = self.root + path
+            response = self.request(url)
+            extr = text.extract_from(response.text)
+
+            if embed:
+                file = {
+                    "id"   : album_id,
+                    "id2"  : extr("/thumbs/", "-"),
+                    "date" : text.parse_timestamp(extr("", ".")),
+                    "file" : text.unescape(extr('<source src="', '"')),
+                    "id_dl": extr("/d/", "'"),
+                }
+
+            else:  # /d/
+                file = {
+                    "file"     : text.unescape(extr('<a href="', '"')),
+                    "id_dl"    : album_id,
+                    "name"     : album_id,
+                    "filename" : album_id,
+                    "extension": "mp4",
+                }
+
+            file["_http_headers"] = {"Referer": response.url}
+        except Exception as exc:
+            # best-effort: log the failure and report no files
+            self.log.error("%s: %s", exc.__class__.__name__, exc)
+            return (), {}
+
+        return (file,), {
+            "album_id"   : "",
+            "album_name" : "",
+            "album_size" : -1,
+            "description": "",
+            "count"      : 1,
+        }
diff --git a/test/results/saint.py b/test/results/saint.py
new file mode 100644
index 00000000..7f14b317
--- /dev/null
+++ b/test/results/saint.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import saint
+
+
+__tests__ = (
+{
+    "#url"  : "https://saint2.su/a/2c5iuWHTumH",
+    "#class": saint.SaintAlbumExtractor,
+    "#urls" : (
+        "https://cold1.saint2.cr/videos/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
+        "https://cold1.saint2.cr/videos/3b125e3fb4b98693f17d85cb53590215.mp4",
+    ),
+
+    "album_id"   : "2c5iuWHTumH",
+    "album_name" : "animations",
+    "album_size" : 37083862,
+    "count"      : 2,
+    "date"       : "type:datetime",
+    "description": "Descriptions can contain only alphanumeric ASCII characters",
+    "extension"  : "mp4",
+    "file"       : r"re:https://...",
+    "filename"   : {"3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8",
+                    "3b125e3fb4b98693f17d85cb53590215-ze10Ohbpoy5"},
+    "id"         : {"6lC7mKrJst8",
+                    "ze10Ohbpoy5"},
+    "id2"        : {"6712834015d67",
+                    "671284a627e0e"},
+    "id_dl"      : {"M2IxY2NlYmYzNTc2ZjhkNWFhYzNlZTBlNWExMmRhOTUubXA0",
+                    "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0"},
+    "name"       : {"3b1ccebf3576f8d5aac3ee0e5a12da95",
+                    "3b125e3fb4b98693f17d85cb53590215"},
+    "num"        : {1, 2},
+},
+
+{
+    "#url"  : "https://saint2.su/embed/6lC7mKrJst8",
+    "#class": saint.SaintMediaExtractor,
+    "#urls" : "https://cold1.saint2.cr/videos/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
+    "#sha1_content": "39037a029b3fe96f838b4545316caaa545c84075",
+
+    "count"    : 1,
+    "date"     : "dt:2024-10-18 15:48:16",
+    "extension": "mp4",
+    "file"     : "https://cold1.saint2.cr/videos/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
+    "filename" : "3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8",
+    "id"       : "6lC7mKrJst8",
+    "id2"      : "6712834015d67",
+    "id_dl"    : "M2IxY2NlYmYzNTc2ZjhkNWFhYzNlZTBlNWExMmRhOTUubXA0",
+    "name"     : "3b1ccebf3576f8d5aac3ee0e5a12da95",
+    "num"      : 1,
+},
+
+{
+    "#url"  : "https://saint2.su/d/M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
+    "#class": saint.SaintMediaExtractor,
+    "#urls" : "https://cold1.saint2.cr/api/download.php?file=M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
+
+    "count"    : 1,
+    "extension": "mp4",
+    "file"     : "https://cold1.saint2.cr/api/download.php?file=M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
+    "filename" : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
+    "id"       : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
+    "id_dl"    : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
+    "name"     : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
+    "num"      : 1,
+},
+
+{
+    "#url"  : "https://saint2.pk/embed/6lC7mKrJst8",
+    "#class": saint.SaintMediaExtractor,
+},
+
+{
+    "#url"  : "https://saint.to/embed/6lC7mKrJst8",
+    "#class": saint.SaintMediaExtractor,
+},
+
+)