1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-24 19:52:32 +01:00

[saint] add 'album' and 'media' extractors (#4405, #6324)

This commit is contained in:
Mike Fährmann 2024-10-27 22:22:43 +01:00
parent 061b27f329
commit 10c076e7f2
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
5 changed files with 197 additions and 1 deletions

View File

@ -787,6 +787,12 @@ Consider all listed sites to potentially be NSFW.
<td>Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Saint</td>
<td>https://saint2.su/</td>
<td>Albums, Media Files</td>
<td></td>
</tr>
<tr>
<td>Sankaku Channel</td>
<td>https://sankaku.app/</td>

View File

@ -139,6 +139,7 @@ modules = [
"reddit",
"redgifs",
"rule34us",
"saint",
"sankaku",
"sankakucomplex",
"scrolller",

View File

@ -46,12 +46,17 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
for data["num"], file in enumerate(files, 1):
url = file["file"]
file.update(data)
text.nameext_from_url(url, file)
if "extension" not in file:
text.nameext_from_url(url, file)
if "name" in file:
name = file["name"]
file["name"] = name.rpartition(".")[0] or name
file["id"] = file["filename"].rpartition("-")[2]
elif "id" in file:
file["name"] = file["filename"]
file["filename"] = "{}-{}".format(file["name"], file["id"])
else:
file["name"], sep, file["id"] = \
file["filename"].rpartition("-")

View File

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# Copyright 2024 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://saint2.su/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
# Shared URL prefix for all saint extractors: an optional "http://" or
# "https://" scheme, then the host "saint" followed by optional digits
# (e.g. "saint2"), under one of the TLDs ".su", ".pk", or ".to".
BASE_PATTERN = r"(?:https?://)?saint\d*\.(?:su|pk|to)"
class SaintAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for saint albums"""
    category = "saint"
    root = "https://saint2.su"
    pattern = BASE_PATTERN + r"/a/([^/?#]+)"
    example = "https://saint2.su/a/ID"

    def fetch_album(self, album_id):
        """Fetch an album page and collect its files and metadata

        Requests '<root>/a/<album_id>' and returns a '(files, metadata)'
        tuple: 'files' is a list with one dict per file entry found on
        the page, 'metadata' is a dict describing the album as a whole.
        """
        page = self.request(self.root + "/a/" + album_id)
        extr = text.extract_from(page.text)

        # NOTE(review): the calls to 'extr' look order-sensitive
        # (presumably it advances through the page on every call),
        # so values are pulled strictly in the sequence shown here.
        title = extr("<title>", "<")
        descr = extr('name="description" content="', '"')

        files = []
        id2 = extr("/thumbs/", "-")
        # an empty result for the thumbnail ID marks the end of the list
        while id2:
            date = text.parse_timestamp(extr("", "."))
            file_id = extr("/embed/", '"')
            size = text.parse_int(extr('data="', '"'))
            # strip the quotes wrapped around the play() argument
            play_arg = extr("onclick=\"play(", ")").strip("\"'")
            file_url = text.unescape(play_arg)
            id_dl = extr("/d/", ")").rstrip("\"'")

            files.append({
                "id2"  : id2,
                "date" : date,
                "id"   : file_id,
                "size" : size,
                "file" : file_url,
                "id_dl": id_dl,
            })
            id2 = extr("/thumbs/", "-")

        metadata = {
            "album_id"     : album_id,
            # the album name is whatever precedes the last " - "
            "album_name"   : text.unescape(title.rpartition(" - ")[0]),
            "album_size"   : sum(entry["size"] for entry in files),
            "description"  : text.unescape(descr),
            "count"        : len(files),
            "_http_headers": {"Referer": page.url},
        }
        return files, metadata
class SaintMediaExtractor(SaintAlbumExtractor):
    """Extractor for saint media links"""
    subcategory = "media"
    directory_fmt = ("{category}",)
    pattern = BASE_PATTERN + r"(/(embe)?d/([^/?#]+))"
    example = "https://saint2.su/embed/ID"

    def fetch_album(self, album_id):
        """Fetch a single media page and present it as a one-file album

        Handles both '/embed/ID' and '/d/ID' URLs. Returns a
        '(files, metadata)' tuple containing exactly one file dict and
        placeholder album metadata. If anything fails along the way,
        the exception is logged and an empty '(), {}' is returned.
        """
        try:
            # presumably the three capture groups of 'pattern':
            # full path, "embe" (only for /embed/ URLs), and the ID
            path, embed, _ = self.groups

            page = self.request(self.root + path)
            extr = text.extract_from(page.text)

            if not embed:
                # /d/
                file = {
                    "file"     : text.unescape(extr('<a href="', '"')),
                    "id_dl"    : album_id,
                    "name"     : album_id,
                    "filename" : album_id,
                    "extension": "mp4",
                }
            else:
                # /embed/ -- values are pulled in the order requested
                id2 = extr("/thumbs/", "-")
                date = text.parse_timestamp(extr("", "."))
                source = text.unescape(extr('<source src="', '"'))
                id_dl = extr("/d/", "'")
                file = {
                    "id"   : album_id,
                    "id2"  : id2,
                    "date" : date,
                    "file" : source,
                    "id_dl": id_dl,
                }

            file["_http_headers"] = {"Referer": page.url}
        except Exception as exc:
            self.log.error("%s: %s", exc.__class__.__name__, exc)
            return (), {}
        else:
            # a lone media file has no real album; use placeholder values
            metadata = {
                "album_id"   : "",
                "album_name" : "",
                "album_size" : -1,
                "description": "",
                "count"      : 1,
            }
            return (file,), metadata

83
test/results/saint.py Normal file
View File

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import saint
# Expected results for the saint extractors, one dict per test URL.
# NOTE(review): keys starting with "#" presumably configure the test run
# itself, while the remaining keys are expected metadata values -- confirm
# against the test runner.
__tests__ = (
# album URL: two video files with per-file IDs, names, and a total size
{
"#url" : "https://saint2.su/a/2c5iuWHTumH",
"#class": saint.SaintAlbumExtractor,
"#urls" : (
"https://cold1.saint2.cr/videos/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
"https://cold1.saint2.cr/videos/3b125e3fb4b98693f17d85cb53590215.mp4",
),
"album_id" : "2c5iuWHTumH",
"album_name" : "animations",
"album_size" : 37083862,
"count" : 2,
"date" : "type:datetime",
"description": "Descriptions can contain only alphanumeric ASCII characters",
"extension" : "mp4",
"file" : r"re:https://...",
"filename" : {"3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8",
"3b125e3fb4b98693f17d85cb53590215-ze10Ohbpoy5"},
"id" : {"6lC7mKrJst8",
"ze10Ohbpoy5"},
"id2" : {"6712834015d67",
"671284a627e0e"},
"id_dl" : {"M2IxY2NlYmYzNTc2ZjhkNWFhYzNlZTBlNWExMmRhOTUubXA0",
"M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0"},
"name" : {"3b1ccebf3576f8d5aac3ee0e5a12da95",
"3b125e3fb4b98693f17d85cb53590215"},
"num" : {1, 2},
},
# /embed/ URL: a single media file, including a content hash check
{
"#url" : "https://saint2.su/embed/6lC7mKrJst8",
"#class": saint.SaintMediaExtractor,
"#urls" : "https://cold1.saint2.cr/videos/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
"#sha1_content": "39037a029b3fe96f838b4545316caaa545c84075",
"count" : 1,
"date" : "dt:2024-10-18 15:48:16",
"extension": "mp4",
"file" : "https://cold1.saint2.cr/videos/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
"filename" : "3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8",
"id" : "6lC7mKrJst8",
"id2" : "6712834015d67",
"id_dl" : "M2IxY2NlYmYzNTc2ZjhkNWFhYzNlZTBlNWExMmRhOTUubXA0",
"name" : "3b1ccebf3576f8d5aac3ee0e5a12da95",
"num" : 1,
},
# /d/ URL: the download ID doubles as id, name, and filename
{
"#url" : "https://saint2.su/d/M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"#class": saint.SaintMediaExtractor,
"#urls" : "https://cold1.saint2.cr/api/download.php?file=M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"count" : 1,
"extension": "mp4",
"file" : "https://cold1.saint2.cr/api/download.php?file=M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"filename" : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"id" : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"id_dl" : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"name" : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"num" : 1,
},
# alternate domain (.pk): only the URL and extractor class are given
{
"#url" : "https://saint2.pk/embed/6lC7mKrJst8",
"#class": saint.SaintMediaExtractor,
},
# alternate domain (.to, no digit in host): URL and extractor class only
{
"#url" : "https://saint.to/embed/6lC7mKrJst8",
"#class": saint.SaintMediaExtractor,
},
)