From bfd7401b1e888f6cc4af038564ccaf5c2631dada Mon Sep 17 00:00:00 2001 From: Alice <38675581+alice945@users.noreply.github.com> Date: Tue, 26 Oct 2021 11:00:41 -0700 Subject: [PATCH] [skeb] add 'user' and 'post' extractors (#1031) (#1971) * Create skeb.py * Update __init__.py * Update supportedsites.py * Update supportedsites.md * Update supportedsites.py * Update skeb.py --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/skeb.py | 141 +++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+) create mode 100644 gallery_dl/extractor/skeb.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 94fc492a..335b714b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -697,6 +697,12 @@ Consider all sites to be NSFW unless otherwise known. Galleries, individual Images, Videos + + Skeb + https://skeb.jp/ + Posts, User Profiles + + SlickPic https://www.slickpic.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 93702ab7..79fe9713 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -115,6 +115,7 @@ modules = [ "senmanga", "sexcom", "simplyhentai", + "skeb", "slickpic", "slideshare", "smugmug", diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py new file mode 100644 index 00000000..c1a8878d --- /dev/null +++ b/gallery_dl/extractor/skeb.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://skeb.jp/""" + +from .common import Extractor, Message +from .. 
import text  # completes the "from .. " fragment ending the previous line


class SkebExtractor(Extractor):
    """Base class for skeb extractors"""
    category = "skeb"
    directory_fmt = ("{category}", "{creator[screen_name]}")
    filename_fmt = "{post_num}_{file_id}.{extension}"
    archive_fmt = "{post_num}_{file_id}_{content_category}"
    root = "https://skeb.jp"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first pattern group: the name following '@' in the URL
        self.user_name = match.group(1)

    def items(self):
        """Yield a Directory message per post and a Url message per file"""
        for post_num in self.posts():
            response, post = self._get_post_data(post_num)
            yield Message.Directory, post
            for data in self._get_urls_from_post(response, post):
                url = data["file_url"]
                yield Message.Url, url, text.nameext_from_url(url, data)

    def posts(self):
        """Return post number"""

    def _api_headers(self):
        """Return the headers sent with every API request"""
        return {"Referer": self.root, "Authorization": "Bearer null"}

    @staticmethod
    def _user_data(user):
        """Return the fields kept from a 'creator' or 'client' object"""
        return {
            "id"         : user["id"],
            "name"       : user["name"],
            "screen_name": user["screen_name"],
            "avatar_url" : user["avatar_url"],
            "header_url" : user["header_url"],
        }

    def _pagination(self):
        """Yield the post numbers of all non-private works of the user"""
        url = "{}/api/users/{}/works".format(self.root, self.user_name)
        params = {"role": "creator", "sort": "date", "offset": 0}
        headers = self._api_headers()

        while True:
            posts = self.request(url, params=params, headers=headers).json()

            for post in posts:
                # the post number is the last component of the work's path
                post_num = post["path"].rpartition("/")[2]
                if post["private"]:
                    self.log.debug("Skipping %s (private)", post_num)
                    continue
                yield post_num

            # results are requested in steps of 30;
            # a shorter page is treated as the last one
            if len(posts) < 30:
                return
            params["offset"] += 30

    def _get_post_data(self, post_num):
        """Fetch one work and return (API response, metadata dict)"""
        url = "{}/api/users/{}/works/{}".format(
            self.root, self.user_name, post_num)
        resp = self.request(url, headers=self._api_headers()).json()

        post = {
            "post_num"         : post_num,
            "post_url"         : self.root + resp["path"],
            "body"             : resp["body"],
            "source_body"      : resp["source_body"],
            "translated_body"  : resp["translated"],
            "completed_at"     : resp["completed_at"],
            "date"             : text.parse_datetime(
                resp["completed_at"], "%Y-%m-%dT%H:%M:%S.%fZ"),
            "nsfw"             : resp["nsfw"],
            "anonymous"        : resp["anonymous"],
            "tags"             : resp["tag_list"],
            "genre"            : resp["genre"],
            "thanks"           : resp["thanks"],
            "source_thanks"    : resp["source_thanks"],
            "translated_thanks": resp["translated_thanks"],
            "creator"          : self._user_data(resp["creator"]),
        }
        # client details are only added for non-anonymous requests
        if not resp["anonymous"] and "client" in resp:
            post["client"] = self._user_data(resp["client"])
        return resp, post

    def _get_urls_from_post(self, resp, post):
        """Yield one metadata dict for each downloadable file of a post

        Every yielded dict is its own shallow copy of 'post' extended
        with 'content_category', 'file_id' and 'file_url' (plus 'original'
        for previews). 'post' itself is left unmodified, so results that
        were yielded earlier do not change when the next one is built.
        """
        # skip the thumbnail when the URL is missing, null, or empty
        thumb_url = resp.get("og_image_url")
        if thumb_url:
            data = post.copy()
            data["content_category"] = "thumb"
            data["file_id"] = "thumb"
            data["file_url"] = thumb_url
            yield data

        info_keys = (
            "width", "height", "byte_size", "duration", "frame_rate",
            "software", "extension", "is_movie", "transcoder",
        )
        for preview in resp["previews"]:
            info = preview["information"]
            data = post.copy()
            data["content_category"] = "preview"
            data["file_id"] = preview["id"]
            data["file_url"] = preview["url"]
            data["original"] = {key: info[key] for key in info_keys}
            yield data


class SkebPostExtractor(SkebExtractor):
    """Extractor for a single skeb post"""
    subcategory = "post"
    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"

    def __init__(self, match):
        SkebExtractor.__init__(self, match)
        # second pattern group: the numeric work ID
        self.post_num = match.group(2)

    def posts(self):
        """Return the single post number taken from the URL"""
        return (self.post_num,)


class SkebUserExtractor(SkebExtractor):
    """Extractor for all posts from a skeb user"""
    subcategory = "user"
    # NOTE(review): this pattern is not anchored at its end, so it also
    # matches post URLs ('/@name/works/123'); presumably the post
    # extractor is tried first because it is defined first -- confirm
    # before reordering these classes
    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)"

    def posts(self):
        """Return an iterator over all post numbers of the user"""
        return self._pagination()