From bfd7401b1e888f6cc4af038564ccaf5c2631dada Mon Sep 17 00:00:00 2001
From: Alice <38675581+alice945@users.noreply.github.com>
Date: Tue, 26 Oct 2021 11:00:41 -0700
Subject: [PATCH] [skeb] add 'user' and 'post' extractors (#1031) (#1971)
* Create skeb.py
* Update __init__.py
* Update supportedsites.py
* Update supportedsites.md
* Update supportedsites.py
* Update skeb.py
---
docs/supportedsites.md | 6 ++
gallery_dl/extractor/__init__.py | 1 +
gallery_dl/extractor/skeb.py | 141 +++++++++++++++++++++++++++++++
3 files changed, 148 insertions(+)
create mode 100644 gallery_dl/extractor/skeb.py
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 94fc492a..335b714b 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -697,6 +697,12 @@ Consider all sites to be NSFW unless otherwise known.
     <td>Galleries, individual Images, Videos</td>
     <td></td>
 </tr>
+<tr>
+    <td>Skeb</td>
+    <td>https://skeb.jp/</td>
+    <td>Posts, User Profiles</td>
+    <td></td>
+</tr>
 <tr>
     <td>SlickPic</td>
     <td>https://www.slickpic.com/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 93702ab7..79fe9713 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -115,6 +115,7 @@ modules = [
"senmanga",
"sexcom",
"simplyhentai",
+ "skeb",
"slickpic",
"slideshare",
"smugmug",
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
new file mode 100644
index 00000000..c1a8878d
--- /dev/null
+++ b/gallery_dl/extractor/skeb.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://skeb.jp/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class SkebExtractor(Extractor):
+    """Base class for skeb extractors
+
+    Fetches every post number returned by posts() through the site API.
+    """
+    category = "skeb"
+    directory_fmt = ("{category}", "{creator[screen_name]}")
+    filename_fmt = "{post_num}_{file_id}.{extension}"
+    archive_fmt = "{post_num}_{file_id}_{content_category}"
+    root = "https://skeb.jp"
+
+    def __init__(self, match):
+        """Store the user name captured by the first pattern group"""
+        super().__init__(match)
+        self.user_name = match.group(1)
+
+    def items(self):
+        """Yield a Directory message per post, then a Url message per file"""
+        for number in self.posts():
+            resp, post = self._get_post_data(number)
+            yield Message.Directory, post
+            for kwdict in self._get_urls_from_post(resp, post):
+                url = kwdict["file_url"]
+                yield Message.Url, url, text.nameext_from_url(url, kwdict)
+
+    def posts(self):
+        """Return the post numbers to extract (provided by subclasses)"""
+
+    def _pagination(self):
+        """Yield the number of each non-private work listed for the user
+
+        The listing is walked in steps of 30 entries via the 'offset'
+        parameter and ends at the first response with fewer than 30.
+        """
+        api_url = "{}/api/users/{}/works".format(self.root, self.user_name)
+        query = {"role": "creator", "sort": "date", "offset": 0}
+        headers = {"Referer": self.root, "Authorization": "Bearer null"}
+
+        while True:
+            page = self.request(api_url, params=query, headers=headers).json()
+
+            for work in page:
+                number = work["path"].rpartition("/")[2]
+                if not work["private"]:
+                    yield number
+                else:
+                    self.log.debug("Skipping %s (private)", number)
+
+            if len(page) < 30:
+                return
+            query["offset"] += 30
+
+    def _get_post_data(self, post_num):
+        """Fetch one work; return (decoded API response, metadata dict)"""
+        api_url = "{}/api/users/{}/works/{}".format(
+            self.root, self.user_name, post_num)
+        headers = {"Referer": self.root, "Authorization": "Bearer null"}
+        resp = self.request(api_url, headers=headers).json()
+
+        user_keys = ("id", "name", "screen_name", "avatar_url", "header_url")
+        creator = resp["creator"]
+        post = {
+            "post_num": post_num,
+            "post_url": self.root + resp["path"],
+            "body": resp["body"],
+            "source_body": resp["source_body"],
+            "translated_body": resp["translated"],
+            "completed_at": resp["completed_at"],
+            "date": text.parse_datetime(
+                resp["completed_at"], "%Y-%m-%dT%H:%M:%S.%fZ"),
+            "nsfw": resp["nsfw"],
+            "anonymous": resp["anonymous"],
+            "tags": resp["tag_list"],
+            "genre": resp["genre"],
+            "thanks": resp["thanks"],
+            "source_thanks": resp["source_thanks"],
+            "translated_thanks": resp["translated_thanks"],
+            "creator": {key: creator[key] for key in user_keys},
+        }
+        # client details are only attached to non-anonymous posts
+        if "client" in resp and not resp["anonymous"]:
+            client = resp["client"]
+            post["client"] = {key: client[key] for key in user_keys}
+        return resp, post
+
+    def _get_urls_from_post(self, resp, post):
+        """Yield 'post' once per file, updated with that file's metadata
+
+        One dict object is updated in place and re-yielded for the
+        'og_image_url' thumbnail (if present) and then for each preview,
+        so a yielded state is only valid until the generator advances.
+        """
+        if "og_image_url" in resp:
+            post["content_category"] = "thumb"
+            post["file_id"] = "thumb"
+            post["file_url"] = resp["og_image_url"]
+            yield post
+
+        info_keys = ("width", "height", "byte_size", "duration", "frame_rate",
+                     "software", "extension", "is_movie", "transcoder")
+        for preview in resp["previews"]:
+            post["content_category"] = "preview"
+            post["file_id"] = preview["id"]
+            post["file_url"] = preview["url"]
+            info = preview["information"]
+            post["original"] = {key: info[key] for key in info_keys}
+            yield post
+
+
+class SkebPostExtractor(SkebExtractor):
+    """Extractor for one skeb work, given its user name and work number"""
+    subcategory = "post"
+    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"
+
+    def __init__(self, match):
+        super().__init__(match)
+        self.post_num = match.group(2)  # digits following "/works/"
+
+    def posts(self):
+        return (self.post_num,)  # exactly one post; no listing request
+
+
+class SkebUserExtractor(SkebExtractor):
+    """Extractor for every non-private work listed for a skeb user"""
+    subcategory = "user"
+    pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)"
+
+    def posts(self):
+        yield from self._pagination()