1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-26 12:42:29 +01:00
gallery-dl/gallery_dl/extractor/skeb.py
Alice bfd7401b1e
[skeb] add 'user' and 'post' extractors (#1031) (#1971)
* Create skeb.py

* Update __init__.py

* Update supportedsites.py

* Update supportedsites.md

* Update supportedsites.py

* Update skeb.py
2021-10-26 20:00:41 +02:00

142 lines
5.1 KiB
Python

# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://skeb.jp/"""
from .common import Extractor, Message
from .. import text
class SkebExtractor(Extractor):
"""Base class for skeb extractors"""
category = "skeb"
directory_fmt = ("{category}", "{creator[screen_name]}")
filename_fmt = "{post_num}_{file_id}.{extension}"
archive_fmt = "{post_num}_{file_id}_{content_category}"
root = "https://skeb.jp"
def __init__(self, match):
Extractor.__init__(self, match)
self.user_name = match.group(1)
def items(self):
for post_num in self.posts():
response, post = self._get_post_data(post_num)
yield Message.Directory, post
for data in self._get_urls_from_post(response, post):
url = data["file_url"]
yield Message.Url, url, text.nameext_from_url(url, data)
def posts(self):
"""Return post number"""
def _pagination(self):
url = "{}/api/users/{}/works".format(self.root, self.user_name)
params = {"role": "creator", "sort": "date", "offset": 0}
headers = {"Referer": self.root, "Authorization": "Bearer null"}
while True:
posts = self.request(url, params=params, headers=headers).json()
for post in posts:
post_num = post["path"].rpartition("/")[2]
if post["private"]:
self.log.debug("Skipping %s (private)", post_num)
continue
yield post_num
if len(posts) < 30:
return
params["offset"] += 30
def _get_post_data(self, post_num):
url = "{}/api/users/{}/works/{}".format(
self.root, self.user_name, post_num)
headers = {"Referer": self.root, "Authorization": "Bearer null"}
resp = self.request(url, headers=headers).json()
creator = resp["creator"]
post = {
"post_num" : post_num,
"post_url" : self.root + resp["path"],
"body" : resp["body"],
"source_body" : resp["source_body"],
"translated_body" : resp["translated"],
"completed_at" : resp["completed_at"],
"date" : text.parse_datetime(
resp["completed_at"], "%Y-%m-%dT%H:%M:%S.%fZ"),
"nsfw" : resp["nsfw"],
"anonymous" : resp["anonymous"],
"tags" : resp["tag_list"],
"genre" : resp["genre"],
"thanks" : resp["thanks"],
"source_thanks" : resp["source_thanks"],
"translated_thanks": resp["translated_thanks"],
"creator": {
"id" : creator["id"],
"name" : creator["name"],
"screen_name" : creator["screen_name"],
"avatar_url" : creator["avatar_url"],
"header_url" : creator["header_url"],
}
}
if not resp["anonymous"] and "client" in resp:
client = resp["client"]
post["client"] = {
"id" : client["id"],
"name" : client["name"],
"screen_name" : client["screen_name"],
"avatar_url" : client["avatar_url"],
"header_url" : client["header_url"],
}
return resp, post
def _get_urls_from_post(self, resp, post):
if "og_image_url" in resp:
post["content_category"] = "thumb"
post["file_id"] = "thumb"
post["file_url"] = resp["og_image_url"]
yield post
for preview in resp["previews"]:
post["content_category"] = "preview"
post["file_id"] = preview["id"]
post["file_url"] = preview["url"]
info = preview["information"]
post["original"] = {
"width" : info["width"],
"height" : info["height"],
"byte_size" : info["byte_size"],
"duration" : info["duration"],
"frame_rate": info["frame_rate"],
"software" : info["software"],
"extension" : info["extension"],
"is_movie" : info["is_movie"],
"transcoder": info["transcoder"],
}
yield post
class SkebPostExtractor(SkebExtractor):
"""Extractor for a single skeb post"""
subcategory = "post"
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"
def __init__(self, match):
SkebExtractor.__init__(self, match)
self.post_num = match.group(2)
def posts(self):
return (self.post_num,)
class SkebUserExtractor(SkebExtractor):
"""Extractor for all posts from a skeb user"""
subcategory = "user"
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)"
def posts(self):
return self._pagination()