mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 02:32:33 +01:00
[pixiv] implement workaround for 'limit_sanity_level' works
(#4327, #4747, #5054, #5435, #5651, #5655)
Metadata should be ~95% identical (there might be some 'date' differences). There could be issues with R-18 works, as these require some URL manipulation to transform /c/250x250_80_a2/ thumbnail URLs into /img-original/ ones.
This commit is contained in:
parent
d1432d02a1
commit
c5be50fdaa
@ -3525,6 +3525,16 @@ Description
|
|||||||
A value of ``0`` means no limit.
|
A value of ``0`` means no limit.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.pixiv.sanity
|
||||||
|
----------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``true``
|
||||||
|
Description
|
||||||
|
Try to fetch ``limit_sanity_level`` works via web API.
|
||||||
|
|
||||||
|
|
||||||
extractor.plurk.comments
|
extractor.plurk.comments
|
||||||
------------------------
|
------------------------
|
||||||
Type
|
Type
|
||||||
|
@ -27,13 +27,14 @@ class PixivExtractor(Extractor):
|
|||||||
filename_fmt = "{id}_p{num}.{extension}"
|
filename_fmt = "{id}_p{num}.{extension}"
|
||||||
archive_fmt = "{id}{suffix}.{extension}"
|
archive_fmt = "{id}{suffix}.{extension}"
|
||||||
cookies_domain = None
|
cookies_domain = None
|
||||||
url_sanity = ("https://s.pximg.net/common/images"
|
sanity_url = ("https://s.pximg.net/common/images"
|
||||||
"/limit_sanity_level_360.png")
|
"/limit_sanity_level_360.png")
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.api = PixivAppAPI(self)
|
self.api = PixivAppAPI(self)
|
||||||
self.load_ugoira = self.config("ugoira", True)
|
self.load_ugoira = self.config("ugoira", True)
|
||||||
self.max_posts = self.config("max-posts", 0)
|
self.max_posts = self.config("max-posts", 0)
|
||||||
|
self.sanity_workaround = self.config("sanity", True)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
tags = self.config("tags", "japanese")
|
tags = self.config("tags", "japanese")
|
||||||
@ -102,10 +103,14 @@ class PixivExtractor(Extractor):
|
|||||||
|
|
||||||
elif work["page_count"] == 1:
|
elif work["page_count"] == 1:
|
||||||
url = meta_single_page["original_image_url"]
|
url = meta_single_page["original_image_url"]
|
||||||
if url == self.url_sanity:
|
if url == self.sanity_url:
|
||||||
self.log.warning(
|
if self.sanity_workaround:
|
||||||
"Unable to download work %s ('sanity_level' warning)",
|
self.log.warning("%s: 'sanity_level' warning", work["id"])
|
||||||
work["id"])
|
self._extract_ajax(work, files)
|
||||||
|
else:
|
||||||
|
self.log.warning(
|
||||||
|
"%s: Unable to download work ('sanity_level' warning)",
|
||||||
|
work["id"])
|
||||||
else:
|
else:
|
||||||
files.append({"url": url})
|
files.append({"url": url})
|
||||||
|
|
||||||
@ -147,13 +152,93 @@ class PixivExtractor(Extractor):
|
|||||||
"num": num,
|
"num": num,
|
||||||
"suffix": "_p{:02}".format(num),
|
"suffix": "_p{:02}".format(num),
|
||||||
"_ugoira_frame_index": num,
|
"_ugoira_frame_index": num,
|
||||||
|
|
||||||
}))
|
}))
|
||||||
else:
|
else:
|
||||||
files.append({
|
files.append({
|
||||||
"url": url.replace("_ugoira600x600", "_ugoira1920x1080", 1),
|
"url": url.replace("_ugoira600x600", "_ugoira1920x1080", 1),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def _extract_ajax(self, work, files):
    """Complete a 'limit_sanity_level' work using pixiv's web AJAX API.

    Requests ``{root}/ajax/illust/{id}`` with ``self.headers_web``, copies
    the returned metadata onto ``work`` under the app-API key names, and
    appends one ``{"url": ...}`` entry per page to ``files``.

    Both ``work`` and ``files`` are modified in place; nothing is
    returned. When the response has no usable ``body`` a warning is logged
    and neither argument is touched. When no original image URL can be
    determined, ``work`` is still updated but ``files`` stays unchanged.
    """
    url = "{}/ajax/illust/{}".format(self.root, work["id"])
    data = self.request(url, headers=self.headers_web).json()

    # A failed lookup may come back without a dict 'body' (presumably an
    # empty list plus an error 'message' -- confirm against the web API);
    # skip the work instead of crashing on the first key access below.
    is_mapping = isinstance(data, dict)
    body = data.get("body") if is_mapping else None
    if not body or not isinstance(body, dict):
        message = data.get("message") if is_mapping else None
        self.log.warning(
            "%s: Unable to fetch web API data (%s)",
            work["id"], message or "no data")
        return

    # app-API key -> web-API key
    for key_app, key_ajax in (
        ("title"            , "illustTitle"),
        ("image_urls"       , "urls"),
        ("caption"          , "illustComment"),
        ("create_date"      , "createDate"),
        ("width"            , "width"),
        ("height"           , "height"),
        ("sanity_level"     , "sl"),
        ("total_view"       , "viewCount"),
        ("total_comments"   , "commentCount"),
        ("total_bookmarks"  , "bookmarkCount"),
        ("restrict"         , "restrict"),
        ("x_restrict"       , "xRestrict"),
        ("illust_ai_type"   , "aiType"),
        ("illust_book_style", "bookStyle"),
    ):
        work[key_app] = body[key_ajax]

    work["user"] = {
        "account"    : body["userAccount"],
        "id"         : int(body["userId"]),
        "is_followed": False,
        "name"       : body["userName"],
        "profile_image_urls": {},
    }

    work["tags"] = tags = []
    for tag in body["tags"]["tags"]:
        try:
            translated_name = tag["translation"]["en"]
        except (KeyError, TypeError):
            # no 'translation' entry, no 'en' entry, or translation is None
            translated_name = None
        tags.append({"name": tag["tag"], "translated_name": translated_name})

    url = self._extract_ajax_url(body)
    if not url:
        return

    work["page_count"] = count = body["pageCount"]
    if count == 1:
        files.append({"url": url})
        return

    base, marker, ext = url.rpartition("_p0.")
    if not marker:
        # Page URLs cannot be derived without the '_p0.' marker;
        # keep the one URL that is known instead of emitting broken ones.
        files.append({"url": url})
        return

    for num in range(count):
        files.append({
            "url"   : "{}_p{}.{}".format(base, num, ext),
            "suffix": "_p{:02}".format(num),
        })
|
||||||
|
|
||||||
|
def _extract_ajax_url(self, body):
    """Return the original first-page image URL for a web-API ``body``.

    Uses ``body["urls"]["original"]`` when it is present and non-empty.
    Otherwise rewrites the work's own thumbnail URL from
    ``body["userIllusts"][body["id"]]["url"]`` into an ``/img-original/``
    URL and probes the "jpg", "png" and "gif" extensions with HEAD
    requests, returning the first one that does not raise
    ``exception.HttpError``.

    Returns ``None`` when neither source yields a URL.
    """
    try:
        original = body["urls"]["original"]
    except (LookupError, TypeError):
        # 'urls' missing, or present but None / not a mapping
        original = None
    if original:
        return original

    try:
        thumb = body["userIllusts"][body["id"]]["url"]
    except (LookupError, TypeError):
        # missing keys, or a None entry for this work
        return None
    if not isinstance(thumb, str):
        return None

    # Keep everything before the '_p0' page marker, e.g.
    # https://i.pximg.net/c/250x250_80_a2/img-master/img/<date>/<id>
    head, marker, _ = thumb.rpartition("_p0")
    if not marker:
        return None

    parts = head.split("/")
    # parts[:3] is ["https:", "", host]; drop the "/c/<size>/" resize
    # prefix only when it is actually there.
    if len(parts) > 3 and parts[3] == "c":
        del parts[3:5]
    if len(parts) <= 3:
        return None
    # replace the thumbnail directory (img-master, custom-thumb, ...)
    parts[3] = "img-original"
    base = "/".join(parts)

    # The original's file extension is unknown at this point: probe for it.
    for ext in ("jpg", "png", "gif"):
        url = "{}_p0.{}".format(base, ext)
        try:
            self.request(url, method="HEAD")
        except exception.HttpError:
            continue
        return url
    return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _date_from_url(url, offset=timedelta(hours=9)):
|
def _date_from_url(url, offset=timedelta(hours=9)):
|
||||||
try:
|
try:
|
||||||
@ -860,6 +945,7 @@ class PixivAppAPI():
|
|||||||
self.username = extractor._get_auth_info()[0]
|
self.username = extractor._get_auth_info()[0]
|
||||||
self.user = None
|
self.user = None
|
||||||
|
|
||||||
|
extractor.headers_web = extractor.session.headers.copy()
|
||||||
extractor.session.headers.update({
|
extractor.session.headers.update({
|
||||||
"App-OS" : "ios",
|
"App-OS" : "ios",
|
||||||
"App-OS-Version": "16.7.2",
|
"App-OS-Version": "16.7.2",
|
||||||
|
@ -184,11 +184,88 @@ __tests__ = (
|
|||||||
{
|
{
|
||||||
"#url" : "https://www.pixiv.net/artworks/85960783",
|
"#url" : "https://www.pixiv.net/artworks/85960783",
|
||||||
"#comment" : "limit_sanity_level_360.png (#4327, #5180)",
|
"#comment" : "limit_sanity_level_360.png (#4327, #5180)",
|
||||||
"#category": ("", "pixiv", "work"),
|
|
||||||
"#class" : pixiv.PixivWorkExtractor,
|
"#class" : pixiv.PixivWorkExtractor,
|
||||||
|
"#options" : {"sanity": False},
|
||||||
"#count" : 0,
|
"#count" : 0,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.pixiv.net/en/artworks/102932581",
|
||||||
|
"#comment" : "limit_sanity_level_360.png (#4327, #5180)",
|
||||||
|
"#class" : pixiv.PixivWorkExtractor,
|
||||||
|
"#options" : {"sanity": True},
|
||||||
|
"#urls" : "https://i.pximg.net/img-original/img/2022/11/20/00/00/49/102932581_p0.jpg",
|
||||||
|
|
||||||
|
"caption" : "Meet a deer .",
|
||||||
|
"comment_access_control": 0,
|
||||||
|
"create_date" : "2022-11-19T15:00:00+00:00",
|
||||||
|
"date" : "dt:2022-11-19 15:00:00",
|
||||||
|
"date_url" : "dt:2022-11-19 15:00:49",
|
||||||
|
"extension" : "jpg",
|
||||||
|
"filename" : "102932581_p0",
|
||||||
|
"height" : 3840,
|
||||||
|
"id" : 102932581,
|
||||||
|
"illust_ai_type": 1,
|
||||||
|
"illust_book_style": 0,
|
||||||
|
"is_bookmarked" : False,
|
||||||
|
"is_muted" : False,
|
||||||
|
"num" : 0,
|
||||||
|
"page_count" : 1,
|
||||||
|
"rating" : "General",
|
||||||
|
"restrict" : 0,
|
||||||
|
"sanity_level" : 2,
|
||||||
|
"series" : None,
|
||||||
|
"suffix" : "",
|
||||||
|
"title" : "《 Bridge and Deer 》",
|
||||||
|
"tools" : [],
|
||||||
|
"total_bookmarks": range(1900, 3000),
|
||||||
|
"total_comments": range(3, 10),
|
||||||
|
"total_view" : range(11000, 20000),
|
||||||
|
"type" : "illust",
|
||||||
|
"url" : "https://i.pximg.net/img-original/img/2022/11/20/00/00/49/102932581_p0.jpg",
|
||||||
|
"visible" : False,
|
||||||
|
"width" : 2160,
|
||||||
|
"x_restrict" : 0,
|
||||||
|
"image_urls" : {
|
||||||
|
"mini" : "https://i.pximg.net/c/48x48/custom-thumb/img/2022/11/20/00/00/49/102932581_p0_custom1200.jpg",
|
||||||
|
"original": "https://i.pximg.net/img-original/img/2022/11/20/00/00/49/102932581_p0.jpg",
|
||||||
|
"regular" : "https://i.pximg.net/img-master/img/2022/11/20/00/00/49/102932581_p0_master1200.jpg",
|
||||||
|
"small" : "https://i.pximg.net/c/540x540_70/img-master/img/2022/11/20/00/00/49/102932581_p0_master1200.jpg",
|
||||||
|
"thumb" : "https://i.pximg.net/c/250x250_80_a2/custom-thumb/img/2022/11/20/00/00/49/102932581_p0_custom1200.jpg",
|
||||||
|
},
|
||||||
|
"tags" : [
|
||||||
|
"オリジナル",
|
||||||
|
"風景",
|
||||||
|
"イラスト",
|
||||||
|
"illustration",
|
||||||
|
"美しい",
|
||||||
|
"女の子",
|
||||||
|
"少女",
|
||||||
|
"deer",
|
||||||
|
"flower",
|
||||||
|
"spring",
|
||||||
|
],
|
||||||
|
"user" : {
|
||||||
|
"account" : "805482263",
|
||||||
|
"id" : 7386235,
|
||||||
|
"is_followed": False,
|
||||||
|
"name" : "岛的鲸",
|
||||||
|
"profile_image_urls": {},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.pixiv.net/en/artworks/109487939",
|
||||||
|
"#comment" : "R-18 limit_sanity_level_360.png (#4327, #5180)",
|
||||||
|
"#class" : pixiv.PixivWorkExtractor,
|
||||||
|
"#urls" : [
|
||||||
|
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p0.png",
|
||||||
|
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p1.png",
|
||||||
|
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p2.png",
|
||||||
|
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p3.png",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://www.pixiv.net/en/artworks/966412",
|
"#url" : "https://www.pixiv.net/en/artworks/966412",
|
||||||
"#category": ("", "pixiv", "work"),
|
"#category": ("", "pixiv", "work"),
|
||||||
|
Loading…
Reference in New Issue
Block a user