mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 02:32:33 +01:00
[pixiv] implement workaround for 'limit_sanity_level' works
(#4327, #4747, #5054, #5435, #5651, #5655)

Metadata should be ~95% identical to what the app API returns (there might be some 'date' differences), and there could be issues with R-18 works, as these require some URL manipulation to transform /c/250x250_80_a2/ thumbnail URLs into /img-original/ ones.
This commit is contained in:
parent
d1432d02a1
commit
c5be50fdaa
@ -3525,6 +3525,16 @@ Description
|
||||
A value of ``0`` means no limit.
|
||||
|
||||
|
||||
extractor.pixiv.sanity
|
||||
----------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Try to fetch ``limit_sanity_level`` works via web API.
|
||||
|
||||
|
||||
extractor.plurk.comments
|
||||
------------------------
|
||||
Type
|
||||
|
@ -27,13 +27,14 @@ class PixivExtractor(Extractor):
|
||||
filename_fmt = "{id}_p{num}.{extension}"
|
||||
archive_fmt = "{id}{suffix}.{extension}"
|
||||
cookies_domain = None
|
||||
url_sanity = ("https://s.pximg.net/common/images"
|
||||
sanity_url = ("https://s.pximg.net/common/images"
|
||||
"/limit_sanity_level_360.png")
|
||||
|
||||
def _init(self):
|
||||
self.api = PixivAppAPI(self)
|
||||
self.load_ugoira = self.config("ugoira", True)
|
||||
self.max_posts = self.config("max-posts", 0)
|
||||
self.sanity_workaround = self.config("sanity", True)
|
||||
|
||||
def items(self):
|
||||
tags = self.config("tags", "japanese")
|
||||
@ -102,9 +103,13 @@ class PixivExtractor(Extractor):
|
||||
|
||||
elif work["page_count"] == 1:
|
||||
url = meta_single_page["original_image_url"]
|
||||
if url == self.url_sanity:
|
||||
if url == self.sanity_url:
|
||||
if self.sanity_workaround:
|
||||
self.log.warning("%s: 'sanity_level' warning", work["id"])
|
||||
self._extract_ajax(work, files)
|
||||
else:
|
||||
self.log.warning(
|
||||
"Unable to download work %s ('sanity_level' warning)",
|
||||
"%s: Unable to download work ('sanity_level' warning)",
|
||||
work["id"])
|
||||
else:
|
||||
files.append({"url": url})
|
||||
@ -147,13 +152,93 @@ class PixivExtractor(Extractor):
|
||||
"num": num,
|
||||
"suffix": "_p{:02}".format(num),
|
||||
"_ugoira_frame_index": num,
|
||||
|
||||
}))
|
||||
else:
|
||||
files.append({
|
||||
"url": url.replace("_ugoira600x600", "_ugoira1920x1080", 1),
|
||||
})
|
||||
|
||||
def _extract_ajax(self, work, files):
    """Fill in 'work' and 'files' from the web (AJAX) API.

    Requests '<root>/ajax/illust/<id>' with the web headers, copies the
    returned fields into 'work' under their app-API key names, rebuilds
    'user' and 'tags', and appends one entry per page to 'files'.

    work  -- metadata dict of the affected work; updated in place
    files -- list receiving {"url": ...} dicts; extended in place

    Nothing is appended to 'files' when no original image URL could be
    determined; a warning is logged in that case.
    """
    url = "{}/ajax/illust/{}".format(self.root, work["id"])
    data = self.request(url, headers=self.headers_web).json()
    body = data["body"]

    # (app API key, web API key)
    for key_app, key_ajax in (
        ("title"            , "illustTitle"),
        ("image_urls"       , "urls"),
        ("caption"          , "illustComment"),
        ("create_date"      , "createDate"),
        ("width"            , "width"),
        ("height"           , "height"),
        ("sanity_level"     , "sl"),
        ("total_view"       , "viewCount"),
        ("total_comments"   , "commentCount"),
        ("total_bookmarks"  , "bookmarkCount"),
        ("restrict"         , "restrict"),
        ("x_restrict"       , "xRestrict"),
        ("illust_ai_type"   , "aiType"),
        ("illust_book_style", "bookStyle"),
    ):
        work[key_app] = body[key_ajax]

    work["user"] = {
        "account"    : body["userAccount"],
        "id"         : int(body["userId"]),
        "is_followed": False,
        "name"       : body["userName"],
        "profile_image_urls": {},
    }

    work["tags"] = tags = []
    for tag in body["tags"]["tags"]:
        name = tag["tag"]
        try:
            translated_name = tag["translation"]["en"]
        except (KeyError, TypeError):
            # no 'translation' entry, no 'en' key,
            # or 'translation' is not a mapping
            translated_name = None
        tags.append({"name": name, "translated_name": translated_name})

    url = self._extract_ajax_url(body)
    if not url:
        # do not drop the work without telling the user why
        self.log.warning(
            "%s: Unable to find original image URL", work["id"])
        return

    work["page_count"] = count = body["pageCount"]
    base, marker, ext = url.rpartition("_p0.")
    if count == 1 or not marker:
        # single page, or a URL without a page marker that cannot be
        # expanded into per-page URLs: use it as is
        files.append({"url": url})
        return

    for num in range(count):
        files.append({
            "url"   : "{}_p{}.{}".format(base, num, ext),
            "suffix": "_p{:02}".format(num),
        })
|
||||
|
||||
def _extract_ajax_url(self, body):
    """Return the original image URL for a web API 'body', or None.

    Uses body["urls"]["original"] when it holds a non-empty value.
    Otherwise the work's own thumbnail URL from body["userIllusts"] is
    rewritten into an '/img-original/' URL, and the 'jpg', 'png' and
    'gif' extensions are tried in that order with HEAD requests; the
    first candidate whose request does not raise HttpError is returned.
    """
    try:
        original = body["urls"]["original"]
    except (KeyError, TypeError):
        # 'urls' missing, or present but null / not a mapping
        original = None
    if original:
        return original

    try:
        thumb = body["userIllusts"][body["id"]]["url"]
    except (KeyError, TypeError):
        # no thumbnail entry for this work (missing or null)
        return None
    if not thumb:
        return None

    stem, marker, _ = thumb.rpartition("_p0")
    if not marker:
        # no page marker: nothing to build an original URL from
        return None

    # assumes the '<scheme>//<host>/c/<size>/<kind>/img/...' layout:
    # drop the two '/c/<size>' segments, then swap '<kind>'
    parts = stem.split("/")
    del parts[3:5]
    if len(parts) < 4:
        return None
    parts[3] = "img-original"
    base = "/".join(parts)

    for ext in ("jpg", "png", "gif"):
        url = "{}_p0.{}".format(base, ext)
        try:
            self.request(url, method="HEAD")
        except exception.HttpError:
            continue
        return url
    return None
|
||||
|
||||
@staticmethod
|
||||
def _date_from_url(url, offset=timedelta(hours=9)):
|
||||
try:
|
||||
@ -860,6 +945,7 @@ class PixivAppAPI():
|
||||
self.username = extractor._get_auth_info()[0]
|
||||
self.user = None
|
||||
|
||||
extractor.headers_web = extractor.session.headers.copy()
|
||||
extractor.session.headers.update({
|
||||
"App-OS" : "ios",
|
||||
"App-OS-Version": "16.7.2",
|
||||
|
@ -184,11 +184,88 @@ __tests__ = (
|
||||
{
|
||||
"#url" : "https://www.pixiv.net/artworks/85960783",
|
||||
"#comment" : "limit_sanity_level_360.png (#4327, #5180)",
|
||||
"#category": ("", "pixiv", "work"),
|
||||
"#class" : pixiv.PixivWorkExtractor,
|
||||
"#options" : {"sanity": False},
|
||||
"#count" : 0,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.pixiv.net/en/artworks/102932581",
|
||||
"#comment" : "limit_sanity_level_360.png (#4327, #5180)",
|
||||
"#class" : pixiv.PixivWorkExtractor,
|
||||
"#options" : {"sanity": True},
|
||||
"#urls" : "https://i.pximg.net/img-original/img/2022/11/20/00/00/49/102932581_p0.jpg",
|
||||
|
||||
"caption" : "Meet a deer .",
|
||||
"comment_access_control": 0,
|
||||
"create_date" : "2022-11-19T15:00:00+00:00",
|
||||
"date" : "dt:2022-11-19 15:00:00",
|
||||
"date_url" : "dt:2022-11-19 15:00:49",
|
||||
"extension" : "jpg",
|
||||
"filename" : "102932581_p0",
|
||||
"height" : 3840,
|
||||
"id" : 102932581,
|
||||
"illust_ai_type": 1,
|
||||
"illust_book_style": 0,
|
||||
"is_bookmarked" : False,
|
||||
"is_muted" : False,
|
||||
"num" : 0,
|
||||
"page_count" : 1,
|
||||
"rating" : "General",
|
||||
"restrict" : 0,
|
||||
"sanity_level" : 2,
|
||||
"series" : None,
|
||||
"suffix" : "",
|
||||
"title" : "《 Bridge and Deer 》",
|
||||
"tools" : [],
|
||||
"total_bookmarks": range(1900, 3000),
|
||||
"total_comments": range(3, 10),
|
||||
"total_view" : range(11000, 20000),
|
||||
"type" : "illust",
|
||||
"url" : "https://i.pximg.net/img-original/img/2022/11/20/00/00/49/102932581_p0.jpg",
|
||||
"visible" : False,
|
||||
"width" : 2160,
|
||||
"x_restrict" : 0,
|
||||
"image_urls" : {
|
||||
"mini" : "https://i.pximg.net/c/48x48/custom-thumb/img/2022/11/20/00/00/49/102932581_p0_custom1200.jpg",
|
||||
"original": "https://i.pximg.net/img-original/img/2022/11/20/00/00/49/102932581_p0.jpg",
|
||||
"regular" : "https://i.pximg.net/img-master/img/2022/11/20/00/00/49/102932581_p0_master1200.jpg",
|
||||
"small" : "https://i.pximg.net/c/540x540_70/img-master/img/2022/11/20/00/00/49/102932581_p0_master1200.jpg",
|
||||
"thumb" : "https://i.pximg.net/c/250x250_80_a2/custom-thumb/img/2022/11/20/00/00/49/102932581_p0_custom1200.jpg",
|
||||
},
|
||||
"tags" : [
|
||||
"オリジナル",
|
||||
"風景",
|
||||
"イラスト",
|
||||
"illustration",
|
||||
"美しい",
|
||||
"女の子",
|
||||
"少女",
|
||||
"deer",
|
||||
"flower",
|
||||
"spring",
|
||||
],
|
||||
"user" : {
|
||||
"account" : "805482263",
|
||||
"id" : 7386235,
|
||||
"is_followed": False,
|
||||
"name" : "岛的鲸",
|
||||
"profile_image_urls": {},
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.pixiv.net/en/artworks/109487939",
|
||||
"#comment" : "R-18 limit_sanity_level_360.png (#4327, #5180)",
|
||||
"#class" : pixiv.PixivWorkExtractor,
|
||||
"#urls" : [
|
||||
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p0.png",
|
||||
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p1.png",
|
||||
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p2.png",
|
||||
"https://i.pximg.net/img-original/img/2023/07/01/00/06/28/109487939_p3.png",
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.pixiv.net/en/artworks/966412",
|
||||
"#category": ("", "pixiv", "work"),
|
||||
|
Loading…
Reference in New Issue
Block a user