1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before
This commit is contained in:
Mike Fährmann 2022-05-01 21:12:23 +02:00
parent d11e2191ae
commit 84756982e9
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
4 changed files with 179 additions and 134 deletions

View File

@ -1776,28 +1776,28 @@ Description
Download from video pins.
extractor.pixiv.user.avatar
---------------------------
extractor.pixiv.include
-----------------------
Type
``bool``
* ``string``
* ``list`` of ``strings``
Default
``false``
``"artworks"``
Example
* ``"avatar,background,artworks"``
* ``["avatar", "background", "artworks"]``
Description
Download user avatars.
A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
``"artworks"``, ``"avatar"``, ``"background"``, ``"favorite"``.
It is possible to use ``"all"`` instead of listing all values separately.
extractor.pixiv.user.background
-------------------------------
Type
``bool``
Default
``false``
Description
Download user background banners.
extractor.pixiv.user.metadata
-----------------------------
extractor.pixiv.artworks.metadata
---------------------------------
Type
``bool``
Default

View File

@ -201,7 +201,7 @@
"pixiv":
{
"refresh-token": null,
"avatar": false,
"include": "artworks",
"tags": "japanese",
"ugoira": true
},

View File

@ -604,7 +604,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Pixiv</td>
<td>https://www.pixiv.net/</td>
<td>Favorites, Follows, pixiv.me Links, pixivision, Rankings, Search Results, Sketch, User Profiles, individual Images</td>
<td>Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, pixivision, Rankings, Search Results, Sketch, User Profiles, individual Images</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>
<tr>

View File

@ -19,6 +19,7 @@ import hashlib
class PixivExtractor(Extractor):
"""Base class for pixiv extractors"""
category = "pixiv"
root = "https://www.pixiv.net"
directory_fmt = ("{category}", "{user[id]} {user[account]}")
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
@ -90,118 +91,6 @@ class PixivExtractor(Extractor):
work["suffix"] = "_p{:02}".format(work["num"])
yield Message.Url, url, text.nameext_from_url(url, work)
def works(self):
"""Return an iterable containing all relevant 'work'-objects"""
def metadata(self):
"""Collect metadata for extractor-job"""
return {}
class PixivUserExtractor(PixivExtractor):
"""Extractor for works of a pixiv user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?)?/?(?:$|[?#])"
r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
test = (
("https://www.pixiv.net/en/users/173530/artworks", {
"url": "852c31ad83b6840bacbce824d85f2a997889efb7",
}),
# illusts with specific tag
(("https://www.pixiv.net/en/users/173530/artworks"
"/%E6%89%8B%E3%81%B6%E3%82%8D"), {
"url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
}),
(("https://www.pixiv.net/member_illust.php?id=173530"
"&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
"url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
}),
# avatar (#595, #623, #1124)
("https://www.pixiv.net/en/users/173530", {
"options": (("avatar", True),),
"content": "4e57544480cc2036ea9608103e8f024fa737fe66",
"range": "1",
}),
# background (#623, #1124, #2495)
("https://www.pixiv.net/en/users/194921", {
"options": (("background", True),),
"content": "aeda3536003ea3002f70657cb93c5053f26f5843",
"range": "1",
}),
# deleted account
("http://www.pixiv.net/member_illust.php?id=173531", {
"options": (("metadata", True),),
"exception": exception.NotFoundError,
}),
("https://www.pixiv.net/en/users/173530"),
("https://www.pixiv.net/en/users/173530/manga"),
("https://www.pixiv.net/en/users/173530/illustrations"),
("https://www.pixiv.net/member_illust.php?id=173530"),
("https://www.pixiv.net/u/173530"),
("https://www.pixiv.net/user/173530"),
("https://www.pixiv.net/mypage.php#id=173530"),
("https://www.pixiv.net/#id=173530"),
("https://touch.pixiv.net/member_illust.php?id=173530"),
)
def __init__(self, match):
PixivExtractor.__init__(self, match)
u1, t1, u2, t2, u3 = match.groups()
if t1:
t1 = text.unquote(t1)
elif t2:
t2 = text.parse_query(t2).get("tag")
self.user_id = u1 or u2 or u3
self.tag = t1 or t2
def metadata(self):
if self.config("metadata"):
return {"user": self.api.user_detail(self.user_id)["user"]}
return {}
def works(self):
works = self.api.user_illusts(self.user_id)
if self.tag:
tag = self.tag.lower()
works = (
work for work in works
if tag in [t["name"].lower() for t in work["tags"]]
)
avatar = self.config("avatar")
background = self.config("background")
if avatar or background:
work_list = []
detail = self.api.user_detail(self.user_id)
user = detail["user"]
if avatar:
url = user["profile_image_urls"]["medium"]
work_list.append((self._make_work(
"avatar", url.replace("_170.", "."), user),))
if background:
url = detail["profile"]["background_image_url"]
if url:
if "/c/" in url:
parts = url.split("/")
del parts[3:5]
url = "/".join(parts)
url = url.replace("_master1200.", ".")
work = self._make_work("background", url, user)
if url.endswith(".jpg"):
work["_fallback"] = (url[:-4] + ".png",)
work_list.append((work,))
work_list.append(works)
works = itertools.chain.from_iterable(work_list)
return works
@staticmethod
def _make_work(kind, url, user):
return {
@ -221,6 +110,162 @@ class PixivUserExtractor(PixivExtractor):
"x_restrict" : 0,
}
def works(self):
    """Return an iterable containing all relevant 'work' objects

    The body consists of this docstring only, so calling it as-is
    returns None; the concrete extractors in this file (artworks,
    avatar, background) each define their own works().
    """
def metadata(self):
    """Collect metadata for extractor job

    This default returns an empty dict. PixivArtworksExtractor in this
    file returns {"user": ...} instead when its 'metadata' option is set.
    """
    return {}
class PixivUserExtractor(PixivExtractor):
    """Extractor for a pixiv user profile

    A profile URL is expanded into the per-user sub-URLs (avatar,
    background, artworks, bookmarks) which are passed, together with the
    default selection, to _dispatch_extractors().
    """
    subcategory = "user"
    # Accepted forms: /users/ID, /user/ID and /u/ID (each optionally
    # prefixed with 'en/'), member.php?id=ID and the '#id=ID' fragments.
    # A single trailing slash after the ID is tolerated. The end anchor
    # rejects longer paths like /users/ID/artworks, so those stay with the
    # more specific extractors.
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
               r"(?:en/)?u(?:sers?)?/|member\.php\?id=|(?:mypage\.php)?#id="
               r")(\d+)/?(?:$|[?#])")
    test = (
        ("https://www.pixiv.net/en/users/173530"),
        ("https://www.pixiv.net/en/users/173530/"),
        ("https://www.pixiv.net/u/173530"),
        ("https://www.pixiv.net/user/173530"),
        ("https://www.pixiv.net/member.php?id=173530"),
        ("https://www.pixiv.net/mypage.php#id=173530"),
        ("https://www.pixiv.net/#id=173530"),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # the pattern has exactly one capture group: the numeric user ID
        self.user_id = match.group(1)

    def items(self):
        """Hand the profile over to the per-subcategory extractors

        The legacy boolean options 'avatar' and 'background' are still
        honored: each enabled one triggers a deprecation warning and is
        placed in front of "artworks" in the default selection.
        NOTE(review): how the default interacts with the 'include' option
        is decided inside _dispatch_extractors(), which is not shown here.
        """
        default = []
        for name in ("avatar", "background"):
            if self.config(name):
                self.log.warning(
                    "'{0}' is deprecated, "
                    "use \"include\": \"…,{0}\" instead".format(name))
                default.append(name)
        default.append("artworks")

        base = "{}/users/{}/".format(self.root, self.user_id)
        return self._dispatch_extractors((
            (PixivAvatarExtractor    , base + "avatar"),
            (PixivBackgroundExtractor, base + "background"),
            (PixivArtworksExtractor  , base + "artworks"),
            (PixivFavoriteExtractor  , base + "bookmarks/artworks"),
        ), default)
class PixivArtworksExtractor(PixivExtractor):
    """Extractor for artworks of a pixiv user"""
    subcategory = "artworks"
    # Two URL families: /users/ID/{artworks,illustrations,manga}[/TAG]
    # and the legacy member_illust.php?id=ID[&query]
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
               r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
               r"(?:/([^/?#]+))?/?(?:$|[?#])"
               r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
    test = (
        ("https://www.pixiv.net/en/users/173530/artworks", {
            "url": "852c31ad83b6840bacbce824d85f2a997889efb7",
        }),
        # illusts with specific tag
        (("https://www.pixiv.net/en/users/173530/artworks"
          "/%E6%89%8B%E3%81%B6%E3%82%8D"), {
            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
        }),
        (("https://www.pixiv.net/member_illust.php?id=173530"
          "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
        }),
        # deleted account
        ("http://www.pixiv.net/member_illust.php?id=173531", {
            "options": (("metadata", True),),
            "exception": exception.NotFoundError,
        }),
        ("https://www.pixiv.net/en/users/173530/manga"),
        ("https://www.pixiv.net/en/users/173530/illustrations"),
        ("https://www.pixiv.net/member_illust.php?id=173530"),
        ("https://touch.pixiv.net/member_illust.php?id=173530"),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # groups 1/2 belong to the path-style URL, 3/4 to member_illust.php
        path_user, path_tag, query_user, query = match.groups()

        if path_tag:
            # tag taken from the URL path is percent-encoded
            path_tag = text.unquote(path_tag)
        elif query:
            # otherwise look for a 'tag' parameter in the query string
            query = text.parse_query(query).get("tag")

        self.user_id = path_user or query_user
        self.tag = path_tag or query

    def metadata(self):
        """Return the user object as metadata if 'metadata' is enabled"""
        if not self.config("metadata"):
            return {}
        detail = self.api.user_detail(self.user_id)
        return {"user": detail["user"]}

    def works(self):
        """Return the user's illusts, restricted to self.tag if one is set"""
        illusts = self.api.user_illusts(self.user_id)
        if not self.tag:
            return illusts

        # case-insensitive comparison against each work's tag names
        wanted = self.tag.lower()
        return (
            illust for illust in illusts
            if wanted in [entry["name"].lower() for entry in illust["tags"]]
        )
class PixivAvatarExtractor(PixivExtractor):
    """Extractor for pixiv avatars"""
    subcategory = "avatar"
    archive_fmt = "avatar_{user[id]}"
    pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
               r"/(?:en/)?users/(\d+)/avatar")
    test = ("https://www.pixiv.net/en/users/173530/avatar", {
        "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
    })

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        self.user_id = match.group(1)

    def works(self):
        """Return a 1-tuple holding the 'avatar' work of this user"""
        detail = self.api.user_detail(self.user_id)
        user = detail["user"]
        medium = user["profile_image_urls"]["medium"]
        # drop the "_170" marker from the file name
        # (presumably the thumbnail size suffix -- TODO confirm)
        avatar_url = medium.replace("_170.", ".")
        work = self._make_work("avatar", avatar_url, user)
        return (work,)
class PixivBackgroundExtractor(PixivExtractor):
    """Extractor for pixiv background banners"""
    subcategory = "background"
    archive_fmt = "background_{user[id]}"
    pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
               r"/(?:en/)?users/(\d+)/background")
    test = ("https://www.pixiv.net/en/users/194921/background", {
        "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02"
                   r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg",
    })

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        self.user_id = match.group(1)

    def works(self):
        """Return a 1-tuple with the 'background' work, or () if unset"""
        detail = self.api.user_detail(self.user_id)
        banner = detail["profile"]["background_image_url"]
        if not banner:
            return ()

        if "/c/" in banner:
            # cut out the two path segments at index 3 and 4
            # (presumably "c" and the resize spec that follows it)
            segments = banner.split("/")
            banner = "/".join(segments[:3] + segments[5:])
        banner = banner.replace("_master1200.", ".")

        work = self._make_work("background", banner, detail["user"])
        if banner.endswith(".jpg"):
            # offer the same file name with a .png extension as fallback
            work["_fallback"] = (banner[:-len(".jpg")] + ".png",)
        return (work,)
class PixivMeExtractor(PixivExtractor):
"""Extractor for pixiv.me URLs"""
@ -311,10 +356,10 @@ class PixivFavoriteExtractor(PixivExtractor):
r"|bookmark\.php)(?:\?([^#]*))?")
test = (
("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
"url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
"url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
}),
("https://www.pixiv.net/bookmark.php?id=173530", {
"url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
"url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
}),
# bookmarks with specific tag
(("https://www.pixiv.net/en/users/3137110"
@ -759,7 +804,7 @@ class PixivAppAPI():
params = {"user_id": user_id, "tag": tag, "restrict": restrict}
return self._pagination("/v1/user/bookmarks/illust", params)
@memcache(keyarg=1)
def user_detail(self, user_id):
    """Return the '/v1/user/detail' API response for 'user_id'

    Exactly one memcache() decorator is applied. 'keyarg=1' presumably
    selects the positional argument at index 1 ('user_id', with 'self' at
    index 0) as cache key -- confirm against memcache(). An additional
    key-less memcache() stacked on top would sit outermost and could
    return one user's cached result for another user, so it is not kept.
    """
    params = {"user_id": user_id}
    return self._call("/v1/user/detail", params)