1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 18:53:21 +01:00

[pixiv] respect more query parameters for user URLs

The API endpoint responsible for user illustrations does not
provide sufficient filter capabilities* to match the actual
website, so we are rolling our own filters.

Respected parameters are
    'type': illust, manga, ugoira
    'tag' : any image tag (this was already supported)
    'p'   : the page to start on

* Limitations of the API endpoint:
- The API can filter for illustrations and manga, but not for ugoira.
- 'offset' is applied before filtering, so it cannot serve as the page offset.
- There is no 'tag' filter.
This commit is contained in:
Mike Fährmann 2018-05-18 15:30:06 +02:00
parent b8e53b8c6b
commit 0a1863fce3
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 31 additions and 14 deletions

View File

@ -9,7 +9,7 @@
"""Extract images and ugoira from https://www.pixiv.net/""" """Extract images and ugoira from https://www.pixiv.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
from datetime import datetime, timedelta from datetime import datetime, timedelta
@ -89,7 +89,7 @@ class PixivUserExtractor(PixivExtractor):
"""Extractor for works of a pixiv-user""" """Extractor for works of a pixiv-user"""
subcategory = "user" subcategory = "user"
pattern = [(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" pattern = [(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/member(?:_illust)?\.php\?id=(\d+)(?:.*&tag=([^&#]+))?"), r"/member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"),
(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
r"/(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+)()")] r"/(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+)()")]
test = [ test = [
@ -100,6 +100,10 @@ class PixivUserExtractor(PixivExtractor):
"&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), { "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
"url": "25b1cd81153a8ff82eec440dd9f20a4a22079658", "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
}), }),
(("https://www.pixiv.net/member_illust.php?id=3137110"
"&tag=%E3%83%96%E3%82%A4%E3%82%BA&type=illust&p=2"), {
"count": ">= 55",
}),
("http://www.pixiv.net/member_illust.php?id=173531", { ("http://www.pixiv.net/member_illust.php?id=173531", {
"exception": exception.NotFoundError, "exception": exception.NotFoundError,
}), }),
@ -112,18 +116,32 @@ class PixivUserExtractor(PixivExtractor):
def __init__(self, match): def __init__(self, match):
PixivExtractor.__init__(self) PixivExtractor.__init__(self)
self.user_id, self.tag = match.groups() self.user_id, self.query = match.groups()
def works(self): def works(self):
if self.tag: works = self.api.user_illusts(self.user_id)
return self._tagged_works()
return self.api.user_illusts(self.user_id)
def _tagged_works(self): if self.query:
tag = text.unquote(self.tag).lower() qdict = text.parse_query(self.query)
for work in self.api.user_illusts(self.user_id): if "type" in qdict:
if tag in [tag["name"].lower() for tag in work["tags"]]: type_ = qdict["type"].lower()
yield work works = filter(self._is_type(type_), works)
if "tag" in qdict:
tag = text.unquote(qdict["tag"]).lower()
works = filter(self._has_tag(tag), works)
if "p" in qdict: # apply page-offset last
offset = (text.parse_int(qdict["p"], 1) - 1) * 20
works = util.advance(works, offset)
return works
@staticmethod
def _has_tag(tag):
return lambda work: tag in [t["name"].lower() for t in work["tags"]]
@staticmethod
def _is_type(type_):
return lambda work: work["type"] == type_
class PixivMeExtractor(PixivExtractor): class PixivMeExtractor(PixivExtractor):
@ -466,8 +484,8 @@ class PixivAppAPI():
params = {"user_id": user_id} params = {"user_id": user_id}
return self._call("v1/user/detail", params)["user"] return self._call("v1/user/detail", params)["user"]
def user_illusts(self, user_id, illust_type=None): def user_illusts(self, user_id):
params = {"user_id": user_id, "type": illust_type} params = {"user_id": user_id}
return self._pagination("v1/user/illusts", params) return self._pagination("v1/user/illusts", params)
def ugoira_metadata(self, illust_id): def ugoira_metadata(self, illust_id):

View File

@ -22,7 +22,6 @@ TRAVIS_SKIP = {
# temporary issues, etc. # temporary issues, etc.
BROKEN = { BROKEN = {
"pixiv", # /users/<id>/favorite_works API endpoint is gone
} }