From 0a1863fce3d62f8fdf107bdc11579d63e8f185bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Fri, 18 May 2018 15:30:06 +0200
Subject: [PATCH] [pixiv] respect more query parameters for user URLs

The API endpoint responsible for user illustrations does not provide
sufficient filter capabilities* to match the actual website, so we are
spinning our own filters.

Respected parameters are
    'type': illust, manga, ugoira
    'tag' : any image tag (this was already supported)
    'p'   : the page to start on

* - API can filter for illustrations and manga, but not for ugoira.
  - 'offset' is applied before filtering
  - no 'tag' filter
---
 gallery_dl/extractor/pixiv.py | 44 ++++++++++++++++++++++++-----------
 test/test_results.py          |  1 -
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 07ec107e..779dbd31 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -9,7 +9,7 @@
 """Extract images and ugoira from https://www.pixiv.net/"""
 
 from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
 from ..cache import cache
 from datetime import datetime, timedelta
 
@@ -89,7 +89,7 @@ class PixivUserExtractor(PixivExtractor):
     """Extractor for works of a pixiv-user"""
     subcategory = "user"
     pattern = [(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
-                r"/member(?:_illust)?\.php\?id=(\d+)(?:.*&tag=([^&#]+))?"),
+                r"/member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"),
                (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
                 r"/(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+)()")]
     test = [
@@ -100,6 +100,10 @@ class PixivUserExtractor(PixivExtractor):
           "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
             "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
         }),
+        (("https://www.pixiv.net/member_illust.php?id=3137110"
+          "&tag=%E3%83%96%E3%82%A4%E3%82%BA&type=illust&p=2"), {
+            "count": ">= 55",
+        }),
         ("http://www.pixiv.net/member_illust.php?id=173531", {
             "exception": exception.NotFoundError,
         }),
@@ -112,18 +116,32 @@ class PixivUserExtractor(PixivExtractor):
 
     def __init__(self, match):
         PixivExtractor.__init__(self)
-        self.user_id, self.tag = match.groups()
+        self.user_id, self.query = match.groups()
 
     def works(self):
-        if self.tag:
-            return self._tagged_works()
-        return self.api.user_illusts(self.user_id)
+        works = self.api.user_illusts(self.user_id)
 
-    def _tagged_works(self):
-        tag = text.unquote(self.tag).lower()
-        for work in self.api.user_illusts(self.user_id):
-            if tag in [tag["name"].lower() for tag in work["tags"]]:
-                yield work
+        if self.query:
+            qdict = text.parse_query(self.query)
+            if "type" in qdict:
+                type_ = qdict["type"].lower()
+                works = filter(self._is_type(type_), works)
+            if "tag" in qdict:
+                tag = text.unquote(qdict["tag"]).lower()
+                works = filter(self._has_tag(tag), works)
+            if "p" in qdict:  # apply page-offset last
+                offset = (text.parse_int(qdict["p"], 1) - 1) * 20
+                works = util.advance(works, offset)
+
+        return works
+
+    @staticmethod
+    def _has_tag(tag):
+        return lambda work: tag in [t["name"].lower() for t in work["tags"]]
+
+    @staticmethod
+    def _is_type(type_):
+        return lambda work: work["type"] == type_
 
 
 class PixivMeExtractor(PixivExtractor):
@@ -466,8 +484,8 @@ class PixivAppAPI():
         params = {"user_id": user_id}
         return self._call("v1/user/detail", params)["user"]
 
-    def user_illusts(self, user_id, illust_type=None):
-        params = {"user_id": user_id, "type": illust_type}
+    def user_illusts(self, user_id):
+        params = {"user_id": user_id}
         return self._pagination("v1/user/illusts", params)
 
     def ugoira_metadata(self, illust_id):
diff --git a/test/test_results.py b/test/test_results.py
index 90fc1b96..0a604f08 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -22,7 +22,6 @@ TRAVIS_SKIP = {
 
 # temporary issues, etc.
 BROKEN = {
-    "pixiv",  # /users//favorite_works API endpoint is gone
 }
 
 