From 0a1863fce3d62f8fdf107bdc11579d63e8f185bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Fri, 18 May 2018 15:30:06 +0200
Subject: [PATCH] [pixiv] respect more query parameters for user URLs

The API endpoint responsible for user illustrations does not
provide sufficient filter capabilities* to match the actual
website, so we are rolling our own filters.

Respected parameters are
    'type': illust, manga, ugoira
    'tag' : any image tag (this was already supported)
    'p'   : the page to start on

* Limitations of the API endpoint:
- it can filter for illustrations and manga, but not for ugoira
- its 'offset' parameter is applied before any filtering
- it has no 'tag' filter
---
 gallery_dl/extractor/pixiv.py | 44 ++++++++++++++++++++++++-----------
 test/test_results.py          |  1 -
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 07ec107e..779dbd31 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -9,7 +9,7 @@
 """Extract images and ugoira from https://www.pixiv.net/"""
 
 from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
 from ..cache import cache
 from datetime import datetime, timedelta
 
@@ -89,7 +89,7 @@ class PixivUserExtractor(PixivExtractor):
     """Extractor for works of a pixiv-user"""
     subcategory = "user"
     pattern = [(r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
-                r"/member(?:_illust)?\.php\?id=(\d+)(?:.*&tag=([^&#]+))?"),
+                r"/member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"),
                (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
                 r"/(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+)()")]
     test = [
@@ -100,6 +100,10 @@ class PixivUserExtractor(PixivExtractor):
           "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
             "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
         }),
+        (("https://www.pixiv.net/member_illust.php?id=3137110"
+          "&tag=%E3%83%96%E3%82%A4%E3%82%BA&type=illust&p=2"), {
+            "count": ">= 55",
+        }),
         ("http://www.pixiv.net/member_illust.php?id=173531", {
             "exception": exception.NotFoundError,
         }),
@@ -112,18 +116,32 @@ class PixivUserExtractor(PixivExtractor):
 
     def __init__(self, match):
         PixivExtractor.__init__(self)
-        self.user_id, self.tag = match.groups()
+        self.user_id, self.query = match.groups()
 
     def works(self):
-        if self.tag:
-            return self._tagged_works()
-        return self.api.user_illusts(self.user_id)
+        works = self.api.user_illusts(self.user_id)
 
-    def _tagged_works(self):
-        tag = text.unquote(self.tag).lower()
-        for work in self.api.user_illusts(self.user_id):
-            if tag in [tag["name"].lower() for tag in work["tags"]]:
-                yield work
+        if self.query:
+            qdict = text.parse_query(self.query)
+            if "type" in qdict:
+                type_ = qdict["type"].lower()
+                works = filter(self._is_type(type_), works)
+            if "tag" in qdict:
+                tag = text.unquote(qdict["tag"]).lower()
+                works = filter(self._has_tag(tag), works)
+            if "p" in qdict:  # apply page-offset last
+                offset = (text.parse_int(qdict["p"], 1) - 1) * 20
+                works = util.advance(works, offset)
+
+        return works
+
+    @staticmethod
+    def _has_tag(tag):
+        return lambda work: tag in [t["name"].lower() for t in work["tags"]]
+
+    @staticmethod
+    def _is_type(type_):
+        return lambda work: work["type"] == type_
 
 
 class PixivMeExtractor(PixivExtractor):
@@ -466,8 +484,8 @@ class PixivAppAPI():
         params = {"user_id": user_id}
         return self._call("v1/user/detail", params)["user"]
 
-    def user_illusts(self, user_id, illust_type=None):
-        params = {"user_id": user_id, "type": illust_type}
+    def user_illusts(self, user_id):
+        params = {"user_id": user_id}
         return self._pagination("v1/user/illusts", params)
 
     def ugoira_metadata(self, illust_id):
diff --git a/test/test_results.py b/test/test_results.py
index 90fc1b96..0a604f08 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -22,7 +22,6 @@ TRAVIS_SKIP = {
 
 # temporary issues, etc.
 BROKEN = {
-    "pixiv",  # /users/<id>/favorite_works API endpoint is gone
 }