gallery-dl/gallery_dl/extractor/pixiv.py

# -*- coding: utf-8 -*-

# Copyright 2014-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.pixiv.net/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
from datetime import datetime, timedelta
import itertools
import hashlib
import time


class PixivExtractor(Extractor):
    """Base class for pixiv extractors

    Subclasses supply the works to process by overriding works() and may
    add job-wide metadata by overriding metadata(). items() turns each
    work into one Directory message followed by its Url messages.
    """
    category = "pixiv"
    directory_fmt = ("{category}", "{user[id]} {user[account]}")
    filename_fmt = "{id}_p{num}.{extension}"
    archive_fmt = "{id}{suffix}.{extension}"
    cookiedomain = None

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.api = PixivAppAPI(self)
        self.load_ugoira = self.config("ugoira", True)
        self.max_posts = self.config("max-posts", 0)

    def items(self):
        """Yield a Directory message and the Url messages of every work

        The 'tags' option selects how the tag list of a work is stored:
        "original" leaves it untouched, "translated" prefers the
        translated tag names, and any other value keeps the plain names.
        """
        tags = self.config("tags", "japanese")
        if tags == "original":
            transform_tags = None
        elif tags == "translated":
            def transform_tags(work):
                # dict.fromkeys() drops duplicates like set() would, but
                # keeps the tags in first-seen order (guaranteed on
                # Python 3.7+), so results are stable between runs
                work["tags"] = list(dict.fromkeys(
                    tag["translated_name"] or tag["name"]
                    for tag in work["tags"]))
        else:
            def transform_tags(work):
                work["tags"] = [tag["name"] for tag in work["tags"]]

        ratings = {0: "General", 1: "R-18", 2: "R-18G"}

        # metadata() has to run before works(): several subclasses
        # prepare the state their works() depends on in metadata()
        metadata = self.metadata()

        works = self.works()
        if self.max_posts:
            works = itertools.islice(works, self.max_posts)
        for work in works:
            # NOTE(review): presumably placeholder entries for deleted or
            # otherwise unavailable works - confirm
            if not work["user"]["id"]:
                continue

            # keep the file locations for later, but remove these bulky
            # entries from the metadata that gets passed on
            meta_single_page = work["meta_single_page"]
            meta_pages = work["meta_pages"]
            del work["meta_single_page"]
            del work["image_urls"]
            del work["meta_pages"]

            if transform_tags:
                transform_tags(work)
            work["num"] = 0
            work["date"] = text.parse_datetime(work["create_date"])
            work["rating"] = ratings.get(work["x_restrict"])
            work["suffix"] = ""
            work.update(metadata)

            yield Message.Directory, work

            if work["type"] == "ugoira":
                if not self.load_ugoira:
                    continue
                ugoira = self.api.ugoira_metadata(work["id"])

                # request the 1920x1080 archive instead of the 600x600 one
                url = ugoira["zip_urls"]["medium"].replace(
                    "_ugoira600x600", "_ugoira1920x1080")
                work["frames"] = ugoira["frames"]
                work["_http_adjust_extension"] = False
                yield Message.Url, url, text.nameext_from_url(url, work)

            elif work["page_count"] == 1:
                url = meta_single_page["original_image_url"]
                yield Message.Url, url, text.nameext_from_url(url, work)

            else:
                # 'num' is written directly by the loop; 'suffix' is
                # derived from it for every page
                for work["num"], img in enumerate(meta_pages):
                    url = img["image_urls"]["original"]
                    work["suffix"] = "_p{:02}".format(work["num"])
                    yield Message.Url, url, text.nameext_from_url(url, work)

    def works(self):
        """Return an iterable containing all relevant 'work'-objects"""
        # an empty iterable lets items() finish without yielding anything
        # instead of failing with a TypeError when this is not overridden
        return ()

    def metadata(self):
        """Collect metadata for extractor-job"""
        return {}


class PixivUserExtractor(PixivExtractor):
    """Extractor for the works of a single pixiv user"""
    subcategory = "user"
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
               r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)"
               r"(?:/([^/?#]+))?)?/?(?:$|[?#])"
               r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
               r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
    test = (
        ("https://www.pixiv.net/en/users/173530/artworks", {
            "url": "852c31ad83b6840bacbce824d85f2a997889efb7",
        }),
        # illusts with specific tag
        (("https://www.pixiv.net/en/users/173530/artworks"
          "/%E6%89%8B%E3%81%B6%E3%82%8D"), {
            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
        }),
        (("https://www.pixiv.net/member_illust.php?id=173530"
          "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
        }),
        # avatar (#595, 623)
        ("https://www.pixiv.net/en/users/173530", {
            "options": (("avatar", True),),
            "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
            "range": "1",
        }),
        # deleted account
        ("http://www.pixiv.net/member_illust.php?id=173531", {
            "options": (("metadata", True),),
            "exception": exception.NotFoundError,
        }),
        ("https://www.pixiv.net/en/users/173530"),
        ("https://www.pixiv.net/en/users/173530/manga"),
        ("https://www.pixiv.net/en/users/173530/illustrations"),
        ("https://www.pixiv.net/member_illust.php?id=173530"),
        ("https://www.pixiv.net/u/173530"),
        ("https://www.pixiv.net/user/173530"),
        ("https://www.pixiv.net/mypage.php#id=173530"),
        ("https://www.pixiv.net/#id=173530"),
        ("https://touch.pixiv.net/member_illust.php?id=173530"),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # one (id, tag source) pair per URL flavour in 'pattern':
        # '/users/<id>/...', 'member(_illust).php?id=<id>&...', and the
        # short forms that only carry an ID
        path_id, path_tag, php_id, php_query, short_id = match.groups()
        self.user_id = path_id or php_id or short_id

        if path_tag:
            path_tag = text.unquote(path_tag)
        elif php_query:
            php_query = text.parse_query(php_query).get("tag")
        self.tag = path_tag or php_query

    def metadata(self):
        """Return the user's details if the 'metadata' option is set"""
        if not self.config("metadata"):
            return {}
        return {"user": self.api.user_detail(self.user_id)}

    def works(self):
        """Return the user's works, filtered by tag and led by the avatar

        Only works carrying the requested tag (compared case-insensitively)
        are kept when the URL specified one. With the 'avatar' option set,
        a pseudo-work for the profile image is put in front of the rest.
        """
        works = self.api.user_illusts(self.user_id)

        if self.tag:
            wanted = self.tag.lower()

            def has_wanted_tag(work):
                return any(t["name"].lower() == wanted for t in work["tags"])

            works = filter(has_wanted_tag, works)

        if not self.config("avatar"):
            return works

        user = self.api.user_detail(self.user_id)
        medium = user["profile_image_urls"]["medium"]
        # build a minimal stand-in with the fields a regular work has
        avatar = {
            "create_date": None,
            "height": 0,
            "id": "avatar",
            "image_urls": None,
            "meta_pages": (),
            "meta_single_page": {
                "original_image_url": medium.replace("_170.", "."),
            },
            "page_count": 1,
            "sanity_level": 0,
            "tags": (),
            "title": "avatar",
            "type": "avatar",
            "user": user,
            "width": 0,
            "x_restrict": 0,
        }
        return itertools.chain((avatar,), works)


class PixivMeExtractor(PixivExtractor):
    """Extractor for pixiv.me short URLs"""
    subcategory = "me"
    pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)"
    test = (
        ("https://pixiv.me/del_shannon", {
            "url": "29c295ce75150177e6b0a09089a949804c708fbf",
        }),
        ("https://pixiv.me/del_shanno", {
            "exception": exception.NotFoundError,
        }),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        self.account = match.group(1)

    def items(self):
        """Queue the URL that the pixiv.me link redirects to"""
        # redirects are not followed; only the 'Location' header of the
        # HEAD response is needed
        response = self.request(
            "https://pixiv.me/" + self.account,
            method="HEAD", allow_redirects=False, notfound="user")
        target = response.headers["Location"]
        yield Message.Queue, target, {"_extractor": PixivUserExtractor}


class PixivWorkExtractor(PixivExtractor):
    """Extractor for one individual pixiv work/illustration"""
    subcategory = "work"
    pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
               r"/(?:(?:en/)?artworks/"
               r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)"
               r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
               r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)"
               r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))")
    test = (
        ("https://www.pixiv.net/artworks/966412", {
            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
            "content": "69a8edfb717400d1c2e146ab2b30d2c235440c5a",
        }),
        (("http://www.pixiv.net/member_illust.php"
          "?mode=medium&illust_id=966411"), {
            "exception": exception.NotFoundError,
        }),
        # ugoira
        (("https://www.pixiv.net/member_illust.php"
          "?mode=medium&illust_id=66806629"), {
            "url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef",
            "keywords": {"frames": list},
        }),
        # related works (#1237)
        ("https://www.pixiv.net/artworks/966412", {
            "options": (("related", True),),
            "range": "1-10",
            "count": ">= 10",
        }),
        ("https://www.pixiv.net/en/artworks/966412"),
        ("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=96641"),
        ("http://i1.pixiv.net/c/600x600/img-master"
         "/img/2008/06/13/00/29/13/966412_p0_master1200.jpg"),
        ("https://i.pximg.net/img-original"
         "/img/2017/04/25/07/33/29/62568267_p0.png"),
        ("https://www.pixiv.net/i/966412"),
        ("http://img.pixiv.net/img/soundcross/42626136.jpg"),
        ("http://i2.pixiv.net/img76/img/snailrin/42672235.jpg"),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # the ID is captured by the first group for page URLs and by the
        # second one for image and short URLs
        page_id, image_id = match.group(1, 2)
        self.illust_id = page_id or image_id

    def works(self):
        """Return the work itself, plus related works if 'related' is set"""
        detail = self.api.illust_detail(self.illust_id)
        if not self.config("related", False):
            return (detail,)
        return itertools.chain(
            (detail,), self.api.illust_related(self.illust_id))


class PixivFavoriteExtractor(PixivExtractor):
    """Extractor for the favorites/bookmarks of a pixiv user"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "bookmarks",
                     "{user_bookmark[id]} {user_bookmark[account]}")
    archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:(?:en/)?"
               r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?"
               r"|bookmark\.php)(?:\?([^#]*))?")
    test = (
        ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
            "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
        }),
        ("https://www.pixiv.net/bookmark.php?id=173530", {
            "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
        }),
        # bookmarks with specific tag
        (("https://www.pixiv.net/en/users/3137110"
          "/bookmarks/artworks/%E3%81%AF%E3%82%93%E3%82%82%E3%82%93"), {
            "url": "379b28275f786d946e01f721e54afe346c148a8c",
        }),
        # bookmarks with specific tag (legacy url)
        (("https://www.pixiv.net/bookmark.php?id=3137110"
          "&tag=%E3%81%AF%E3%82%93%E3%82%82%E3%82%93&p=1"), {
            "url": "379b28275f786d946e01f721e54afe346c148a8c",
        }),
        # own bookmarks
        ("https://www.pixiv.net/bookmark.php", {
            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
        }),
        # own bookmarks with tag (#596)
        ("https://www.pixiv.net/bookmark.php?tag=foobar", {
            "count": 0,
        }),
        # followed users (#515)
        ("https://www.pixiv.net/en/users/173530/following", {
            "pattern": PixivUserExtractor.pattern,
            "count": ">= 12",
        }),
        # followed users (legacy url) (#515)
        ("https://www.pixiv.net/bookmark.php?id=173530&type=user", {
            "pattern": PixivUserExtractor.pattern,
            "count": ">= 12",
        }),
        # touch URLs
        ("https://touch.pixiv.net/bookmark.php?id=173530"),
        ("https://touch.pixiv.net/bookmark.php"),
    )

    def __init__(self, match):
        uid, kind, self.tag, query_string = match.groups()
        query = text.parse_query(query_string)

        # legacy URLs carry the user ID in their query string; without
        # any ID at all, this job uses the 'bookmark' subcategory
        uid = uid or query.get("id")
        if not uid:
            self.subcategory = "bookmark"

        wants_following = (
            kind == "following" or query.get("type") == "user")
        if wants_following:
            self.subcategory = "following"
            self.items = self._items_following

        # the subcategory is settled before the base constructor runs
        PixivExtractor.__init__(self, match)
        self.query = query
        self.user_id = uid

    def works(self):
        """Return the bookmarked works, optionally limited to one tag"""
        query = self.query

        # a 'tag' query parameter takes precedence over a tag from the path
        if "tag" in query:
            tag = text.unquote(query["tag"])
        else:
            tag = text.unquote(self.tag) if self.tag else None

        restrict = "private" if query.get("rest") == "hide" else "public"
        return self.api.user_bookmarks_illust(self.user_id, tag, restrict)

    def metadata(self):
        """Return the bookmark owner's data as 'user_bookmark'"""
        api = self.api
        if self.user_id:
            user = api.user_detail(self.user_id)
        else:
            # no ID in the URL: fall back to the logged-in account
            api.login()
            user = api.user

        self.user_id = user["id"]
        return {"user_bookmark": user}

    def _items_following(self):
        """Yield a Queue message for every user followed by the target"""
        restrict = (
            "private" if self.query.get("rest") == "hide" else "public")

        for preview in self.api.user_following(self.user_id, restrict):
            followed = preview["user"]
            followed["_extractor"] = PixivUserExtractor
            yield (
                Message.Queue,
                "https://www.pixiv.net/users/{}".format(followed["id"]),
                followed,
            )


class PixivRankingExtractor(PixivExtractor):
    """Extractor for the ranking pages of pixiv"""
    subcategory = "ranking"
    archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
    directory_fmt = ("{category}", "rankings",
                     "{ranking[mode]}", "{ranking[date]}")
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
               r"/ranking\.php(?:\?([^#]*))?")
    test = (
        ("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"),
        ("https://www.pixiv.net/ranking.php"),
        ("https://touch.pixiv.net/ranking.php"),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        self.query = match.group(1)
        self.mode = self.date = None

    def works(self):
        """Return the works of the ranking selected in metadata()"""
        return self.api.illust_ranking(self.mode, self.date)

    def metadata(self):
        """Parse mode and date from the query and return them as 'ranking'

        Unknown modes fall back to "daily"; missing or malformed dates
        fall back to yesterday's date (UTC). Both fallbacks for invalid
        values are reported with a warning.
        """
        query = text.parse_query(self.query)

        # website mode names -> mode names passed to the API
        mode_map = {
            "daily": "day",
            "daily_r18": "day_r18",
            "weekly": "week",
            "weekly_r18": "week_r18",
            "monthly": "month",
            "male": "day_male",
            "male_r18": "day_male_r18",
            "female": "day_female",
            "female_r18": "day_female_r18",
            "original": "week_original",
            "rookie": "week_rookie",
            "r18g": "week_r18g",
        }
        mode = query.get("mode", "daily").lower()
        try:
            self.mode = mode_map[mode]
        except KeyError:
            self.log.warning("invalid mode '%s'", mode)
            mode = "daily"
            self.mode = mode_map[mode]

        date = query.get("date")
        if date and not (len(date) == 8 and date.isdecimal()):
            self.log.warning("invalid date '%s'", date)
            date = None

        if date:
            # YYYYMMDD -> YYYY-MM-DD
            date = "-".join((date[:4], date[4:6], date[6:]))
        else:
            yesterday = datetime.utcnow() - timedelta(days=1)
            date = yesterday.strftime("%Y-%m-%d")
        self.date = date

        # the metadata reports the website's mode name, not the API's
        ranking = {"mode": mode, "date": self.date}
        return {"ranking": ranking}


class PixivSearchExtractor(PixivExtractor):
    """Extractor for the results of a pixiv search"""
    subcategory = "search"
    archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
    directory_fmt = ("{category}", "search", "{search[word]}")
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
               r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
               r"|search\.php)(?:\?([^#]+))?")
    test = (
        ("https://www.pixiv.net/en/tags/Original", {
            "range": "1-10",
            "count": 10,
        }),
        ("https://www.pixiv.net/en/tags/foo/artworks?order=date&s_mode=s_tag"),
        ("https://www.pixiv.net/search.php?s_mode=s_tag&word=Original"),
        ("https://touch.pixiv.net/search.php?word=Original"),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        self.word, self.query = match.groups()
        self.sort = self.target = None

    def works(self):
        """Return the works matching the search set up in metadata()"""
        return self.api.search_illust(self.word, self.sort, self.target)

    def metadata(self):
        """Parse the search parameters and return them as 'search'

        Raises StopExtraction if neither the URL path nor its query
        contains a search term. Invalid 'order' and 's_mode' values are
        replaced by their defaults after logging a warning.
        """
        # website parameter values -> values passed to the API
        sort_map = {
            "date": "date_asc",
            "date_d": "date_desc",
        }
        target_map = {
            "s_tag": "partial_match_for_tags",
            "s_tag_full": "exact_match_for_tags",
            "s_tc": "title_and_caption",
        }

        query = text.parse_query(self.query)

        # a term from the URL path wins over the 'word' query parameter
        if self.word:
            self.word = text.unquote(self.word)
        elif "word" in query:
            self.word = query["word"]
        else:
            raise exception.StopExtraction("Missing search term")

        order = query.get("order", "date_d")
        if order not in sort_map:
            self.log.warning("invalid sort order '%s'", order)
            order = "date_d"
        self.sort = sort_map[order]

        s_mode = query.get("s_mode", "s_tag")
        if s_mode not in target_map:
            self.log.warning("invalid search target '%s'", s_mode)
            s_mode = "s_tag"
        self.target = target_map[s_mode]

        search = {
            "word": self.word,
            "sort": self.sort,
            "target": self.target,
        }
        return {"search": search}


class PixivFollowExtractor(PixivExtractor):
    """Extractor for new illustrations from the artists you follow"""
    subcategory = "follow"
    archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}"
    directory_fmt = ("{category}", "following")
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
               r"/bookmark_new_illust\.php")
    test = (
        ("https://www.pixiv.net/bookmark_new_illust.php"),
        ("https://touch.pixiv.net/bookmark_new_illust.php"),
    )

    def works(self):
        """Return the works listed by the API's 'illust/follow' endpoint"""
        return self.api.illust_follow()

    def metadata(self):
        """Log in and return the logged-in account as 'user_follow'"""
        api = self.api
        # login() is what fills in 'api.user'
        api.login()
        return {"user_follow": api.user}


class PixivPixivisionExtractor(PixivExtractor):
    """Extractor for the illustrations of a pixivision article"""
    subcategory = "pixivision"
    directory_fmt = ("{category}", "pixivision",
                     "{pixivision_id} {pixivision_title}")
    archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
    pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
    test = (
        ("https://www.pixivision.net/en/a/2791"),
        ("https://pixivision.net/a/2791", {
            "count": 7,
            "keyword": {
                "pixivision_id": "2791",
                "pixivision_title": "What's your favorite music? Editor’s "
                                    "picks featuring: “CD Covers”!",
            },
        }),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        self.pixivision_id = match.group(1)

    def works(self):
        """Return the details of every artwork linked from the article

        Relies on 'self.page', which metadata() has to set beforehand.
        """
        illust_ids = util.unique_sequence(text.extract_iter(
            self.page, '<a href="https://www.pixiv.net/en/artworks/', '"'))
        return map(self.api.illust_detail, illust_ids)

    def metadata(self):
        """Download the article page and return its ID and title"""
        article_url = "https://www.pixivision.net/en/a/" + self.pixivision_id
        response = self.request(
            article_url, headers={"User-Agent": "Mozilla/5.0"})

        # keep the page around; works() collects its artwork links
        self.page = page = response.text

        raw_title = text.extract(page, '<title>', '<')[0]
        return {
            "pixivision_id": self.pixivision_id,
            "pixivision_title": text.unescape(raw_title),
        }


class PixivAppAPI():
    """Minimal interface for the Pixiv App API for mobile devices

    For a more complete implementation or documentation, see
    - https://github.com/upbit/pixivpy
    - https://gist.github.com/ZipFile/3ba99b47162c23f8aea5d5942bb557b1
    """
    CLIENT_ID = "MOBrBDS8blbauoSck0ZfDbtuzpyT"
    CLIENT_SECRET = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj"
    HASH_SECRET = ("28c1fdd170a5204386cb1313c7077b34"
                   "f83e4aaf4aa829ce78c231e05b0bae2c")

    def __init__(self, extractor):
        """Set up request headers and credentials from 'extractor'

        The 'client-id', 'client-secret' and 'refresh-token' options of
        the extractor override the built-in defaults. A 'refresh-token'
        that is unset or "cache" is looked up in the token cache.
        """
        self.extractor = extractor
        self.log = extractor.log
        self.username = extractor._get_auth_info()[0]
        self.user = None

        extractor.session.headers.update({
            "App-OS"        : "ios",
            "App-OS-Version": "13.1.2",
            "App-Version"   : "7.7.6",
            "User-Agent"    : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)",
            "Referer"       : "https://app-api.pixiv.net/",
        })

        self.client_id = extractor.config(
            "client-id", self.CLIENT_ID)
        self.client_secret = extractor.config(
            "client-secret", self.CLIENT_SECRET)

        token = extractor.config("refresh-token")
        if token is None or token == "cache":
            token = _refresh_token_cache(self.username)
        self.refresh_token = token

    def login(self):
        """Login and gain an access token"""
        self.user, auth = self._login_impl(self.username)
        self.extractor.session.headers["Authorization"] = auth

    # NOTE(review): presumably keeps the result for an hour, keyed by
    # 'username' - confirm against the 'cache' decorator
    @cache(maxage=3600, keyarg=1)
    def _login_impl(self, username):
        """Exchange the refresh token for user data and an access token

        Returns a (user, authorization header value) tuple.
        Raises AuthenticationError if there is no refresh token or if the
        token endpoint answers with an HTTP status of 400 or above.
        """
        if not self.refresh_token:
            raise exception.AuthenticationError(
                "'refresh-token' required.\n"
                "Run `gallery-dl oauth:pixiv` to get one.")

        self.log.info("Refreshing access token")
        url = "https://oauth.secure.pixiv.net/auth/token"
        data = {
            "client_id"     : self.client_id,
            "client_secret" : self.client_secret,
            "grant_type"    : "refresh_token",
            "refresh_token" : self.refresh_token,
            "get_secure_url": "1",
        }

        # named 'timestamp' to not shadow the imported 'time' module
        timestamp = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
        headers = {
            "X-Client-Time": timestamp,
            # MD5 hex digest of the timestamp followed by HASH_SECRET
            "X-Client-Hash": hashlib.md5(
                (timestamp + self.HASH_SECRET).encode()).hexdigest(),
        }

        response = self.extractor.request(
            url, method="POST", headers=headers, data=data, fatal=False)
        if response.status_code >= 400:
            self.log.debug(response.text)
            raise exception.AuthenticationError("Invalid refresh token")

        data = response.json()["response"]
        return data["user"], "Bearer " + data["access_token"]

    def illust_detail(self, illust_id):
        """Return the 'illust' object of a single work"""
        params = {"illust_id": illust_id}
        return self._call("v1/illust/detail", params)["illust"]

    def illust_follow(self, restrict="all"):
        """Iterate over the results of 'v2/illust/follow'"""
        params = {"restrict": restrict}
        return self._pagination("v2/illust/follow", params)

    def illust_ranking(self, mode="day", date=None):
        """Iterate over the ranking for 'mode' and 'date'"""
        params = {"mode": mode, "date": date}
        return self._pagination("v1/illust/ranking", params)

    def illust_related(self, illust_id):
        """Iterate over the works related to 'illust_id'"""
        params = {"illust_id": illust_id}
        return self._pagination("v2/illust/related", params)

    def search_illust(self, word, sort=None, target=None, duration=None):
        """Iterate over the search results for 'word'"""
        params = {"word": word, "search_target": target,
                  "sort": sort, "duration": duration}
        return self._pagination("v1/search/illust", params)

    def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
        """Iterate over the bookmarked works of a user"""
        params = {"user_id": user_id, "tag": tag, "restrict": restrict}
        return self._pagination("v1/user/bookmarks/illust", params)

    def user_detail(self, user_id):
        """Return the 'user' object of a single user"""
        params = {"user_id": user_id}
        return self._call("v1/user/detail", params)["user"]

    def user_following(self, user_id, restrict="public"):
        """Iterate over the 'user_previews' of users followed by a user"""
        params = {"user_id": user_id, "restrict": restrict}
        return self._pagination("v1/user/following", params, "user_previews")

    def user_illusts(self, user_id):
        """Iterate over the works of a user"""
        params = {"user_id": user_id}
        return self._pagination("v1/user/illusts", params)

    def ugoira_metadata(self, illust_id):
        """Return the 'ugoira_metadata' object of a single work"""
        params = {"illust_id": illust_id}
        return self._call("v1/ugoira/metadata", params)["ugoira_metadata"]

    def _call(self, endpoint, params=None):
        """Request 'endpoint' with 'params' and return the decoded JSON

        A request that fails with a rate limit error is repeated after
        waiting for two minutes.

        Raises NotFoundError for an error response with HTTP status 404
        and StopExtraction for every other API error.
        """
        url = "https://app-api.pixiv.net/" + endpoint

        # retry in a loop instead of by recursion, so that a long series
        # of rate limit errors cannot grow the call stack
        while True:
            # called before every attempt, retries included
            self.login()
            response = self.extractor.request(url, params=params, fatal=False)
            data = response.json()

            if "error" not in data:
                return data

            if response.status_code == 404:
                raise exception.NotFoundError()

            error = data["error"]
            if "rate limit" not in (error.get("message") or "").lower():
                raise exception.StopExtraction(
                    "API request failed: %s", error)

            self.log.info("Waiting two minutes for API rate limit reset.")
            time.sleep(120)

    def _pagination(self, endpoint, params, key="illusts"):
        """Yield the entries stored under 'key' from all result pages"""
        while True:
            data = self._call(endpoint, params)
            yield from data[key]

            if not data["next_url"]:
                return
            # the query string of 'next_url' provides the parameters
            # for the request of the following page
            query = data["next_url"].rpartition("?")[2]
            params = text.parse_query(query)


# maxage is ten years (of 365 days each), expressed in seconds
@cache(maxage=10*365*24*3600, keyarg=0)
def _refresh_token_cache(username):
    """Return the refresh token cached for 'username'

    The function body itself only ever produces None; any other result
    has to come from the 'cache' decorator.
    NOTE(review): presumably the token is put into that cache by the
    'oauth:pixiv' helper - confirm against its implementation.
    """
    return None
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
+								# -*- coding: utf-8 -*-
-												update extractor test results

											
										
										
											2021-01-21 21:35:42 +01:00
+								# Copyright 2014-2021 Mike Fährmann
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
+								#
 								# This program is free software; you can redistribute it and/or modify
 								# it under the terms of the GNU General Public License version 2 as
 								# published by the Free Software Foundation.
-												[pixiv] add extractor for 'pixivision' articles (#1672)

											
										
										
											2021-07-07 02:22:44 +02:00
+								"""Extractors for https://www.pixiv.net/"""
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
-												remove SequentialExtractor class

											
										
										
											2015-10-05 17:15:31 +02:00
+								from .common import Extractor, Message
-												[pixiv] add extractor for 'pixivision' articles (#1672)

											
										
										
											2021-07-07 02:22:44 +02:00
+								from .. import text, util, exception
-												[pixiv] cache login sessions

											
										
										
											2016-03-06 21:00:42 +01:00
+								from ..cache import cache
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								from datetime import datetime, timedelta
-												[pixiv] implement 'avatar' option (#595, #623)

											
										
										
											2020-03-09 21:17:16 +01:00
+								import itertools
-												[pixiv] fix authentication

											
										
										
											2019-09-02 22:34:04 +02:00
+								import hashlib
-												[pixiv] wait and retry after rate limit error (closes #535)

											
										
										
											2019-12-28 22:06:58 +01:00
+								import time
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
-												code adjustments according to pep8 nr2

											
										
										
											2017-02-01 00:53:19 +01:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								class PixivExtractor(Extractor):
 								    """Base class for pixiv extractors"""
-												update all other extractors

											
										
										
											2015-11-21 04:26:30 +01:00
+								    category = "pixiv"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    directory_fmt = ("{category}", "{user[id]} {user[account]}")
-												[pixiv] simplify default filename format

(#366)

											
										
										
											2019-08-15 13:32:47 +02:00
+								    filename_fmt = "{id}_p{num}.{extension}"
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								    archive_fmt = "{id}{suffix}.{extension}"
-												let extractors opt-out of cookie option usage

useful to avoid sending unnecessary cookies when all authentication
is done through OAuth tokens

											
										
										
											2020-01-01 16:07:23 +01:00
+								    cookiedomain = None
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
-												propagate 'match' to base extractor constructor

											
										
										
											2019-02-11 13:31:10 +01:00
+								    def __init__(self, match):
 								        Extractor.__init__(self, match)
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								        self.api = PixivAppAPI(self)
-												implement and use extractor.config() method

											
										
										
											2017-04-25 17:12:48 +02:00
+								        self.load_ugoira = self.config("ugoira", True)
-												[pixiv] implement 'max-posts' option (#1558)

* implement max-rank for pixiv

* rename to max-posts and make more generic
											
										
										
											2021-05-24 17:49:46 +02:00
+								        self.max_posts = self.config("max-posts", 0)
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
+								    def items(self):
-												[pixiv] change 'translated-tags' option (#1507)

- rename to 'tags'
- use string-values: "japanese", "translated", "noop"
- remove duplicate entries for "translated" tags

											
										
										
											2021-04-27 23:51:37 +02:00
+								        tags = self.config("tags", "japanese")
-												[pixiv] rename "noop" value for 'tags' option to "original"

(#1507)

											
										
										
											2021-05-07 20:41:54 +02:00
+								        if tags == "original":
-												[pixiv] change 'translated-tags' option (#1507)

- rename to 'tags'
- use string-values: "japanese", "translated", "noop"
- remove duplicate entries for "translated" tags

											
										
										
											2021-04-27 23:51:37 +02:00
+								            transform_tags = None
 								        elif tags == "translated":
 								            def transform_tags(work):
 								                work["tags"] = list(set(
 								                    tag["translated_name"] or tag["name"]
 								                    for tag in work["tags"]))
 								        else:
 								            def transform_tags(work):
 								                work["tags"] = [tag["name"] for tag in work["tags"]]
-												[pixiv] add 'rating' metadata field (#595)

A human-friendlier representation of 'x_restrict'

											
										
										
											2020-02-01 01:36:06 +01:00
+								        ratings = {0: "General", 1: "R-18", 2: "R-18G"}
-												[pixiv] reduce calls to '/user/detail'

											
										
										
											2020-02-09 13:54:58 +01:00
+								        metadata = self.metadata()
-												[pixiv] transition to pixiv public api

											
										
										
											2015-05-14 19:08:20 +02:00
-												[pixiv] implement 'max-posts' option (#1558)

* implement max-rank for pixiv

* rename to max-posts and make more generic
											
										
										
											2021-05-24 17:49:46 +02:00
+								        works = self.works()
 								        if self.max_posts:
 								            works = itertools.islice(works, self.max_posts)
 								        for work in works:
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								            if not work["user"]["id"]:
 								                continue
 								            meta_single_page = work["meta_single_page"]
 								            meta_pages = work["meta_pages"]
 								            del work["meta_single_page"]
 								            del work["image_urls"]
 								            del work["meta_pages"]
-												[pixiv] change 'translated-tags' option (#1507)

- rename to 'tags'
- use string-values: "japanese", "translated", "noop"
- remove duplicate entries for "translated" tags

											
										
										
											2021-04-27 23:51:37 +02:00
 								            if transform_tags:
 								                transform_tags(work)
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								            work["num"] = 0
-												apply workaround from 4736912 in parse_datetime() itself

											
										
										
											2019-05-09 21:53:17 +02:00
+								            work["date"] = text.parse_datetime(work["create_date"])
-												[pixiv] add 'rating' metadata field (#595)

A human-friendlier representation of 'x_restrict'

											
										
										
											2020-02-01 01:36:06 +01:00
+								            work["rating"] = ratings.get(work["x_restrict"])
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								            work["suffix"] = ""
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								            work.update(metadata)
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
-												[pixiv] create directory for each "work" item (#136)

											
										
										
											2018-12-11 20:37:47 +01:00
+								            yield Message.Directory, work
-												[pixiv] transition to pixiv public api

											
										
										
											2015-05-14 19:08:20 +02:00
+								            if work["type"] == "ugoira":
-												[pixiv] add option to skip ugoiras

											
										
										
											2016-07-20 14:19:46 +02:00
+								                if not self.load_ugoira:
 								                    continue
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								                ugoira = self.api.ugoira_metadata(work["id"])
 								                url = ugoira["zip_urls"]["medium"].replace(
 								                    "_ugoira600x600", "_ugoira1920x1080")
-												[pixiv] rework ugoira handling

Frame information now gets attached to the ZIP file's keyword dict
instead of being written to a separate text file.

											
										
										
											2018-06-18 17:25:52 +02:00
+								                work["frames"] = ugoira["frames"]
-												[downloader:http] disable filename extension changes for ugoira

(#1507)

											
										
										
											2021-04-27 00:48:53 +02:00
+								                work["_http_adjust_extension"] = False
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								                yield Message.Url, url, text.nameext_from_url(url, work)
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
-												[pixiv] transition to pixiv public api

											
										
										
											2015-05-14 19:08:20 +02:00
+								            elif work["page_count"] == 1:
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								                url = meta_single_page["original_image_url"]
-												[pixiv] provide 'filename' and change default filename format

to '{filename}.{extension}' (closes #366)

											
										
										
											2019-08-02 22:35:10 +02:00
+								                yield Message.Url, url, text.nameext_from_url(url, work)
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
 								            else:
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								                for work["num"], img in enumerate(meta_pages):
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								                    url = img["image_urls"]["original"]
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								                    work["suffix"] = "_p{:02}".format(work["num"])
-												[pixiv] provide 'filename' and change default filename format

to '{filename}.{extension}' (closes #366)

											
										
										
											2019-08-02 22:35:10 +02:00
+								                    yield Message.Url, url, text.nameext_from_url(url, work)
-												[pixiv] transition to pixiv public api

											
										
										
											2015-05-14 19:08:20 +02:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								    def works(self):
-												[pixiv] update archive IDs and add metadata-fields

(Pixiv bookmarks actually have their own IDs, comments and tags,
independent of the bookmarked image, which makes creating an
archive ID a lot easier)

											
										
										
											2018-03-02 16:11:53 +01:00
+								        """Return an iterable containing all relevant 'work'-objects"""
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
-												[pixiv] reduce calls to '/user/detail'

											
										
										
											2020-02-09 13:54:58 +01:00
+								    def metadata(self):
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
+								        """Collect metadata for extractor-job"""
-												[pixiv] reduce calls to '/user/detail'

											
										
										
											2020-02-09 13:54:58 +01:00
+								        return {}
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
 								class PixivUserExtractor(PixivExtractor):
 								    """Extractor for works of a pixiv-user"""
 								    subcategory = "user"
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
 								               r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)"
-												remove '&' from URL patterns

'/?&#' -> '/?#' and '?&#' -> '?#'

According to https://www.ietf.org/rfc/rfc3986.txt, URLs are
"organized hierarchically" by using "the slash ("/"), question
mark ("?"), and number sign ("#") characters to delimit components"

											
										
										
											2020-10-22 23:12:59 +02:00
+								               r"(?:/([^/?#]+))?)?/?(?:$|[?#])"
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								               r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								               r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
 								    test = (
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        ("https://www.pixiv.net/en/users/173530/artworks", {
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								            "url": "852c31ad83b6840bacbce824d85f2a997889efb7",
 								        }),
-												[pixiv] improve bookmark extraction

- combine 'favorite' and 'bookmark' extractors
  - it is now one extractor class, but its subcategory still
    distinguishes between your own bookmarks ('bookmark') and other
    user's bookmarks ('favorite') like before
- allow filtering by bookmark tags and public/private bookmarks
- fix pagination for bookmark results

											
										
										
											2018-05-18 16:55:24 +02:00
+								        # illusts with specific tag
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        (("https://www.pixiv.net/en/users/173530/artworks"
 								          "/%E6%89%8B%E3%81%B6%E3%82%8D"), {
 								            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
 								        }),
-												[pixiv] unquote tags

											
										
										
											2017-07-12 08:21:29 +02:00
+								        (("https://www.pixiv.net/member_illust.php?id=173530"
 								          "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
 								            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								        }),
-												[pixiv] implement 'avatar' option (#595, #623)

											
										
										
											2020-03-09 21:17:16 +01:00
+								        # avatar (#595, 623)
 								        ("https://www.pixiv.net/en/users/173530", {
 								            "options": (("avatar", True),),
-												update extractor test results

											
										
										
											2020-10-11 18:12:40 +02:00
+								            "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
-												[pixiv] implement 'avatar' option (#595, #623)

											
										
										
											2020-03-09 21:17:16 +01:00
+								            "range": "1",
 								        }),
 								        # deleted account
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								        ("http://www.pixiv.net/member_illust.php?id=173531", {
-												[pixiv] add 'metadata' option (#1551)

											
										
										
											2021-05-14 20:30:28 +02:00
+								            "options": (("metadata", True),),
 								            "exception": exception.NotFoundError,
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								        }),
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        ("https://www.pixiv.net/en/users/173530"),
 								        ("https://www.pixiv.net/en/users/173530/manga"),
 								        ("https://www.pixiv.net/en/users/173530/illustrations"),
 								        ("https://www.pixiv.net/member_illust.php?id=173530"),
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								        ("https://www.pixiv.net/u/173530"),
 								        ("https://www.pixiv.net/user/173530"),
 								        ("https://www.pixiv.net/mypage.php#id=173530"),
 								        ("https://www.pixiv.net/#id=173530"),
 								        ("https://touch.pixiv.net/member_illust.php?id=173530"),
 								    )
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								    def __init__(self, match):
-												propagate 'match' to base extractor constructor

											
										
										
											2019-02-11 13:31:10 +01:00
+								        PixivExtractor.__init__(self, match)
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        u1, t1, u2, t2, u3 = match.groups()
 								        if t1:
 								            t1 = text.unquote(t1)
 								        elif t2:
 								            t2 = text.parse_query(t2).get("tag")
 								        self.user_id = u1 or u2 or u3
 								        self.tag = t1 or t2
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
-												[pixiv] add 'metadata' option (#1551)

											
										
										
											2021-05-14 20:30:28 +02:00
+								    def metadata(self):
 								        if self.config("metadata"):
 								            return {"user": self.api.user_detail(self.user_id)}
 								        return {}
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								    def works(self):
-												[pixiv] respect more query parameters for user URLs

The API endpoint responsible for user illustrations does not
provide sufficient filter capabilities* to match the actual
website, so we are spinning our own filters.

Respected parameters are
    'type': illust, manga, ugoira
    'tag' : any image tag (this was already supported)
    'p'   : the page to start on

*
- API can filter for illustrations and manga, but not for ugoira.
- 'offset' is applied before filtering
- no 'tag' filter

											
										
										
											2018-05-18 15:30:06 +02:00
+								        works = self.api.user_illusts(self.user_id)
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        if self.tag:
 								            tag = self.tag.lower()
-												[pixiv] remove 'type' and 'page' query parameter handling

The "new and improved" /member_illust.php and /bookmark.php listings
don't quite work with how things were.

											
										
										
											2018-10-03 16:08:36 +02:00
+								            works = (
 								                work for work in works
 								                if tag in [t["name"].lower() for t in work["tags"]]
 								            )
-												[pixiv] respect more query parameters for user URLs

The API endpoint responsible for user illustrations does not
provide sufficient filter capabilities* to match the actual
website, so we are spinning our own filters.

Respected parameters are
    'type': illust, manga, ugoira
    'tag' : any image tag (this was already supported)
    'p'   : the page to start on

*
- API can filter for illustrations and manga, but not for ugoira.
- 'offset' is applied before filtering
- no 'tag' filter

											
										
										
											2018-05-18 15:30:06 +02:00
-												[pixiv] implement 'avatar' option (#595, #623)

											
										
										
											2020-03-09 21:17:16 +01:00
+								        if self.config("avatar"):
 								            user = self.api.user_detail(self.user_id)
 								            url = user["profile_image_urls"]["medium"].replace("_170.", ".")
 								            avatar = {
 								                "create_date"     : None,
 								                "height"          : 0,
 								                "id"              : "avatar",
 								                "image_urls"      : None,
 								                "meta_pages"      : (),
 								                "meta_single_page": {"original_image_url": url},
 								                "page_count"      : 1,
 								                "sanity_level"    : 0,
 								                "tags"            : (),
 								                "title"           : "avatar",
 								                "type"            : "avatar",
 								                "user"            : user,
 								                "width"           : 0,
 								                "x_restrict"      : 0,
 								            }
 								            works = itertools.chain((avatar,), works)
-												[pixiv] respect more query parameters for user URLs

The API endpoint responsible for user illustrations does not
provide sufficient filter capabilities* to match the actual
website, so we are spinning our own filters.

Respected parameters are
    'type': illust, manga, ugoira
    'tag' : any image tag (this was already supported)
    'p'   : the page to start on

*
- API can filter for illustrations and manga, but not for ugoira.
- 'offset' is applied before filtering
- no 'tag' filter

											
										
										
											2018-05-18 15:30:06 +02:00
+								        return works
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
class PixivMeExtractor(PixivExtractor):
    """Extractor for pixiv.me URLs"""
    subcategory = "me"
    pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)"
    test = (
        ("https://pixiv.me/del_shannon", {
            "url": "29c295ce75150177e6b0a09089a949804c708fbf",
        }),
        ("https://pixiv.me/del_shanno", {
            "exception": exception.NotFoundError,
        }),
    )

    def __init__(self, match):
        """Store the account name taken from the pixiv.me URL."""
        PixivExtractor.__init__(self, match)
        self.account = match.group(1)

    def items(self):
        """Resolve the pixiv.me URL and queue its redirect target.

        Sends a HEAD request without following redirects and yields the
        'Location' header value as a Message.Queue entry, tagged to be
        handled by PixivUserExtractor.
        """
        url = "https://pixiv.me/" + self.account
        data = {"_extractor": PixivUserExtractor}
        response = self.request(
            url, method="HEAD", allow_redirects=False, notfound="user")
        yield Message.Queue, response.headers["Location"], data
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								class PixivWorkExtractor(PixivExtractor):
-												consistent extractor naming scheme + docstrings

											
										
										
											2016-09-12 10:20:57 +02:00
+								    """Extractor for a single pixiv work/illustration"""
-												add subcategories to extractors

											
										
										
											2015-11-30 01:11:13 +01:00
+								    subcategory = "work"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
-												[pixiv] match '/artworks/' URLs

											
										
										
											2019-09-24 21:42:31 +02:00
+								               r"/(?:(?:en/)?artworks/"
 								               r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)"
-												rewrite URL patterns to use only 1 per extractor

											
										
										
											2019-02-08 12:03:10 +01:00
+								               r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
 								               r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								               r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))")
 								    test = (
-												[pixiv] match '/artworks/' URLs

											
										
										
											2019-09-24 21:42:31 +02:00
+								        ("https://www.pixiv.net/artworks/966412", {
-												update unit test results

											
										
										
											2017-04-14 14:40:36 +02:00
+								            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
-												[pixiv] match direct-links to images

											
										
										
											2016-08-25 20:10:02 +02:00
+								            "content": "69a8edfb717400d1c2e146ab2b30d2c235440c5a",
 								        }),
-												code adjustments according to pep8 nr2

											
										
										
											2017-02-01 00:53:19 +01:00
+								        (("http://www.pixiv.net/member_illust.php"
 								          "?mode=medium&illust_id=966411"), {
-												add a few tests expecting exceptions

											
										
										
											2016-12-30 01:46:42 +01:00
+								            "exception": exception.NotFoundError,
 								        }),
-												[pixiv] fix ugoira extraction (closes #78)

											
										
										
											2018-02-19 08:51:09 +01:00
+								        # ugoira
 								        (("https://www.pixiv.net/member_illust.php"
 								          "?mode=medium&illust_id=66806629"), {
-												[pixiv] fix ugoira test

											
										
										
											2018-06-18 19:22:54 +02:00
+								            "url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef",
 								            "keywords": {"frames": list},
-												[pixiv] fix ugoira extraction (closes #78)

											
										
										
											2018-02-19 08:51:09 +01:00
+								        }),
-												[pixiv] add 'related' option (#1237)

											
										
										
											2021-01-17 16:37:07 +01:00
+								        # related works (#1237)
 								        ("https://www.pixiv.net/artworks/966412", {
 								            "options": (("related", True),),
-												update extractor test results

											
										
										
											2021-01-21 21:35:42 +01:00
+								            "range": "1-10",
-												[pixiv] add 'related' option (#1237)

											
										
										
											2021-01-17 16:37:07 +01:00
+								            "count": ">= 10",
 								        }),
-												[pixiv] match '/artworks/' URLs

											
										
										
											2019-09-24 21:42:31 +02:00
+								        ("https://www.pixiv.net/en/artworks/966412"),
 								        ("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=96641"),
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								        ("http://i1.pixiv.net/c/600x600/img-master"
 								         "/img/2008/06/13/00/29/13/966412_p0_master1200.jpg"),
 								        ("https://i.pximg.net/img-original"
 								         "/img/2017/04/25/07/33/29/62568267_p0.png"),
 								        ("https://www.pixiv.net/i/966412"),
 								        ("http://img.pixiv.net/img/soundcross/42626136.jpg"),
 								        ("http://i2.pixiv.net/img76/img/snailrin/42672235.jpg"),
 								    )
-												[pixiv] add single work/illust extractor

											
										
										
											2015-11-22 02:21:02 +01:00
 								    def __init__(self, match):
-												propagate 'match' to base extractor constructor

											
										
										
											2019-02-11 13:31:10 +01:00
+								        PixivExtractor.__init__(self, match)
-												rewrite URL patterns to use only 1 per extractor

											
										
										
											2019-02-08 12:03:10 +01:00
+								        self.illust_id = match.group(1) or match.group(2)
-												[pixiv] add single work/illust extractor

											
										
										
											2015-11-22 02:21:02 +01:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								    def works(self):
-												[pixiv] add 'related' option (#1237)

											
										
										
											2021-01-17 16:37:07 +01:00
+								        works = (self.api.illust_detail(self.illust_id),)
 								        if self.config("related", False):
 								            related = self.api.illust_related(self.illust_id)
 								            works = itertools.chain(works, related)
 								        return works
-												[pixiv] add single work/illust extractor

											
										
										
											2015-11-22 02:21:02 +01:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
class PixivFavoriteExtractor(PixivExtractor):
    """Extractor for all favorites/bookmarks of a pixiv-user"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "bookmarks",
                     "{user_bookmark[id]} {user_bookmark[account]}")
    archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
    # capture groups: (1) user ID, (2) listing kind,
    # (3) optional trailing path segment (tag), (4) query string
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:(?:en/)?"
               r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?"
               r"|bookmark\.php)(?:\?([^#]*))?")
    test = (
        ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
            "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
        }),
        ("https://www.pixiv.net/bookmark.php?id=173530", {
            "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
        }),
        # bookmarks with specific tag
        (("https://www.pixiv.net/en/users/3137110"
          "/bookmarks/artworks/%E3%81%AF%E3%82%93%E3%82%82%E3%82%93"), {
            "url": "379b28275f786d946e01f721e54afe346c148a8c",
        }),
        # bookmarks with specific tag (legacy url)
        (("https://www.pixiv.net/bookmark.php?id=3137110"
          "&tag=%E3%81%AF%E3%82%93%E3%82%82%E3%82%93&p=1"), {
            "url": "379b28275f786d946e01f721e54afe346c148a8c",
        }),
        # own bookmarks
        ("https://www.pixiv.net/bookmark.php", {
            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
        }),
        # own bookmarks with tag (#596)
        ("https://www.pixiv.net/bookmark.php?tag=foobar", {
            "count": 0,
        }),
        # followed users (#515)
        ("https://www.pixiv.net/en/users/173530/following", {
            "pattern": PixivUserExtractor.pattern,
            "count": ">= 12",
        }),
        # followed users (legacy url) (#515)
        ("https://www.pixiv.net/bookmark.php?id=173530&type=user", {
            "pattern": PixivUserExtractor.pattern,
            "count": ">= 12",
        }),
        # touch URLs
        ("https://touch.pixiv.net/bookmark.php?id=173530"),
        ("https://touch.pixiv.net/bookmark.php"),
    )

    def __init__(self, match):
        """Parse the matched URL and select subcategory and item source.

        The user ID comes from the URL path or, failing that, from the
        'id' query parameter. Without any user ID the subcategory becomes
        "bookmark"; a "following" listing (path segment or 'type=user')
        switches the subcategory to "following" and replaces 'items'.
        """
        user_id, listing, tag, raw_query = match.groups()
        self.tag = tag
        params = text.parse_query(raw_query)

        user_id = user_id or params.get("id")
        if not user_id:
            self.subcategory = "bookmark"

        wants_following = (
            listing == "following" or params.get("type") == "user")
        if wants_following:
            self.subcategory = "following"
            self.items = self._items_following

        # subcategory is settled before the base constructor runs
        PixivExtractor.__init__(self, match)
        self.query = params
        self.user_id = user_id

    def works(self):
        """Return the bookmarked works for the selected user.

        A 'tag' query parameter takes precedence over a tag from the URL
        path; 'rest=hide' selects the "private" restriction.
        """
        params = self.query
        if "tag" in params:
            tag = text.unquote(params["tag"])
        elif self.tag:
            tag = text.unquote(self.tag)
        else:
            tag = None

        restrict = "private" if params.get("rest") == "hide" else "public"
        return self.api.user_bookmarks_illust(self.user_id, tag, restrict)

    def metadata(self):
        """Return the bookmark owner's data under the 'user_bookmark' key.

        Without a known user ID, the API is logged in and its 'user'
        attribute is used instead; 'self.user_id' is updated either way.
        """
        api = self.api
        if self.user_id:
            owner = api.user_detail(self.user_id)
        else:
            api.login()
            owner = api.user

        self.user_id = owner["id"]
        return {"user_bookmark": owner}

    def _items_following(self):
        """Yield one Queue message per entry returned by 'user_following'."""
        restrict = "private" if self.query.get("rest") == "hide" else "public"
        profile_url = "https://www.pixiv.net/users/{}"

        for entry in self.api.user_following(self.user_id, restrict):
            followed = entry["user"]
            followed["_extractor"] = PixivUserExtractor
            yield Message.Queue, profile_url.format(followed["id"]), followed
-												code adjustments according to pep8 nr2

											
										
										
											2017-02-01 00:53:19 +01:00
-												[pixiv] add extractor for ranking lists

											
										
										
											2017-08-20 20:21:52 +02:00
class PixivRankingExtractor(PixivExtractor):
    """Extractor for pixiv ranking pages"""
    subcategory = "ranking"
    archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
    directory_fmt = ("{category}", "rankings",
                     "{ranking[mode]}", "{ranking[date]}")
    # the single capture group holds the raw query string, if any
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
               r"/ranking\.php(?:\?([^#]*))?")
    test = (
        ("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"),
        ("https://www.pixiv.net/ranking.php"),
        ("https://touch.pixiv.net/ranking.php"),
    )

    def __init__(self, match):
        """Store the raw query string; mode and date are set in metadata()."""
        PixivExtractor.__init__(self, match)
        self.query = match.group(1)
        self.mode = None
        self.date = None

    def works(self):
        """Return the ranking results for the current mode and date."""
        return self.api.illust_ranking(self.mode, self.date)

    def metadata(self):
        """Parse 'mode' and 'date' from the query and return ranking info.

        An unknown mode falls back to "daily" and a malformed date to
        yesterday (UTC); both cases log a warning. The returned dict
        carries the URL-style mode name, while 'self.mode' holds the
        mapped value.
        """
        params = text.parse_query(self.query)

        # URL mode name -> value handed to 'illust_ranking'
        api_modes = {
            "daily": "day",
            "daily_r18": "day_r18",
            "weekly": "week",
            "weekly_r18": "week_r18",
            "monthly": "month",
            "male": "day_male",
            "male_r18": "day_male_r18",
            "female": "day_female",
            "female_r18": "day_female_r18",
            "original": "week_original",
            "rookie": "week_rookie",
            "r18g": "week_r18g",
        }
        mode = params.get("mode", "daily").lower()
        if mode not in api_modes:
            self.log.warning("invalid mode '%s'", mode)
            mode = "daily"
        self.mode = api_modes[mode]

        date = None
        requested = params.get("date")
        if requested:
            if len(requested) == 8 and requested.isdecimal():
                # YYYYMMDD -> YYYY-MM-DD
                date = "{}-{}-{}".format(
                    requested[0:4], requested[4:6], requested[6:8])
            else:
                self.log.warning("invalid date '%s'", requested)
        if not date:
            yesterday = datetime.utcnow() - timedelta(days=1)
            date = yesterday.strftime("%Y-%m-%d")
        self.date = date

        return {"ranking": {"mode": mode, "date": date}}
-												[pixiv] implement AppAPI wrapper

											
										
										
											2018-05-07 20:05:44 +02:00
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
class PixivSearchExtractor(PixivExtractor):
    """Extractor for pixiv search results"""
    subcategory = "search"
    archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
    directory_fmt = ("{category}", "search", "{search[word]}")
    # capture groups: (1) search word from a '/tags/' path, (2) query string
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
               r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
               r"|search\.php)(?:\?([^#]+))?")
    test = (
        ("https://www.pixiv.net/en/tags/Original", {
            "range": "1-10",
            "count": 10,
        }),
        ("https://www.pixiv.net/en/tags/foo/artworks?order=date&s_mode=s_tag"),
        ("https://www.pixiv.net/search.php?s_mode=s_tag&word=Original"),
        ("https://touch.pixiv.net/search.php?word=Original"),
    )

    def __init__(self, match):
        """Store the path word and raw query; the rest is set in metadata()."""
        PixivExtractor.__init__(self, match)
        word, query = match.groups()
        self.word = word
        self.query = query
        self.sort = None
        self.target = None

    def works(self):
        """Return the search results for the current word, sort and target."""
        return self.api.search_illust(self.word, self.sort, self.target)

    def metadata(self):
        """Resolve search word, sort order and target; return search info.

        Raises StopExtraction when neither the URL path nor the query
        provides a search word. Unknown 'order' or 's_mode' values log a
        warning and fall back to "date_d" and "s_tag" respectively.
        """
        params = text.parse_query(self.query)

        if self.word:
            self.word = text.unquote(self.word)
        elif "word" in params:
            self.word = params["word"]
        else:
            raise exception.StopExtraction("Missing search term")

        # URL 'order' value -> sort value handed to 'search_illust'
        orders = {
            "date": "date_asc",
            "date_d": "date_desc",
        }
        order = params.get("order", "date_d")
        if order not in orders:
            self.log.warning("invalid sort order '%s'", order)
            order = "date_d"
        self.sort = orders[order]

        # URL 's_mode' value -> target value handed to 'search_illust'
        targets = {
            "s_tag": "partial_match_for_tags",
            "s_tag_full": "exact_match_for_tags",
            "s_tc": "title_and_caption",
        }
        s_mode = params.get("s_mode", "s_tag")
        if s_mode not in targets:
            self.log.warning("invalid search target '%s'", s_mode)
            s_mode = "s_tag"
        self.target = targets[s_mode]

        search = {
            "word": self.word,
            "sort": self.sort,
            "target": self.target,
        }
        return {"search": search}
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
-												[pixiv] add extractor for illusts from followed users

											
										
										
											2018-05-15 13:02:49 +02:00
class PixivFollowExtractor(PixivExtractor):
    """Extractor for new illustrations from your followed artists"""
    subcategory = "follow"
    directory_fmt = ("{category}", "following")
    archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}"
    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
               r"/bookmark_new_illust\.php")
    test = (
        "https://www.pixiv.net/bookmark_new_illust.php",
        "https://touch.pixiv.net/bookmark_new_illust.php",
    )

    def works(self):
        """Return the illusts of the 'illust_follow' API listing"""
        api = self.api
        return api.illust_follow()

    def metadata(self):
        """Log in and return the logged-in user as 'user_follow'"""
        api = self.api
        # login() is what populates 'api.user'
        api.login()
        return {"user_follow": api.user}
-												[pixiv] add extractor for 'pixivision' articles (#1672)

											
										
										
											2021-07-07 02:22:44 +02:00
class PixivPixivisionExtractor(PixivExtractor):
    """Extractor for illustrations from a pixivision article"""
    subcategory = "pixivision"
    directory_fmt = ("{category}", "pixivision",
                     "{pixivision_id} {pixivision_title}")
    archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
    pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
    test = (
        "https://www.pixivision.net/en/a/2791",
        ("https://pixivision.net/a/2791", {
            "count": 7,
            "keyword": {
                "pixivision_id": "2791",
                "pixivision_title": "What's your favorite music? Editor’s "
                                    "picks featuring: “CD Covers”!",
            },
        }),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # numeric article ID captured by 'pattern'
        self.pixivision_id = match.group(1)

    def works(self):
        """Return a generator of illust details for all linked artworks

        Reads 'self.page', which is set by metadata(); metadata()
        therefore has to run before this method is called.
        """
        linked_ids = text.extract_iter(
            self.page, '<a href="https://www.pixiv.net/en/artworks/', '"')
        unique_ids = util.unique_sequence(linked_ids)
        return (
            self.api.illust_detail(illust_id)
            for illust_id in unique_ids
        )

    def metadata(self):
        """Fetch the article page and return its ID and title

        The page text is stored in 'self.page' for later use by works().
        """
        response = self.request(
            "https://www.pixivision.net/en/a/" + self.pixivision_id,
            headers={"User-Agent": "Mozilla/5.0"},
        )
        self.page = response.text

        title = text.extract(self.page, '<title>', '<')[0]
        return {
            "pixivision_id": self.pixivision_id,
            "pixivision_title": text.unescape(title),
        }
-												[pixiv] implement AppAPI wrapper

											
										
										
											2018-05-07 20:05:44 +02:00
class PixivAppAPI:
    """Minimal interface for the Pixiv App API for mobile devices

    For a more complete implementation or documentation, see
    - https://github.com/upbit/pixivpy
    - https://gist.github.com/ZipFile/3ba99b47162c23f8aea5d5942bb557b1
    """
    CLIENT_ID = "MOBrBDS8blbauoSck0ZfDbtuzpyT"
    CLIENT_SECRET = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj"
    HASH_SECRET = ("28c1fdd170a5204386cb1313c7077b34"
                   "f83e4aaf4aa829ce78c231e05b0bae2c")

    def __init__(self, extractor):
        """Bind the API wrapper to 'extractor'

        Reuses the extractor's logger and session, installs the app
        request headers on that session, and reads 'client-id',
        'client-secret' and 'refresh-token' from the extractor config.
        """
        self.extractor = extractor
        self.log = extractor.log
        self.username = extractor._get_auth_info()[0]
        self.user = None  # set by login()

        app_headers = {
            "App-OS"        : "ios",
            "App-OS-Version": "13.1.2",
            "App-Version"   : "7.7.6",
            "User-Agent"    : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)",
            "Referer"       : "https://app-api.pixiv.net/",
        }
        extractor.session.headers.update(app_headers)

        config = extractor.config
        self.client_id = config("client-id", self.CLIENT_ID)
        self.client_secret = config("client-secret", self.CLIENT_SECRET)

        # no configured token (or the literal "cache"):
        # fall back to the value of the refresh-token cache
        refresh_token = config("refresh-token")
        if refresh_token is None or refresh_token == "cache":
            refresh_token = _refresh_token_cache(self.username)
        self.refresh_token = refresh_token

    def login(self):
        """Login and gain an access token"""
        user, authorization = self._login_impl(self.username)
        self.user = user
        self.extractor.session.headers["Authorization"] = authorization

    @cache(maxage=3600, keyarg=1)
    def _login_impl(self, username):
        """Exchange the refresh token for a (user, 'Bearer ...') pair

        Raises exception.AuthenticationError when no refresh token is
        set or the token endpoint answers with a status code >= 400.
        """
        if not self.refresh_token:
            raise exception.AuthenticationError(
                "'refresh-token' required.\n"
                "Run `gallery-dl oauth:pixiv` to get one.")

        self.log.info("Refreshing access token")

        payload = {
            "client_id"     : self.client_id,
            "client_secret" : self.client_secret,
            "grant_type"    : "refresh_token",
            "refresh_token" : self.refresh_token,
            "get_secure_url": "1",
        }

        # 'X-Client-Hash' is the MD5 hex digest of client time + secret
        client_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
        digest = hashlib.md5((client_time + self.HASH_SECRET).encode())
        headers = {
            "X-Client-Time": client_time,
            "X-Client-Hash": digest.hexdigest(),
        }

        response = self.extractor.request(
            "https://oauth.secure.pixiv.net/auth/token",
            method="POST", headers=headers, data=payload, fatal=False)
        if response.status_code >= 400:
            self.log.debug(response.text)
            raise exception.AuthenticationError("Invalid refresh token")

        result = response.json()["response"]
        return result["user"], "Bearer " + result["access_token"]

    def illust_detail(self, illust_id):
        """Return the 'illust' object of 'v1/illust/detail'"""
        response = self._call("v1/illust/detail", {"illust_id": illust_id})
        return response["illust"]

    def illust_follow(self, restrict="all"):
        """Iterate over the illusts of 'v2/illust/follow'"""
        return self._pagination("v2/illust/follow", {"restrict": restrict})

    def illust_ranking(self, mode="day", date=None):
        """Iterate over the illusts of 'v1/illust/ranking'"""
        return self._pagination(
            "v1/illust/ranking", {"mode": mode, "date": date})

    def illust_related(self, illust_id):
        """Iterate over the illusts of 'v2/illust/related'"""
        return self._pagination(
            "v2/illust/related", {"illust_id": illust_id})

    def search_illust(self, word, sort=None, target=None, duration=None):
        """Iterate over the illusts of 'v1/search/illust'"""
        return self._pagination("v1/search/illust", {
            "word"         : word,
            "search_target": target,
            "sort"         : sort,
            "duration"     : duration,
        })

    def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
        """Iterate over the illusts of 'v1/user/bookmarks/illust'"""
        return self._pagination("v1/user/bookmarks/illust", {
            "user_id" : user_id,
            "tag"     : tag,
            "restrict": restrict,
        })

    def user_detail(self, user_id):
        """Return the 'user' object of 'v1/user/detail'"""
        response = self._call("v1/user/detail", {"user_id": user_id})
        return response["user"]

    def user_following(self, user_id, restrict="public"):
        """Iterate over the 'user_previews' of 'v1/user/following'"""
        return self._pagination("v1/user/following", {
            "user_id" : user_id,
            "restrict": restrict,
        }, "user_previews")

    def user_illusts(self, user_id):
        """Iterate over the illusts of 'v1/user/illusts'"""
        return self._pagination("v1/user/illusts", {"user_id": user_id})

    def ugoira_metadata(self, illust_id):
        """Return the 'ugoira_metadata' object of 'v1/ugoira/metadata'"""
        response = self._call(
            "v1/ugoira/metadata", {"illust_id": illust_id})
        return response["ugoira_metadata"]

    def _call(self, endpoint, params=None):
        """Send a request to 'endpoint' and return the decoded JSON

        Logs in before every attempt. A rate-limit error is answered
        by sleeping 120 seconds and trying again.

        Raises exception.NotFoundError for an error response with
        status 404 and exception.StopExtraction for any other error.
        """
        url = "https://app-api.pixiv.net/" + endpoint

        while True:
            self.login()
            response = self.extractor.request(
                url, params=params, fatal=False)
            data = response.json()

            if "error" not in data:
                return data
            if response.status_code == 404:
                raise exception.NotFoundError()

            error = data["error"]
            # 'message' may be missing or None
            message = (error.get("message") or "").lower()
            if "rate limit" not in message:
                raise exception.StopExtraction(
                    "API request failed: %s", error)

            self.log.info("Waiting two minutes for API rate limit reset.")
            time.sleep(120)

    def _pagination(self, endpoint, params, key="illusts"):
        """Yield all 'key' entries of 'endpoint', following 'next_url'"""
        while True:
            page = self._call(endpoint, params)
            yield from page[key]

            next_url = page["next_url"]
            if not next_url:
                return
            # only the query string of 'next_url' is reused;
            # the endpoint itself stays the same
            params = text.parse_query(next_url.rpartition("?")[2])
-												[pixiv] use refresh_token based authentication

The first login will still use username and password, but everything
afterwards will use the refresh_token obtained from that.

This will prevent pixiv from sending a "New login to pixiv" email every
time a new access_token is requested.

											
										
										
											2018-10-12 22:26:27 +02:00
-												adjust cache maxage values

											
										
										
											2019-03-14 22:21:49 +01:00
@cache(maxage=10 * 365 * 24 * 3600, keyarg=0)
def _refresh_token_cache(username):
    """Cache slot for a user's refresh token; None when nothing is stored

    The function body itself always returns None.
    NOTE(review): a real token presumably gets written into this cache
    entry elsewhere (e.g. by the 'oauth:pixiv' flow) -- not visible here.
    """
    return None