gallery-dl/gallery_dl/extractor/pixiv.py

# -*- coding: utf-8 -*-

# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.pixiv.net/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
from datetime import datetime, timedelta
import itertools
import hashlib

# URL prefix shared by the extractor patterns below: an optional scheme
# followed by pixiv.net with an optional "www." or "touch." subdomain
BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
# User page prefix with an optional "en/" language segment;
# its single capture group is the numeric user ID
USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"


class PixivExtractor(Extractor):
    """Base class for pixiv extractors

    items() iterates over the objects returned by works(), adds derived
    metadata fields to each of them and emits one Directory message per
    work followed by one Url message for every file of that work.
    Subclasses override works() and, optionally, metadata().
    """
    category = "pixiv"
    root = "https://www.pixiv.net"
    directory_fmt = ("{category}", "{user[id]} {user[account]}")
    filename_fmt = "{id}_p{num}.{extension}"
    archive_fmt = "{id}{suffix}.{extension}"
    cookiedomain = None

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.api = PixivAppAPI(self)
        # 'ugoira' option: process ugoira works (enabled by default)
        self.load_ugoira = self.config("ugoira", True)
        # 'max-posts' option: upper bound on processed works (0: no limit)
        self.max_posts = self.config("max-posts", 0)

    def items(self):
        """Yield Directory and Url messages for all works of this job"""
        # select how the 'tags' field of each work gets rewritten
        tags = self.config("tags", "japanese")
        if tags == "original":
            # keep the tag objects exactly as returned by the API
            transform_tags = None
        elif tags == "translated":
            def transform_tags(work):
                # prefer the translated name; dict.fromkeys() removes
                # duplicates while preserving order
                work["tags"] = list(dict.fromkeys(
                    tag["translated_name"] or tag["name"]
                    for tag in work["tags"]))
        else:
            def transform_tags(work):
                work["tags"] = [tag["name"] for tag in work["tags"]]

        ratings = {0: "General", 1: "R-18", 2: "R-18G"}
        meta_user = self.config("metadata")
        meta_bookmark = self.config("metadata-bookmark")
        metadata = self.metadata()

        works = self.works()
        if self.max_posts:
            works = itertools.islice(works, self.max_posts)
        for work in works:
            # skip works without a usable user ID
            if not work["user"]["id"]:
                continue

            # remember the URL containers, then drop them from the
            # metadata dict that gets passed on with each message
            meta_single_page = work["meta_single_page"]
            meta_pages = work["meta_pages"]
            del work["meta_single_page"]
            del work["image_urls"]
            del work["meta_pages"]

            if meta_user:
                work.update(self.api.user_detail(work["user"]["id"]))
            if meta_bookmark and work["is_bookmarked"]:
                detail = self.api.illust_bookmark_detail(work["id"])
                work["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
                                         if tag["is_registered"]]
            if transform_tags:
                transform_tags(work)
            work["num"] = 0
            work["date"] = text.parse_datetime(work["create_date"])
            work["rating"] = ratings.get(work["x_restrict"])
            work["suffix"] = ""
            work.update(metadata)

            yield Message.Directory, work

            if work["type"] == "ugoira":
                if not self.load_ugoira:
                    continue

                try:
                    ugoira = self.api.ugoira_metadata(work["id"])
                except exception.StopExtraction as exc:
                    self.log.warning(
                        "Unable to retrieve Ugoira metadata (%s - %s)",
                        work.get("id"), exc.message)
                    continue

                # request the 1920x1080 archive instead of the 600x600 one
                url = ugoira["zip_urls"]["medium"].replace(
                    "_ugoira600x600", "_ugoira1920x1080")
                work["frames"] = ugoira["frames"]
                work["date_url"] = self._date_from_url(url)
                work["_http_adjust_extension"] = False
                yield Message.Url, url, text.nameext_from_url(url, work)

            elif work["page_count"] == 1:
                url = meta_single_page["original_image_url"]
                work["date_url"] = self._date_from_url(url)
                yield Message.Url, url, text.nameext_from_url(url, work)

            else:
                # the loop target stores the page index directly in 'num'
                for work["num"], img in enumerate(meta_pages):
                    url = img["image_urls"]["original"]
                    work["date_url"] = self._date_from_url(url)
                    work["suffix"] = "_p{:02}".format(work["num"])
                    yield Message.Url, url, text.nameext_from_url(url, work)

    @staticmethod
    def _date_from_url(url, offset=timedelta(hours=9)):
        """Return the timestamp embedded in 'url' minus 'offset'

        The URL must consist of exactly 12 "/"-separated parts, with
        year, month, day, hour, minute and second in parts 6 to 11.
        Returns None if 'url' does not have this form.
        """
        try:
            _, _, _, _, _, y, m, d, H, M, S, _ = url.split("/")
            return datetime(
                int(y), int(m), int(d), int(H), int(M), int(S)) - offset
        except Exception:
            # deliberate best-effort: any malformed URL yields no date
            return None

    @staticmethod
    def _make_work(kind, url, user):
        """Build a minimal single-page work object for a non-artwork file

        'kind' is used as id, title and type of the result, 'url' becomes
        its original image URL and 'user' its owner.
        'create_date' is derived from the date components in the URL path
        or is None if the URL has too few path segments.
        """
        p = url.split("/")
        return {
            # p[10] is the highest index used, so at least 11 parts are
            # needed (the previous 'len(p) > 9' check allowed an
            # IndexError for URLs with exactly 10 parts)
            "create_date"     : "{}-{}-{}T{}:{}:{}+09:00".format(
                p[5], p[6], p[7], p[8], p[9], p[10]) if len(p) > 10 else None,
            "height"          : 0,
            "id"              : kind,
            "image_urls"      : None,
            "meta_pages"      : (),
            "meta_single_page": {"original_image_url": url},
            "page_count"      : 1,
            "sanity_level"    : 0,
            "tags"            : (),
            "title"           : kind,
            "type"            : kind,
            "user"            : user,
            "width"           : 0,
            "x_restrict"      : 0,
        }

    def works(self):
        """Return an iterable containing all relevant 'work' objects"""

    def metadata(self):
        """Collect metadata for extractor job"""
        return {}


class PixivUserExtractor(PixivExtractor):
    """Extractor for a pixiv user profile"""
    subcategory = "user"
    pattern = (BASE_PATTERN + r"/(?:"
               r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
               r")(\d+)(?:$|[?#])")
    test = (
        "https://www.pixiv.net/en/users/173530",
        "https://www.pixiv.net/u/173530",
        "https://www.pixiv.net/member.php?id=173530",
        "https://www.pixiv.net/mypage.php#id=173530",
        "https://www.pixiv.net/#id=173530",
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # the only capture group of 'pattern' is the numeric user ID
        self.user_id = match.group(1)

    def items(self):
        """Hand this user's sub-pages over to their dedicated extractors"""
        prefix = "{}/users/{}/".format(self.root, self.user_id)
        targets = (
            (PixivAvatarExtractor       , prefix + "avatar"),
            (PixivBackgroundExtractor   , prefix + "background"),
            (PixivArtworksExtractor     , prefix + "artworks"),
            (PixivFavoriteExtractor     , prefix + "bookmarks/artworks"),
            (PixivNovelBookmarkExtractor, prefix + "bookmarks/novels"),
            (PixivNovelUserExtractor    , prefix + "novels"),
        )
        # presumably the selection used when nothing else is configured;
        # verify against Extractor._dispatch_extractors()
        default = ("artworks",)
        return self._dispatch_extractors(targets, default)


class PixivArtworksExtractor(PixivExtractor):
    """Extractor for artworks of a pixiv user"""
    subcategory = "artworks"
    pattern = (BASE_PATTERN + r"/(?:"
               r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
               r"(?:/([^/?#]+))?/?(?:$|[?#])"
               r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
    test = (
        ("https://www.pixiv.net/en/users/173530/artworks", {
            "url": "852c31ad83b6840bacbce824d85f2a997889efb7",
        }),
        # illusts with specific tag
        (("https://www.pixiv.net/en/users/173530/artworks"
          "/%E6%89%8B%E3%81%B6%E3%82%8D"), {
            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
        }),
        (("https://www.pixiv.net/member_illust.php?id=173530"
          "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
        }),
        # deleted account
        ("http://www.pixiv.net/member_illust.php?id=173531", {
            "options": (("metadata", True),),
            "exception": exception.NotFoundError,
        }),
        "https://www.pixiv.net/en/users/173530/manga",
        "https://www.pixiv.net/en/users/173530/illustrations",
        "https://www.pixiv.net/member_illust.php?id=173530",
        "https://touch.pixiv.net/member_illust.php?id=173530",
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # groups 1+2 belong to the "/users/<id>/..." URL form,
        # groups 3+4 to the legacy "member_illust.php?id=<id>&..." form
        path_user, path_tag, legacy_user, legacy_query = match.groups()

        if path_tag:
            tag = text.unquote(path_tag) or legacy_query
        elif legacy_query:
            tag = text.parse_query(legacy_query).get("tag")
        else:
            tag = legacy_query

        self.user_id = path_user or legacy_user
        self.tag = tag

    def metadata(self):
        """Query the user's details if 'metadata' is enabled

        The result of the API call is discarded and an empty dict is
        returned in every case.
        """
        wants_user_data = self.config("metadata")
        if wants_user_data:
            self.api.user_detail(self.user_id)
        return {}

    def works(self):
        """Return the user's illustrations, optionally filtered by tag"""
        illusts = self.api.user_illusts(self.user_id)
        if not self.tag:
            return illusts

        # case-insensitive comparison against every tag name of a work
        wanted = self.tag.lower()
        return (
            illust for illust in illusts
            if wanted in [entry["name"].lower() for entry in illust["tags"]]
        )


class PixivAvatarExtractor(PixivExtractor):
    """Extractor for pixiv avatars"""
    subcategory = "avatar"
    filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}"
    archive_fmt = "avatar_{user[id]}_{date}"
    pattern = USER_PATTERN + r"/avatar"
    test = ("https://www.pixiv.net/en/users/173530/avatar", {
        "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
    })

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # user ID captured by USER_PATTERN
        self.user_id = match.group(1)

    def works(self):
        """Return a one-element tuple with a work for the user's avatar"""
        detail = self.api.user_detail(self.user_id)
        user = detail["user"]
        medium = user["profile_image_urls"]["medium"]
        # drop the "_170" marker from the medium-sized image's file name
        avatar_url = medium.replace("_170.", ".")
        return (self._make_work("avatar", avatar_url, user),)


class PixivBackgroundExtractor(PixivExtractor):
    """Extractor for pixiv background banners"""
    subcategory = "background"
    filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}"
    archive_fmt = "background_{user[id]}_{date}"
    pattern = USER_PATTERN + "/background"
    test = ("https://www.pixiv.net/en/users/194921/background", {
        "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02"
                   r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg",
    })

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # user ID captured by USER_PATTERN
        self.user_id = match.group(1)

    def works(self):
        """Return a tuple with the user's background image work, if any"""
        detail = self.api.user_detail(self.user_id)
        url = detail["profile"]["background_image_url"]
        if not url:
            # no background image set
            return ()

        if "/c/" in url:
            # cut out the 4th and 5th "/"-separated segments of the URL
            segments = url.split("/")
            url = "/".join(segments[:3] + segments[5:])
        url = url.replace("_master1200.", ".")

        work = self._make_work("background", url, detail["user"])
        if url.endswith(".jpg"):
            # offer the same path with other extensions as fallback URLs
            stem = url[:-4]
            work["_fallback"] = (stem + ".png", stem + ".gif")
        return (work,)


class PixivMeExtractor(PixivExtractor):
    """Extractor for pixiv.me URLs"""
    subcategory = "me"
    pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)"
    test = (
        ("https://pixiv.me/del_shannon", {
            "url": "29c295ce75150177e6b0a09089a949804c708fbf",
        }),
        ("https://pixiv.me/del_shanno", {
            "exception": exception.NotFoundError,
        }),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # account name taken from the URL path
        self.account = match.group(1)

    def items(self):
        """Queue the URL that the pixiv.me short link redirects to"""
        # HEAD request without following redirects:
        # only the 'Location' response header is of interest
        response = self.request(
            "https://pixiv.me/" + self.account,
            method="HEAD", allow_redirects=False, notfound="user")
        target = response.headers["Location"]
        yield Message.Queue, target, {"_extractor": PixivUserExtractor}


class PixivWorkExtractor(PixivExtractor):
    """Extractor for a single pixiv work/illustration"""
    subcategory = "work"
    pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
               r"/(?:(?:en/)?artworks/"
               r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)"
               r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
               r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)"
               r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))")
    test = (
        ("https://www.pixiv.net/artworks/966412", {
            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
            "content": "69a8edfb717400d1c2e146ab2b30d2c235440c5a",
            "keyword": {
                "date"    : "dt:2008-06-12 15:29:13",
                "date_url": "dt:2008-06-12 15:29:13",
            },
        }),
        (("http://www.pixiv.net/member_illust.php"
          "?mode=medium&illust_id=966411"), {
            "exception": exception.NotFoundError,
        }),
        # ugoira
        (("https://www.pixiv.net/member_illust.php"
          "?mode=medium&illust_id=66806629"), {
            "url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef",
            "keyword": {
                "frames"  : list,
                "date"    : "dt:2018-01-14 15:06:08",
                "date_url": "dt:2018-01-15 04:24:48",
            },
        }),
        # related works (#1237)
        ("https://www.pixiv.net/artworks/966412", {
            "options": (("related", True),),
            "range": "1-10",
            "count": ">= 10",
        }),
        "https://www.pixiv.net/en/artworks/966412",
        "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=96641",
        ("http://i1.pixiv.net/c/600x600/img-master"
         "/img/2008/06/13/00/29/13/966412_p0_master1200.jpg"),
        ("https://i.pximg.net/img-original"
         "/img/2017/04/25/07/33/29/62568267_p0.png"),
        "https://www.pixiv.net/i/966412",
        "http://img.pixiv.net/img/soundcross/42626136.jpg",
        "http://i2.pixiv.net/img76/img/snailrin/42672235.jpg",
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # group 1: ID from a pixiv.net page URL,
        # group 2: ID from an image URL or a "/i/<id>" short URL
        page_id = match.group(1)
        self.illust_id = page_id if page_id else match.group(2)

    def works(self):
        """Return the requested work, optionally followed by related ones"""
        detail = self.api.illust_detail(self.illust_id)
        if not self.config("related", False):
            return (detail,)
        return itertools.chain(
            (detail,), self.api.illust_related(self.illust_id))


class PixivFavoriteExtractor(PixivExtractor):
    """Extractor for all favorites/bookmarks of a pixiv user"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "bookmarks",
                     "{user_bookmark[id]} {user_bookmark[account]}")
    archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
    pattern = (BASE_PATTERN + r"/(?:(?:en/)?"
               r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?"
               r"|bookmark\.php)(?:\?([^#]*))?")
    test = (
        ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
            "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
        }),
        ("https://www.pixiv.net/bookmark.php?id=173530", {
            "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
        }),
        # bookmarks with specific tag
        (("https://www.pixiv.net/en/users/3137110"
          "/bookmarks/artworks/%E3%81%AF%E3%82%93%E3%82%82%E3%82%93"), {
            "url": "379b28275f786d946e01f721e54afe346c148a8c",
        }),
        # bookmarks with specific tag (legacy url)
        (("https://www.pixiv.net/bookmark.php?id=3137110"
          "&tag=%E3%81%AF%E3%82%93%E3%82%82%E3%82%93&p=1"), {
            "url": "379b28275f786d946e01f721e54afe346c148a8c",
        }),
        # own bookmarks
        ("https://www.pixiv.net/bookmark.php", {
            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
            "keyword": {"tags_bookmark": ["47", "hitman"]},
            "options": (("metadata-bookmark", True),),
        }),
        # own bookmarks with tag (#596)
        ("https://www.pixiv.net/bookmark.php?tag=foobar", {
            "count": 0,
        }),
        # followed users (#515)
        ("https://www.pixiv.net/en/users/173530/following", {
            "pattern": PixivUserExtractor.pattern,
            "count": ">= 12",
        }),
        # followed users (legacy url) (#515)
        ("https://www.pixiv.net/bookmark.php?id=173530&type=user", {
            "pattern": PixivUserExtractor.pattern,
            "count": ">= 12",
        }),
        # touch URLs
        "https://touch.pixiv.net/bookmark.php?id=173530",
        "https://touch.pixiv.net/bookmark.php",
    )

    def __init__(self, match):
        user_id, section, self.tag, raw_query = match.groups()
        params = text.parse_query(raw_query)

        # legacy URLs carry the user ID as 'id' query parameter;
        # without any ID, the logged-in user's own bookmarks are meant
        user_id = user_id or params.get("id")
        if not user_id:
            self.subcategory = "bookmark"

        wants_following = (
            section == "following" or params.get("type") == "user")
        if wants_following:
            self.subcategory = "following"
            self.items = self._items_following

        # the subcategory is settled before the base initializer runs
        PixivExtractor.__init__(self, match)
        self.query = params
        self.user_id = user_id

    def works(self):
        """Return the bookmarked illustrations of the selected user"""
        # a 'tag' query parameter takes precedence over a tag in the path
        if "tag" in self.query:
            tag = text.unquote(self.query["tag"])
        elif self.tag:
            tag = text.unquote(self.tag)
        else:
            tag = None

        hidden = self.query.get("rest") == "hide"
        restrict = "private" if hidden else "public"

        return self.api.user_bookmarks_illust(self.user_id, tag, restrict)

    def metadata(self):
        """Return the owner of the bookmarks as 'user_bookmark'"""
        if not self.user_id:
            # no explicit ID: fall back to the logged-in account
            self.api.login()
            owner = self.api.user
        else:
            owner = self.api.user_detail(self.user_id)["user"]

        self.user_id = owner["id"]
        return {"user_bookmark": owner}

    def _items_following(self):
        """Queue the profile URL of every followed user"""
        hidden = self.query.get("rest") == "hide"
        restrict = "private" if hidden else "public"

        for preview in self.api.user_following(self.user_id, restrict):
            followed = preview["user"]
            followed["_extractor"] = PixivUserExtractor
            profile = "https://www.pixiv.net/users/{}".format(followed["id"])
            yield Message.Queue, profile, followed


class PixivRankingExtractor(PixivExtractor):
    """Extractor for pixiv ranking pages"""
    subcategory = "ranking"
    archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
    directory_fmt = ("{category}", "rankings",
                     "{ranking[mode]}", "{ranking[date]}")
    pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?"
    test = (
        ("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"),
        ("https://www.pixiv.net/ranking.php"),
        ("https://touch.pixiv.net/ranking.php"),
        ("https://www.pixiv.net/ranking.php?mode=unknown", {
            "exception": exception.StopExtraction,
        }),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # raw query string; evaluated in metadata()
        self.query = match.group(1)
        self.mode = self.date = None

    def works(self):
        """Return the ranked works for the mode and date set by metadata()"""
        return self.api.illust_ranking(self.mode, self.date)

    def metadata(self):
        """Validate the URL's query parameters and return ranking metadata

        Stores the resulting values in self.mode and self.date, which
        works() passes on to the API.

        Raises exception.StopExtraction for an unknown 'mode' value.
        """
        # function-scope import: 'timezone' is only needed here
        from datetime import timezone

        query = text.parse_query(self.query)

        # translate the website's mode names into the names given to the API
        mode = query.get("mode", "daily").lower()
        mode_map = {
            "daily": "day",
            "daily_r18": "day_r18",
            "daily_ai": "day_ai",
            "daily_r18_ai": "day_r18_ai",
            "weekly": "week",
            "weekly_r18": "week_r18",
            "monthly": "month",
            "male": "day_male",
            "male_r18": "day_male_r18",
            "female": "day_female",
            "female_r18": "day_female_r18",
            "original": "week_original",
            "rookie": "week_rookie",
            "r18g": "week_r18g",
        }
        try:
            self.mode = mode = mode_map[mode]
        except KeyError:
            raise exception.StopExtraction("Invalid mode '%s'", mode)

        date = query.get("date")
        if date:
            if len(date) == 8 and date.isdecimal():
                # YYYYMMDD -> YYYY-MM-DD
                date = "{}-{}-{}".format(date[0:4], date[4:6], date[6:8])
            else:
                self.log.warning("invalid date '%s'", date)
                date = None
        if not date:
            # default to yesterday (UTC); datetime.utcnow() is deprecated
            # since Python 3.12, and an aware UTC timestamp produces the
            # same date string
            yesterday = datetime.now(timezone.utc) - timedelta(days=1)
            date = yesterday.strftime("%Y-%m-%d")
        self.date = date

        return {"ranking": {
            "mode": mode,
            "date": self.date,
        }}


class PixivSearchExtractor(PixivExtractor):
    """Extractor for pixiv search results"""
    subcategory = "search"
    archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
    directory_fmt = ("{category}", "search", "{search[word]}")
    pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
               r"|search\.php)(?:\?([^#]+))?")
    test = (
        ("https://www.pixiv.net/en/tags/Original", {
            "range": "1-10",
            "count": 10,
        }),
        ("https://pixiv.net/en/tags/foo/artworks?order=week&s_mode=s_tag", {
            "exception": exception.StopExtraction,
        }),
        ("https://pixiv.net/en/tags/foo/artworks?order=date&s_mode=tag", {
            "exception": exception.StopExtraction,
        }),
        ("https://www.pixiv.net/search.php?s_mode=s_tag&name=Original", {
            "exception": exception.StopExtraction,
        }),
        ("https://www.pixiv.net/en/tags/foo/artworks?order=date&s_mode=s_tag"),
        ("https://www.pixiv.net/search.php?s_mode=s_tag&word=Original"),
        ("https://touch.pixiv.net/search.php?word=Original"),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # search term from the URL path (if any) and the raw query string
        self.word, self.query = match.groups()
        self.sort = self.target = None
        # initialized here like 'sort' and 'target', so that works() does
        # not fail with an AttributeError when called before metadata()
        self.date_start = self.date_end = None

    def works(self):
        """Return search results for the parameters set by metadata()"""
        return self.api.search_illust(
            self.word, self.sort, self.target,
            date_start=self.date_start, date_end=self.date_end)

    def metadata(self):
        """Validate the URL's query parameters and return search metadata

        Stores the search term, sort order, search target and date range
        as attributes, which works() passes on to the API.

        Raises exception.StopExtraction if the search term is missing or
        if the 'order' or 's_mode' parameter has an unsupported value.
        """
        query = text.parse_query(self.query)

        if self.word:
            self.word = text.unquote(self.word)
        else:
            # search.php URLs carry the term as 'word' query parameter
            try:
                self.word = query["word"]
            except KeyError:
                raise exception.StopExtraction("Missing search term")

        # translate the website's 'order' values into the API's sort names
        sort = query.get("order", "date_d")
        sort_map = {
            "date": "date_asc",
            "date_d": "date_desc",
            "popular_d": "popular_desc",
            "popular_male_d": "popular_male_desc",
            "popular_female_d": "popular_female_desc",
        }
        try:
            self.sort = sort = sort_map[sort]
        except KeyError:
            raise exception.StopExtraction("Invalid search order '%s'", sort)

        # translate the website's 's_mode' values into the API's targets
        target = query.get("s_mode", "s_tag_full")
        target_map = {
            "s_tag": "partial_match_for_tags",
            "s_tag_full": "exact_match_for_tags",
            "s_tc": "title_and_caption",
        }
        try:
            self.target = target = target_map[target]
        except KeyError:
            raise exception.StopExtraction("Invalid search mode '%s'", target)

        self.date_start = query.get("scd")
        self.date_end = query.get("ecd")

        return {"search": {
            "word": self.word,
            "sort": self.sort,
            "target": self.target,
            "date_start": self.date_start,
            "date_end": self.date_end,
        }}


class PixivFollowExtractor(PixivExtractor):
    """Extractor for new illustrations from your followed artists"""
    subcategory = "follow"
    archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}"
    directory_fmt = ("{category}", "following")
    pattern = BASE_PATTERN + r"/bookmark_new_illust\.php"
    test = (
        "https://www.pixiv.net/bookmark_new_illust.php",
        "https://touch.pixiv.net/bookmark_new_illust.php",
    )

    def works(self):
        """Return the works provided by the API's illust_follow()"""
        feed = self.api.illust_follow()
        return feed

    def metadata(self):
        """Log in and return the logged-in user as 'user_follow'"""
        api = self.api
        api.login()
        return {"user_follow": api.user}


class PixivPixivisionExtractor(PixivExtractor):
    """Extractor for illustrations from a pixivision article"""
    subcategory = "pixivision"
    directory_fmt = ("{category}", "pixivision",
                     "{pixivision_id} {pixivision_title}")
    archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
    pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
    test = (
        "https://www.pixivision.net/en/a/2791",
        ("https://pixivision.net/a/2791", {
            "count": 7,
            "keyword": {
                "pixivision_id": "2791",
                "pixivision_title": "What's your favorite music? Editor’s "
                                    "picks featuring: “CD Covers”!",
            },
        }),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # numeric article ID from the URL
        self.pixivision_id = match.group(1)

    def works(self):
        """Return details for every artwork linked in the article page

        Relies on self.page, which metadata() sets beforehand.
        """
        links = text.extract_iter(
            self.page, '<a href="https://www.pixiv.net/en/artworks/', '"')
        illust_ids = util.unique_sequence(links)
        # anything after a "?" in the link target is not part of the ID
        return (
            self.api.illust_detail(link.partition("?")[0])
            for link in illust_ids
        )

    def metadata(self):
        """Fetch the article page and return its ID and title"""
        article_url = "https://www.pixivision.net/en/a/" + self.pixivision_id
        response = self.request(
            article_url, headers={"User-Agent": "Mozilla/5.0"})
        self.page = response.text

        raw_title = text.extr(self.page, '<title>', '<')
        return {
            "pixivision_id"   : self.pixivision_id,
            "pixivision_title": text.unescape(raw_title),
        }


class PixivSeriesExtractor(PixivExtractor):
    """Extractor for illustrations from a Pixiv series"""
    subcategory = "series"
    directory_fmt = ("{category}", "{user[id]} {user[account]}",
                     "{series[id]} {series[title]}")
    filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}"
    pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)"
    test = ("https://www.pixiv.net/user/10509347/series/21859", {
        "range": "1-10",
        "count": 10,
        "keyword": {
            "num_series": int,
            "series": {
                "canonical": "https://www.pixiv.net/user/10509347"
                             "/series/21859",
                "description": str,
                "ogp": dict,
                "title": "先輩がうざい後輩の話",
                "total": int,
                "twitter": dict,
            },
        },
    })

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        user_id, series_id = match.groups()
        self.user_id = user_id
        self.series_id = series_id

    def works(self):
        """Yield all works of the series, page by page

        Each work gets its position in the series as 'num_series' and the
        series' metadata as 'series'.
        """
        endpoint = self.root + "/ajax/series/" + self.series_id
        referer = "{}/user/{}/series/{}".format(
            self.root, self.user_id, self.series_id)
        headers = {
            "Accept": "application/json",
            "Referer": referer,
            "Alt-Used": "www.pixiv.net",
        }
        params = {"p": 1}

        while True:
            response = self.request(endpoint, params=params, headers=headers)
            body = response.json()["body"]
            page = body["page"]

            series = body["extraData"]["meta"]
            series["id"] = self.series_id
            series["total"] = page["total"]
            # keep only the text between the first pair of double quotes
            series["title"] = text.extr(series["title"], '"', '"')

            entries = page["series"]
            for entry in entries:
                work = self.api.illust_detail(entry["workId"])
                work["num_series"] = entry["order"]
                work["series"] = series
                yield work

            # fewer than 10 entries on a page ends the pagination
            if len(entries) < 10:
                return
            params["p"] += 1


class PixivNovelExtractor(PixivExtractor):
    """Extractor for pixiv novels"""
    subcategory = "novel"
    request_interval = 1.0
    pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
    test = (
        ("https://www.pixiv.net/novel/show.php?id=19612040", {
            "count": 1,
            "content": "8c818474153cbd2f221ee08766e1d634c821d8b4",
            "keyword": {
                "caption": r"re:「無能な名無し」と呼ばれ虐げられて育った鈴\(すず\)は、",
                "comment_access_control": 0,
                "create_date": "2023-04-02T15:18:58+09:00",
                "date": "dt:2023-04-02 06:18:58",
                "id": 19612040,
                "is_bookmarked": False,
                "is_muted": False,
                "is_mypixiv_only": False,
                "is_original": True,
                "is_x_restricted": False,
                "novel_ai_type": 1,
                "page_count": 1,
                "rating": "General",
                "restrict": 0,
                "series": {
                    "id": 10278364,
                    "title": "龍の贄嫁〜無能な名無しと虐げられていましたが、"
                             "どうやら異母妹に霊力を搾取されていたようです〜",
                },
                "tags": ["和風ファンタジー", "溺愛", "神様", "ヤンデレ", "執着",
                         "異能", "ざまぁ", "学園", "神嫁"],
                "text_length": 5974,
                "title": "異母妹から「無能な名無し」と虐げられていた私、"
                         "どうやら異母妹に霊力を搾取されていたようです（１）",
                "user": {
                    "account": "yukinaga_chifuyu",
                    "id": 77055466,
                },
                "visible": True,
                "x_restrict": 0,
            },
        }),
        # embeds
        ("https://www.pixiv.net/novel/show.php?id=16422450", {
            "options": (("embeds", True),),
            "count": 3,
        }),
        # short URL
        ("https://www.pixiv.net/n/19612040"),
    )

    def __init__(self, match):
        PixivExtractor.__init__(self, match)
        # first capture group of 'pattern'; subclasses with a different
        # pattern store whatever ID their first group captures here
        self.novel_id = match.group(1)

    def items(self):
        """Yield messages for the text and embedded images of all novels

        For every object returned by novels(), this emits a Directory
        message and a "text:" Url message with the novel's content.
        With the 'embeds' option enabled, images embedded in the text
        follow as Url messages and referenced pixiv artworks as Queue
        messages. All messages of one novel share the same metadata
        dict, which gets updated in place between them.
        """
        # select how the 'tags' field of each novel gets rewritten
        tags = self.config("tags", "japanese")
        if tags == "original":
            # keep the tag objects as they are
            transform_tags = None
        elif tags == "translated":
            def transform_tags(work):
                # prefer the translated name; dict.fromkeys() removes
                # duplicates while preserving order
                work["tags"] = list(dict.fromkeys(
                    tag["translated_name"] or tag["name"]
                    for tag in work["tags"]))
        else:
            def transform_tags(work):
                work["tags"] = [tag["name"] for tag in work["tags"]]

        ratings = {0: "General", 1: "R-18", 2: "R-18G"}
        meta_user = self.config("metadata")
        meta_bookmark = self.config("metadata-bookmark")
        embeds = self.config("embeds")

        if embeds:
            # headers for requesting a novel's regular web page below
            # NOTE(review): the None values presumably remove headers set
            # up for app API requests - confirm against Extractor.request()
            headers = {
                "User-Agent"    : "Mozilla/5.0",
                "App-OS"        : None,
                "App-OS-Version": None,
                "App-Version"   : None,
                "Referer"       : self.root + "/",
                "Authorization" : None,
            }

        novels = self.novels()
        if self.max_posts:
            novels = itertools.islice(novels, self.max_posts)
        for novel in novels:
            if meta_user:
                novel.update(self.api.user_detail(novel["user"]["id"]))
            if meta_bookmark and novel["is_bookmarked"]:
                detail = self.api.novel_bookmark_detail(novel["id"])
                novel["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
                                          if tag["is_registered"]]
            if transform_tags:
                transform_tags(novel)
            novel["num"] = 0
            novel["date"] = text.parse_datetime(novel["create_date"])
            novel["rating"] = ratings.get(novel["x_restrict"])
            novel["suffix"] = ""

            yield Message.Directory, novel

            # the novel's text itself is passed on as a "text:" URL
            novel["extension"] = "txt"
            content = self.api.novel_text(novel["id"])["novel_text"]
            yield Message.Url, "text:" + content, novel

            if embeds:
                desktop = False
                # dict used as an ordered set of referenced illust IDs
                illusts = {}

                # inspect all "[...]" markers in the novel's text
                for marker in text.extract_iter(content, "[", "]"):
                    # NOTE(review): a marker starting with "[" presumably
                    # requires "[[" in the text - confirm that this is the
                    # intended trigger for fetching the web page
                    if marker.startswith("[jumpuri:"):
                        desktop = True
                    elif marker.startswith("pixivimage:"):
                        # strip the 11-character "pixivimage:" prefix and
                        # anything after a "-" to get the illust ID
                        illusts[marker[11:].partition("-")[0]] = None

                if desktop:
                    # embedded images are read from the JSON data inside
                    # the novel's web page
                    novel_id = str(novel["id"])
                    url = "{}/novel/show.php?id={}".format(
                        self.root, novel_id)
                    data = util.json_loads(text.extr(
                        self.request(url, headers=headers).text,
                        "id=\"meta-preload-data\" content='", "'"))

                    for image in (data["novel"][novel_id]
                                  ["textEmbeddedImages"]).values():
                        url = image.pop("urls")["original"]
                        # merge the image's remaining fields into the
                        # shared metadata dict
                        novel.update(image)
                        novel["date_url"] = self._date_from_url(url)
                        novel["num"] += 1
                        novel["suffix"] = "_p{:02}".format(novel["num"])
                        text.nameext_from_url(url, novel)
                        yield Message.Url, url, novel

                if illusts:
                    # referenced artworks are queued for PixivWorkExtractor;
                    # 'num' continues counting after the embedded images
                    novel["_extractor"] = PixivWorkExtractor
                    novel["date_url"] = None
                    for illust_id in illusts:
                        novel["num"] += 1
                        novel["suffix"] = "_p{:02}".format(novel["num"])
                        url = "{}/artworks/{}".format(self.root, illust_id)
                        yield Message.Queue, url, novel

    def novels(self):
        """Return an iterable of novel objects to process"""
        return (self.api.novel_detail(self.novel_id),)


class PixivNovelUserExtractor(PixivNovelExtractor):
    """Extractor for pixiv users' novels"""
    subcategory = "novel-user"
    pattern = USER_PATTERN + r"/novels"
    test = ("https://www.pixiv.net/en/users/77055466/novels", {
        "pattern": "^text:",
        "range": "1-5",
        "count": 5,
    })

    def novels(self):
        """Return the novels listed by the API for the matched user"""
        # NOTE(review): 'novel_id' presumably holds the user ID captured
        # by USER_PATTERN here; it is set outside this class -- confirm.
        user_id = self.novel_id
        return self.api.user_novels(user_id)


class PixivNovelSeriesExtractor(PixivNovelExtractor):
    """Extractor for pixiv novel series"""
    subcategory = "novel-series"
    pattern = BASE_PATTERN + r"/novel/series/(\d+)"
    test = ("https://www.pixiv.net/novel/series/10278364", {
        "count": 4,
        "content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2",
    })

    def novels(self):
        """Return the novels listed by the API for the matched series"""
        # NOTE(review): 'novel_id' presumably holds the series ID captured
        # by 'pattern' here; it is set outside this class -- confirm.
        series_id = self.novel_id
        return self.api.novel_series(series_id)


class PixivNovelBookmarkExtractor(PixivNovelExtractor):
    """Extractor for bookmarked pixiv novels"""
    subcategory = "novel-bookmark"
    pattern = (USER_PATTERN + r"/bookmarks/novels"
               r"(?:/([^/?#]+))?(?:/?\?([^#]+))?")
    test = (
        ("https://www.pixiv.net/en/users/77055466/bookmarks/novels", {
            "count": 1,
            "content": "7194e8faa876b2b536f185ee271a2b6e46c69089",
        }),
        ("https://www.pixiv.net/en/users/11/bookmarks/novels/TAG?rest=hide"),
    )

    def __init__(self, match):
        PixivNovelExtractor.__init__(self, match)
        # capture groups in order: user ID, optional bookmark tag
        # (path segment), optional query string
        groups = match.groups()
        self.user_id, self.tag, self.query = groups

    def novels(self):
        """Return the user's bookmarked novels, filtered by tag if given

        A 'rest=hide' query parameter selects the "private" bookmarks;
        anything else selects the "public" ones.
        """
        tag = text.unquote(self.tag) if self.tag else None

        hidden = text.parse_query(self.query).get("rest") == "hide"
        restrict = "private" if hidden else "public"

        return self.api.user_bookmarks_novel(self.user_id, tag, restrict)


class PixivSketchExtractor(Extractor):
    """Extractor for user pages on sketch.pixiv.net"""
    category = "pixiv"
    subcategory = "sketch"
    directory_fmt = ("{category}", "sketch", "{user[unique_name]}")
    filename_fmt = "{post_id} {id}.{extension}"
    archive_fmt = "S{user[id]}_{id}"
    root = "https://sketch.pixiv.net"
    cookiedomain = ".pixiv.net"
    pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)"
    test = ("https://sketch.pixiv.net/@nicoby", {
        "pattern": r"https://img\-sketch\.pixiv\.net/uploads/medium"
                   r"/file/\d+/\d+\.(jpg|png)",
        "count": ">= 35",
    })

    def __init__(self, match):
        Extractor.__init__(self, match)
        # the only capture group is the name following '@' in the URL
        self.username = match.group(1)

    def items(self):
        """Yield a Directory message per post and a Url message per photo"""
        user_page = "{}/@{}".format(self.root, self.username)
        headers = {"Referer": user_page}

        for post in self.posts():
            photos = post["media"]

            # keep the post's own ID as 'post_id'; 'id' is reused below
            # for the ID of each individual photo
            post["post_id"] = post["id"]
            post["date"] = text.parse_datetime(
                post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
            util.delete_items(post, ("id", "media", "_links"))

            yield Message.Directory, post

            # attached only after the Directory message has been emitted
            post["_http_headers"] = headers

            for photo in photos:
                original = photo["photo"]["original"]
                post["id"] = photo["id"]
                post["width"] = original["width"]
                post["height"] = original["height"]

                url = original["url"]
                yield Message.Url, url, text.nameext_from_url(url, post)

    def posts(self):
        """Yield all items of the user's public wall, following 'next' links"""
        page_url = "{}/api/walls/@{}/posts/public.json".format(
            self.root, self.username)
        requested_with = "{}/@{}".format(self.root, self.username)
        headers = {
            "Accept": "application/vnd.sketch-v4+json",
            "X-Requested-With": requested_with,
            "Referer": self.root + "/",
        }

        while True:
            page = self.request(page_url, headers=headers).json()
            yield from page["data"]["items"]

            # a missing or empty 'next' link marks the last page
            following = page["_links"].get("next")
            if following:
                page_url = self.root + following["href"]
            else:
                return


class PixivAppAPI():
    """Minimal interface for the Pixiv App API for mobile devices

    Works on top of an extractor's session: installs the app request
    headers, obtains an access token from a refresh token when needed,
    and offers one small method per API endpoint.

    For a more complete implementation or documentation, see
    - https://github.com/upbit/pixivpy
    - https://gist.github.com/ZipFile/3ba99b47162c23f8aea5d5942bb557b1
    """
    CLIENT_ID = "MOBrBDS8blbauoSck0ZfDbtuzpyT"
    CLIENT_SECRET = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj"
    HASH_SECRET = ("28c1fdd170a5204386cb1313c7077b34"
                   "f83e4aaf4aa829ce78c231e05b0bae2c")

    def __init__(self, extractor):
        """Attach to 'extractor' and read credentials from its config"""
        self.extractor = extractor
        self.log = extractor.log
        auth_info = extractor._get_auth_info()
        self.username = auth_info[0]
        self.user = None

        # sent with every request made through the extractor's session
        app_headers = {
            "App-OS"        : "ios",
            "App-OS-Version": "13.1.2",
            "App-Version"   : "7.7.6",
            "User-Agent"    : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)",
            "Referer"       : "https://app-api.pixiv.net/",
        }
        extractor.session.headers.update(app_headers)

        config = extractor.config
        self.client_id = config("client-id", self.CLIENT_ID)
        self.client_secret = config("client-secret", self.CLIENT_SECRET)

        # no configured token, or the literal value "cache":
        # fall back to whatever is stored for this username
        configured = config("refresh-token")
        from_cache = configured is None or configured == "cache"
        self.refresh_token = (
            _refresh_token_cache(self.username) if from_cache
            else configured)

    def login(self):
        """Login and gain an access token"""
        user, authorization = self._login_impl(self.username)
        self.user = user
        self.extractor.session.headers["Authorization"] = authorization

    @cache(maxage=3600, keyarg=1)
    def _login_impl(self, username):
        """Exchange the refresh token for a (user, Authorization value) pair

        Raises AuthenticationError when no refresh token is set or when
        the token endpoint responds with a status code of 400 or above.
        """
        if not self.refresh_token:
            raise exception.AuthenticationError(
                "'refresh-token' required.\n"
                "Run `gallery-dl oauth:pixiv` to get one.")

        self.log.info("Refreshing access token")
        payload = {
            "client_id"     : self.client_id,
            "client_secret" : self.client_secret,
            "grant_type"    : "refresh_token",
            "refresh_token" : self.refresh_token,
            "get_secure_url": "1",
        }

        # X-Client-Hash is the MD5 hex digest of the client time string
        # concatenated with HASH_SECRET
        client_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
        digest = hashlib.md5((client_time + self.HASH_SECRET).encode())
        headers = {
            "X-Client-Time": client_time,
            "X-Client-Hash": digest.hexdigest(),
        }

        response = self.extractor.request(
            "https://oauth.secure.pixiv.net/auth/token",
            method="POST", headers=headers, data=payload, fatal=False)
        if response.status_code >= 400:
            self.log.debug(response.text)
            raise exception.AuthenticationError("Invalid refresh token")

        result = response.json()["response"]
        user = result["user"]
        return user, "Bearer " + result["access_token"]

    def illust_detail(self, illust_id):
        """Return the 'illust' object of a single illustration"""
        response = self._call("/v1/illust/detail", {"illust_id": illust_id})
        return response["illust"]

    def illust_bookmark_detail(self, illust_id):
        """Return the 'bookmark_detail' object of an illustration"""
        response = self._call(
            "/v2/illust/bookmark/detail", {"illust_id": illust_id})
        return response["bookmark_detail"]

    def illust_follow(self, restrict="all"):
        """Return illusts from the '/v2/illust/follow' endpoint"""
        return self._pagination("/v2/illust/follow", {"restrict": restrict})

    def illust_ranking(self, mode="day", date=None):
        """Return illusts from the ranking endpoint for 'mode' and 'date'"""
        return self._pagination(
            "/v1/illust/ranking", {"mode": mode, "date": date})

    def illust_related(self, illust_id):
        """Return illusts from the '/v2/illust/related' endpoint"""
        return self._pagination(
            "/v2/illust/related", {"illust_id": illust_id})

    def novel_bookmark_detail(self, novel_id):
        """Return the 'bookmark_detail' object of a novel"""
        response = self._call(
            "/v2/novel/bookmark/detail", {"novel_id": novel_id})
        return response["bookmark_detail"]

    def novel_detail(self, novel_id):
        """Return the 'novel' object of a single novel"""
        response = self._call("/v2/novel/detail", {"novel_id": novel_id})
        return response["novel"]

    def novel_series(self, series_id):
        """Return the novels of a novel series"""
        return self._pagination(
            "/v1/novel/series", {"series_id": series_id}, "novels")

    def novel_text(self, novel_id):
        """Return the complete response of the '/v1/novel/text' endpoint"""
        return self._call("/v1/novel/text", {"novel_id": novel_id})

    def search_illust(self, word, sort=None, target=None, duration=None,
                      date_start=None, date_end=None):
        """Return illusts from the search endpoint for 'word'"""
        query = {
            "word"         : word,
            "search_target": target,
            "sort"         : sort,
            "duration"     : duration,
            "start_date"   : date_start,
            "end_date"     : date_end,
        }
        return self._pagination("/v1/search/illust", query)

    def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
        """Return illusts bookmarked by a user"""
        query = {"user_id": user_id, "tag": tag, "restrict": restrict}
        return self._pagination("/v1/user/bookmarks/illust", query)

    def user_bookmarks_novel(self, user_id, tag=None, restrict="public"):
        """Return novels bookmarked by a user"""
        query = {"user_id": user_id, "tag": tag, "restrict": restrict}
        return self._pagination("/v1/user/bookmarks/novel", query, "novels")

    def user_bookmark_tags_illust(self, user_id, restrict="public"):
        """Return bookmark tags defined by a user"""
        query = {"user_id": user_id, "restrict": restrict}
        return self._pagination(
            "/v1/user/bookmark-tags/illust", query, "bookmark_tags")

    @memcache(keyarg=1)
    def user_detail(self, user_id):
        """Return the complete response of the '/v1/user/detail' endpoint"""
        return self._call("/v1/user/detail", {"user_id": user_id})

    def user_following(self, user_id, restrict="public"):
        """Return the 'user_previews' entries of the users a user follows"""
        query = {"user_id": user_id, "restrict": restrict}
        return self._pagination("/v1/user/following", query, "user_previews")

    def user_illusts(self, user_id):
        """Return illusts from the '/v1/user/illusts' endpoint"""
        return self._pagination("/v1/user/illusts", {"user_id": user_id})

    def user_novels(self, user_id):
        """Return novels from the '/v1/user/novels' endpoint"""
        return self._pagination(
            "/v1/user/novels", {"user_id": user_id}, "novels")

    def ugoira_metadata(self, illust_id):
        """Return the 'ugoira_metadata' object of an illustration"""
        response = self._call(
            "/v1/ugoira/metadata", {"illust_id": illust_id})
        return response["ugoira_metadata"]

    def _call(self, endpoint, params=None):
        """Request 'endpoint' and return its decoded JSON response

        Retries after waiting when the error message mentions a rate
        limit. Raises NotFoundError for an error response with status
        code 404 and StopExtraction for every other error response.
        """
        url = "https://app-api.pixiv.net" + endpoint

        while True:
            # login() runs before every attempt, including retries
            self.login()
            response = self.extractor.request(url, params=params, fatal=False)
            data = response.json()

            if "error" not in data:
                return data

            self.log.debug(data)

            if response.status_code == 404:
                raise exception.NotFoundError()

            error = data["error"]
            message = error.get("message") or ""
            if "rate limit" not in message.lower():
                raise exception.StopExtraction(
                    "API request failed: %s", error)

            self.extractor.wait(seconds=300)

    def _pagination(self, endpoint, params, key="illusts"):
        """Yield the entries under 'key' from every page of 'endpoint'

        Parameters for the following page are taken from the query
        string of the response's 'next_url' value; a false value there
        ends the iteration.
        """
        while True:
            page = self._call(endpoint, params)
            yield from page[key]

            next_url = page["next_url"]
            if not next_url:
                return
            params = text.parse_query(next_url.rpartition("?")[2])


@cache(maxage=10*365*24*3600, keyarg=0)
def _refresh_token_cache(username):
    """Return the refresh token stored for 'username'

    The function body itself always returns None; any other result has
    to come from the 'cache' decorator.  PixivAppAPI.__init__ calls this
    when the 'refresh-token' option is unset or set to "cache".
    """
    # NOTE(review): presumably the cache entry for a username is filled
    # in elsewhere (e.g. by the OAuth helper) -- confirm against callers.
    return None
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
+								# -*- coding: utf-8 -*-
-												[pixiv] implement 'metadata-bookmark' option (#3417)

											
										
										
											2023-01-07 23:12:36 +01:00
+								# Copyright 2014-2023 Mike Fährmann
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
+								#
 								# This program is free software; you can redistribute it and/or modify
 								# it under the terms of the GNU General Public License version 2 as
 								# published by the Free Software Foundation.
-												[pixiv] add extractor for 'pixivision' articles (#1672)

											
										
										
											2021-07-07 02:22:44 +02:00
+								"""Extractors for https://www.pixiv.net/"""
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
-												remove SequentialExtractor class

											
										
										
											2015-10-05 17:15:31 +02:00
+								from .common import Extractor, Message
-												[pixiv] add extractor for 'pixivision' articles (#1672)

											
										
										
											2021-07-07 02:22:44 +02:00
+								from .. import text, util, exception
-												[pixiv] update API interface

- start all endpoints with '/'
- use extractor.wait() for rate limit
- retry with while loop instead of recursion
- in case of error, write entire response to debug log

											
										
										
											2022-04-29 16:29:12 +02:00
+								from ..cache import cache, memcache
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								from datetime import datetime, timedelta
-												[pixiv] implement 'avatar' option (#595, #623)

											
										
										
											2020-03-09 21:17:16 +01:00
+								import itertools
-												[pixiv] fix authentication

											
										
										
											2019-09-02 22:34:04 +02:00
+								import hashlib
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
 								USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
-												code adjustments according to pep8 nr2

											
										
										
											2017-02-01 00:53:19 +01:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								class PixivExtractor(Extractor):
 								    """Base class for pixiv extractors"""
-												update all other extractors

											
										
										
											2015-11-21 04:26:30 +01:00
+								    category = "pixiv"
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								    root = "https://www.pixiv.net"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    directory_fmt = ("{category}", "{user[id]} {user[account]}")
-												[pixiv] simplify default filename format

(#366)

											
										
										
											2019-08-15 13:32:47 +02:00
+								    filename_fmt = "{id}_p{num}.{extension}"
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								    archive_fmt = "{id}{suffix}.{extension}"
-												let extractors opt-out of cookie option usage

useful to avoid sending unnecessary cookies when all authentication
is done through OAuth tokens

											
										
										
											2020-01-01 16:07:23 +01:00
+								    cookiedomain = None
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
-												propagate 'match' to base extractor constructor

											
										
										
											2019-02-11 13:31:10 +01:00
+								    def __init__(self, match):
 								        Extractor.__init__(self, match)
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								        self.api = PixivAppAPI(self)
-												implement and use extractor.config() method

											
										
										
											2017-04-25 17:12:48 +02:00
+								        self.load_ugoira = self.config("ugoira", True)
-												[pixiv] implement 'max-posts' option (#1558)

* implement max-rank for pixiv

* rename to max-posts and make more generic
											
										
										
											2021-05-24 17:49:46 +02:00
+								        self.max_posts = self.config("max-posts", 0)
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
+								    def items(self):
-												[pixiv] change 'translated-tags' option (#1507)

- rename to 'tags'
- use string-values: "japanese", "translated", "noop"
- remove duplicate entries for "translated" tags

											
										
										
											2021-04-27 23:51:37 +02:00
+								        tags = self.config("tags", "japanese")
-												[pixiv] rename "noop" value for 'tags' option to "original"

(#1507)

											
										
										
											2021-05-07 20:41:54 +02:00
+								        if tags == "original":
-												[pixiv] change 'translated-tags' option (#1507)

- rename to 'tags'
- use string-values: "japanese", "translated", "noop"
- remove duplicate entries for "translated" tags

											
										
										
											2021-04-27 23:51:37 +02:00
+								            transform_tags = None
 								        elif tags == "translated":
 								            def transform_tags(work):
-												[pixiv] preserve 'tags' order (#3266)

for '"tags": "translated"'

As it turns out, set() does *not* preserve insertion order.

											
										
										
											2022-11-22 19:11:37 +01:00
+								                work["tags"] = list(dict.fromkeys(
-												[pixiv] change 'translated-tags' option (#1507)

- rename to 'tags'
- use string-values: "japanese", "translated", "noop"
- remove duplicate entries for "translated" tags

											
										
										
											2021-04-27 23:51:37 +02:00
+								                    tag["translated_name"] or tag["name"]
 								                    for tag in work["tags"]))
 								        else:
 								            def transform_tags(work):
 								                work["tags"] = [tag["name"] for tag in work["tags"]]
-												[pixiv] add 'rating' metadata field (#595)

A human-friendlier representation of 'x_restrict'

											
										
										
											2020-02-01 01:36:06 +01:00
+								        ratings = {0: "General", 1: "R-18", 2: "R-18G"}
-												[pixiv] implement 'metadata-bookmark' option (#3417)

											
										
										
											2023-01-07 23:12:36 +01:00
+								        meta_user = self.config("metadata")
 								        meta_bookmark = self.config("metadata-bookmark")
-												[pixiv] reduce calls to '/user/detail'

											
										
										
											2020-02-09 13:54:58 +01:00
+								        metadata = self.metadata()
-												[pixiv] transition to pixiv public api

											
										
										
											2015-05-14 19:08:20 +02:00
-												[pixiv] implement 'max-posts' option (#1558)

* implement max-rank for pixiv

* rename to max-posts and make more generic
											
										
										
											2021-05-24 17:49:46 +02:00
+								        works = self.works()
 								        if self.max_posts:
 								            works = itertools.islice(works, self.max_posts)
 								        for work in works:
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								            if not work["user"]["id"]:
 								                continue
 								            meta_single_page = work["meta_single_page"]
 								            meta_pages = work["meta_pages"]
 								            del work["meta_single_page"]
 								            del work["image_urls"]
 								            del work["meta_pages"]
-												[pixiv] change 'translated-tags' option (#1507)

- rename to 'tags'
- use string-values: "japanese", "translated", "noop"
- remove duplicate entries for "translated" tags

											
										
										
											2021-04-27 23:51:37 +02:00
-												[pixiv] implement 'metadata-bookmark' option (#3417)

											
										
										
											2023-01-07 23:12:36 +01:00
+								            if meta_user:
-												[pixiv] extend 'metadata' option (#3057)

make it usable for all 'pixiv' extractors

											
										
										
											2022-10-16 15:32:31 +02:00
+								                work.update(self.api.user_detail(work["user"]["id"]))
-												[pixiv] implement 'metadata-bookmark' option (#3417)

											
										
										
											2023-01-07 23:12:36 +01:00
+								            if meta_bookmark and work["is_bookmarked"]:
 								                detail = self.api.illust_bookmark_detail(work["id"])
 								                work["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
 								                                         if tag["is_registered"]]
-												[pixiv] change 'translated-tags' option (#1507)

- rename to 'tags'
- use string-values: "japanese", "translated", "noop"
- remove duplicate entries for "translated" tags

											
										
										
											2021-04-27 23:51:37 +02:00
+								            if transform_tags:
 								                transform_tags(work)
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								            work["num"] = 0
-												apply workaround from 4736912 in parse_datetime() itself

											
										
										
											2019-05-09 21:53:17 +02:00
+								            work["date"] = text.parse_datetime(work["create_date"])
-												[pixiv] add 'rating' metadata field (#595)

A human-friendlier representation of 'x_restrict'

											
										
										
											2020-02-01 01:36:06 +01:00
+								            work["rating"] = ratings.get(work["x_restrict"])
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								            work["suffix"] = ""
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								            work.update(metadata)
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
-												[pixiv] create directory for each "work" item (#136)

											
										
										
											2018-12-11 20:37:47 +01:00
+								            yield Message.Directory, work
-												[pixiv] transition to pixiv public api

											
										
										
											2015-05-14 19:08:20 +02:00
+								            if work["type"] == "ugoira":
-												[pixiv] add option to skip ugoiras

											
										
										
											2016-07-20 14:19:46 +02:00
+								                if not self.load_ugoira:
 								                    continue
-												[pixiv] make retrieving ugoira metadata non-fatal (#2562)

											
										
										
											2022-05-08 20:05:38 +02:00
 								                try:
 								                    ugoira = self.api.ugoira_metadata(work["id"])
 								                except exception.StopExtraction as exc:
 								                    self.log.warning(
 								                        "Unable to retrieve Ugoira metatdata (%s - %s)",
 								                        work.get("id"), exc.message)
 								                    continue
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
 								                url = ugoira["zip_urls"]["medium"].replace(
 								                    "_ugoira600x600", "_ugoira1920x1080")
-												[pixiv] rework ugoira handling

Frame information now gets attached to the ZIP file's keyword dict
instead of being written to a separate text file.

											
										
										
											2018-06-18 17:25:52 +02:00
+								                work["frames"] = ugoira["frames"]
-												[pixiv] extract 'date_url' metadata (#3405)

i.e. the datetime encoded in each file URL.

https://i.pximg.net/img-master/img/2022/12/01/13/44/55/12345678_p0.jpg
->
2022-12-01 13:44:55 +09:00
->
2022-12-01 04:44:55

											
										
										
											2022-12-15 11:40:20 +01:00
+								                work["date_url"] = self._date_from_url(url)
-												[downloader:http] disable filename extension changes for ugoira

(#1507)

											
										
										
											2021-04-27 00:48:53 +02:00
+								                work["_http_adjust_extension"] = False
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								                yield Message.Url, url, text.nameext_from_url(url, work)
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
-												[pixiv] transition to pixiv public api

											
										
										
											2015-05-14 19:08:20 +02:00
+								            elif work["page_count"] == 1:
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								                url = meta_single_page["original_image_url"]
-												[pixiv] extract 'date_url' metadata (#3405)

i.e. the datetime encoded in each file URL.

https://i.pximg.net/img-master/img/2022/12/01/13/44/55/12345678_p0.jpg
->
2022-12-01 13:44:55 +09:00
->
2022-12-01 04:44:55

											
										
										
											2022-12-15 11:40:20 +01:00
+								                work["date_url"] = self._date_from_url(url)
-												[pixiv] provide 'filename' and change default filename format

to '{filename}.{extension}' (closes #366)

											
										
										
											2019-08-02 22:35:10 +02:00
+								                yield Message.Url, url, text.nameext_from_url(url, work)
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
 								            else:
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								                for work["num"], img in enumerate(meta_pages):
-												[pixiv] use App API

Transitioning to the App API breaks favorites archive IDs (there is
no longer any bookmark ID information), but the favorites API endpoint
of the public API was gone anyways ...

											
										
										
											2018-05-13 22:24:21 +02:00
+								                    url = img["image_urls"]["original"]
-												[pixiv] extract 'date_url' metadata (#3405)

i.e. the datetime encoded in each file URL.

https://i.pximg.net/img-master/img/2022/12/01/13/44/55/12345678_p0.jpg
->
2022-12-01 13:44:55 +09:00
->
2022-12-01 04:44:55

											
										
										
											2022-12-15 11:40:20 +01:00
+								                    work["date_url"] = self._date_from_url(url)
-												[pixiv] update metadata entries (#366)

- change 'num' to a simple enumerating integer
- change default filename format
- provide content of the old 'num' field as 'suffix'
- add 'filename' for ugoira

											
										
										
											2019-08-04 22:35:56 +02:00
+								                    work["suffix"] = "_p{:02}".format(work["num"])
-												[pixiv] provide 'filename' and change default filename format

to '{filename}.{extension}' (closes #366)

											
										
										
											2019-08-02 22:35:10 +02:00
+								                    yield Message.Url, url, text.nameext_from_url(url, work)
-												[pixiv] transition to pixiv public api

											
										
										
											2015-05-14 19:08:20 +02:00
-												[pixiv] extract 'date_url' metadata (#3405)

i.e. the datetime encoded in each file URL.

https://i.pximg.net/img-master/img/2022/12/01/13/44/55/12345678_p0.jpg
->
2022-12-01 13:44:55 +09:00
->
2022-12-01 04:44:55

											
										
										
											2022-12-15 11:40:20 +01:00
+								    @staticmethod
 								    def _date_from_url(url, offset=timedelta(hours=9)):
 								        try:
 								            _, _, _, _, _, y, m, d, H, M, S, _ = url.split("/")
 								            return datetime(
 								                int(y), int(m), int(d), int(H), int(M), int(S)) - offset
 								        except Exception:
 								            return None
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								    @staticmethod
 								    def _make_work(kind, url, user):
-												[pixiv] updates to avatar/background extractors (#2495)

- add 'date' metadata to avatar/background files when available
  and use that in default filenames / archive ids
- remove deprecation warnings as their option names clash with
  subcategory names

											
										
										
											2022-05-03 16:11:49 +02:00
+								        p = url.split("/")
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								        return {
-												[pixiv] updates to avatar/background extractors (#2495)

- add 'date' metadata to avatar/background files when available
  and use that in default filenames / archive ids
- remove deprecation warnings as their option names clash with
  subcategory names

											
										
										
											2022-05-03 16:11:49 +02:00
+								            "create_date"     : "{}-{}-{}T{}:{}:{}+09:00".format(
 								                p[5], p[6], p[7], p[8], p[9], p[10]) if len(p) > 9 else None,
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								            "height"          : 0,
 								            "id"              : kind,
 								            "image_urls"      : None,
 								            "meta_pages"      : (),
 								            "meta_single_page": {"original_image_url": url},
 								            "page_count"      : 1,
 								            "sanity_level"    : 0,
 								            "tags"            : (),
 								            "title"           : kind,
 								            "type"            : kind,
 								            "user"            : user,
 								            "width"           : 0,
 								            "x_restrict"      : 0,
 								        }
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								    def works(self):
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								        """Return an iterable containing all relevant 'work' objects"""
-												initial commit

											
										
										
											2014-10-12 21:56:44 +02:00
-												[pixiv] reduce calls to '/user/detail'

											
										
										
											2020-02-09 13:54:58 +01:00
+								    def metadata(self):
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								        """Collect metadata for extractor job"""
-												[pixiv] reduce calls to '/user/detail'

											
										
										
											2020-02-09 13:54:58 +01:00
+								        return {}
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
 								class PixivUserExtractor(PixivExtractor):
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								    """Extractor for a pixiv user profile"""
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								    subcategory = "user"
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = (BASE_PATTERN + r"/(?:"
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								               r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
 								               r")(\d+)(?:$|[?#])")
 								    test = (
 								        ("https://www.pixiv.net/en/users/173530"),
 								        ("https://www.pixiv.net/u/173530"),
 								        ("https://www.pixiv.net/member.php?id=173530"),
 								        ("https://www.pixiv.net/mypage.php#id=173530"),
 								        ("https://www.pixiv.net/#id=173530"),
 								    )
 								    def __init__(self, match):
 								        """Initialize the base extractor and store the matched user ID"""
 								        PixivExtractor.__init__(self, match)
 								        # the first capture group of 'pattern' is the numeric user ID
 								        self.user_id = match.group(1)
 								    def items(self):
 								        base = "{}/users/{}/".format(self.root, self.user_id)
 								        return self._dispatch_extractors((
-												[pixiv] add 'novel-bookmark' extractor (#4111)

											
										
										
											2023-05-28 16:30:17 +02:00
+								            (PixivAvatarExtractor       , base + "avatar"),
 								            (PixivBackgroundExtractor   , base + "background"),
 								            (PixivArtworksExtractor     , base + "artworks"),
 								            (PixivFavoriteExtractor     , base + "bookmarks/artworks"),
 								            (PixivNovelBookmarkExtractor, base + "bookmarks/novels"),
 								            (PixivNovelUserExtractor    , base + "novels"),
-												[pixiv] updates to avatar/background extractors (#2495)

- add 'date' metadata to avatar/background files when available
  and use that in default filenames / archive ids
- remove deprecation warnings as their option names clash with
  subcategory names

											
										
										
											2022-05-03 16:11:49 +02:00
+								        ), ("artworks",))
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
 								class PixivArtworksExtractor(PixivExtractor):
 								    """Extractor for artworks of a pixiv user"""
 								    subcategory = "artworks"
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = (BASE_PATTERN + r"/(?:"
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								               r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
 								               r"(?:/([^/?#]+))?/?(?:$|[?#])"
 								               r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    test = (
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        ("https://www.pixiv.net/en/users/173530/artworks", {
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								            "url": "852c31ad83b6840bacbce824d85f2a997889efb7",
 								        }),
-												[pixiv] improve bookmark extraction

- combine 'favorite' and 'bookmark' extractors
  - it is now one extractor class, but its subcategory still
    distinguishes between your own bookmarks ('bookmark') and other
    user's bookmarks ('favorite') like before
- allow filtering by bookmark tags and public/private bookmarks
- fix pagination for bookmark results

											
										
										
											2018-05-18 16:55:24 +02:00
+								        # illusts with specific tag
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        (("https://www.pixiv.net/en/users/173530/artworks"
 								          "/%E6%89%8B%E3%81%B6%E3%82%8D"), {
 								            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
 								        }),
-												[pixiv] unquote tags

											
										
										
											2017-07-12 08:21:29 +02:00
+								        (("https://www.pixiv.net/member_illust.php?id=173530"
 								          "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
 								            "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								        }),
-												[pixiv] implement 'avatar' option (#595, #623)

											
										
										
											2020-03-09 21:17:16 +01:00
+								        # deleted account
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								        ("http://www.pixiv.net/member_illust.php?id=173531", {
-												[pixiv] add 'metadata' option (#1551)

											
										
										
											2021-05-14 20:30:28 +02:00
+								            "options": (("metadata", True),),
 								            "exception": exception.NotFoundError,
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								        }),
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        ("https://www.pixiv.net/en/users/173530/manga"),
 								        ("https://www.pixiv.net/en/users/173530/illustrations"),
 								        ("https://www.pixiv.net/member_illust.php?id=173530"),
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								        ("https://touch.pixiv.net/member_illust.php?id=173530"),
 								    )
-												[pixiv] update to new extractor interface

											
										
										
											2015-04-10 15:29:09 +02:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								    def __init__(self, match):
-												propagate 'match' to base extractor constructor

											
										
										
											2019-02-11 13:31:10 +01:00
+								        PixivExtractor.__init__(self, match)
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								        u1, t1, u2, t2 = match.groups()
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        if t1:
 								            t1 = text.unquote(t1)
 								        elif t2:
 								            t2 = text.parse_query(t2).get("tag")
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								        self.user_id = u1 or u2
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        self.tag = t1 or t2
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
-												[pixiv] add 'metadata' option (#1551)

											
										
										
											2021-05-14 20:30:28 +02:00
+								    def metadata(self):
 								        if self.config("metadata"):
-												[pixiv] extend 'metadata' option (#3057)

make it usable for all 'pixiv' extractors

											
										
										
											2022-10-16 15:32:31 +02:00
+								            self.api.user_detail(self.user_id)
-												[pixiv] add 'metadata' option (#1551)

											
										
										
											2021-05-14 20:30:28 +02:00
+								        return {}
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
+								    def works(self):
-												[pixiv] respect more query parameters for user URLs

The API endpoint responsible for user illustrations does not
provide sufficient filter capabilities* to match the actual
website, so we are spinning our own filters.

Respected parameters are
    'type': illust, manga, ugoira
    'tag' : any image tag (this was already supported)
    'p'   : the page to start on

*
- API can filter for illustrations and manga, but not for ugoira.
- 'offset' is applied before filtering
- no 'tag' filter

											
										
										
											2018-05-18 15:30:06 +02:00
+								        works = self.api.user_illusts(self.user_id)
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								        if self.tag:
 								            tag = self.tag.lower()
-												[pixiv] remove 'type' and 'page' query parameter handling

The "new and improved" /member_illust.php and /bookmark.php listings
don't quite work with how things were.

											
										
										
											2018-10-03 16:08:36 +02:00
+								            works = (
 								                work for work in works
 								                if tag in [t["name"].lower() for t in work["tags"]]
 								            )
-												[pixiv] respect more query parameters for user URLs

The API endpoint responsible for user illustrations does not
provide sufficient filter capabilities* to match the actual
website, so we are spinning our own filters.

Respected parameters are
    'type': illust, manga, ugoira
    'tag' : any image tag (this was already supported)
    'p'   : the page to start on

*
- API can filter for illustrations and manga, but not for ugoira.
- 'offset' is applied before filtering
- no 'tag' filter

											
										
										
											2018-05-18 15:30:06 +02:00
 								        return works
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
 								class PixivAvatarExtractor(PixivExtractor):
 								    """Extractor for pixiv avatars"""
 								    subcategory = "avatar"
-												[pixiv] updates to avatar/background extractors (#2495)

- add 'date' metadata to avatar/background files when available
  and use that in default filenames / archive ids
- remove deprecation warnings as their option names clash with
  subcategory names

											
										
										
											2022-05-03 16:11:49 +02:00
+								    filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}"
 								    archive_fmt = "avatar_{user[id]}_{date}"
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = USER_PATTERN + r"/avatar"
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								    test = ("https://www.pixiv.net/en/users/173530/avatar", {
 								        "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
 								    })
 								    def __init__(self, match):
 								        """Initialize the base extractor and store the matched user ID"""
 								        PixivExtractor.__init__(self, match)
 								        # group 1 is the '(\d+)' user ID captured by USER_PATTERN
 								        self.user_id = match.group(1)
 								    def works(self):
 								        """Return a 1-tuple holding a 'work' object for the user's avatar"""
 								        profile = self.api.user_detail(self.user_id)["user"]
 								        thumbnail = profile["profile_image_urls"]["medium"]
 								        # drop the '_170' marker in front of the file extension
 								        # (presumably this selects the full-size image - TODO confirm)
 								        avatar_url = thumbnail.replace("_170.", ".")
 								        work = self._make_work("avatar", avatar_url, profile)
 								        return (work,)
 								class PixivBackgroundExtractor(PixivExtractor):
 								    """Extractor for pixiv background banners"""
 								    subcategory = "background"
-												[pixiv] fix default filenames for backgrounds

											
										
										
											2022-07-11 00:35:23 +02:00
+								    filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}"
-												[pixiv] updates to avatar/background extractors (#2495)

- add 'date' metadata to avatar/background files when available
  and use that in default filenames / archive ids
- remove deprecation warnings as their option names clash with
  subcategory names

											
										
										
											2022-05-03 16:11:49 +02:00
+								    archive_fmt = "background_{user[id]}_{date}"
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = USER_PATTERN + "/background"
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								    test = ("https://www.pixiv.net/en/users/194921/background", {
 								        "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02"
 								                   r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg",
 								    })
 								    def __init__(self, match):
 								        """Initialize the base extractor and store the matched user ID"""
 								        PixivExtractor.__init__(self, match)
 								        # group 1 is the '(\d+)' user ID captured by USER_PATTERN
 								        self.user_id = match.group(1)
 								    def works(self):
 								        detail = self.api.user_detail(self.user_id)
 								        url = detail["profile"]["background_image_url"]
 								        if not url:
 								            return ()
 								        if "/c/" in url:
 								            parts = url.split("/")
 								            del parts[3:5]
 								            url = "/".join(parts)
 								        url = url.replace("_master1200.", ".")
 								        work = self._make_work("background", url, detail["user"])
 								        if url.endswith(".jpg"):
-												[pixiv] include '.gif' in background fallback URLs (#2495)

											
										
										
											2022-06-03 17:25:23 +02:00
+								            url = url[:-4]
 								            work["_fallback"] = (url + ".png", url + ".gif")
-												[pixiv] implement 'include' option

- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before

											
										
										
											2022-05-01 21:12:23 +02:00
+								        return (work,)
-												[pixiv] implement 'background' option (#623, #1124, #2495)

											
										
										
											2022-04-21 13:53:02 +02:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
+								class PixivMeExtractor(PixivExtractor):
 								    """Extractor for pixiv.me URLs"""
 								    subcategory = "me"
-												remove '&' from URL patterns

'/?&#' -> '/?#' and '?&#' -> '?#'

According to https://www.ietf.org/rfc/rfc3986.txt, URLs are
"organized hierarchically" by using "the slash ("/"), question
mark ("?"), and number sign ("#") characters to delimit components"

											
										
										
											2020-10-22 23:12:59 +02:00
+								    pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    test = (
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
+								        ("https://pixiv.me/del_shannon", {
-												[pixiv] update URL patterns (fixes #568)

Pixiv now uses new URLs for
- user profiles and illustration listings:
  - https://www.pixiv.net/en/users/<ID>
  - https://www.pixiv.net/en/users/<ID>/artworks
- bookmarks:
  - https://www.pixiv.net/en/users/<ID>/bookmarks/artworks

											
										
										
											2020-01-10 13:31:06 +01:00
+								            "url": "29c295ce75150177e6b0a09089a949804c708fbf",
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
+								        }),
 								        ("https://pixiv.me/del_shanno", {
 								            "exception": exception.NotFoundError,
 								        }),
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    )
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
 								    def __init__(self, match):
-												propagate 'match' to base extractor constructor

											
										
										
											2019-02-11 13:31:10 +01:00
+								        PixivExtractor.__init__(self, match)
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
+								        self.account = match.group(1)
 								    def items(self):
-												use 'extractor.request()' for more HTTP requests

											
										
										
											2018-06-25 22:39:43 +02:00
+								        url = "https://pixiv.me/" + self.account
-												add '_extractor' information to redirect results

											
										
										
											2019-12-29 23:37:34 +01:00
+								        data = {"_extractor": PixivUserExtractor}
-												use 'extractor.request()' for more HTTP requests

											
										
										
											2018-06-25 22:39:43 +02:00
+								        response = self.request(
-												replace extractor.request() 'expect' argument

with
- 'fatal': allow 4xx status codes
- 'notfound': raise NotFoundError on 404

											
										
										
											2019-07-04 23:45:26 +02:00
+								            url, method="HEAD", allow_redirects=False, notfound="user")
-												add '_extractor' information to redirect results

											
										
										
											2019-12-29 23:37:34 +01:00
+								        yield Message.Queue, response.headers["Location"], data
-												[pixiv] support pixiv.me URLs (#23)

											
										
										
											2017-06-25 20:18:27 +02:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
class PixivWorkExtractor(PixivExtractor):
    """Extractor for a single pixiv work/illustration"""
    subcategory = "work"
    # Two alternative capture groups hold the illustration ID:
    #   group 1 - page URLs ('/artworks/<id>', 'member_illust.php?illust_id=')
    #   group 2 - direct image links on i*.pixiv.net / i.pximg.net /
    #             img*.pixiv.net, and short 'pixiv.net/i/<id>' URLs
    pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
               r"/(?:(?:en/)?artworks/"
               r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)"
               r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
               r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)"
               r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))")
    test = (
        ("https://www.pixiv.net/artworks/966412", {
            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
            "content": "69a8edfb717400d1c2e146ab2b30d2c235440c5a",
            "keyword": {
                "date"    : "dt:2008-06-12 15:29:13",
                "date_url": "dt:2008-06-12 15:29:13",
            },
        }),
        (("http://www.pixiv.net/member_illust.php"
          "?mode=medium&illust_id=966411"), {
            "exception": exception.NotFoundError,
        }),
        # ugoira
        (("https://www.pixiv.net/member_illust.php"
          "?mode=medium&illust_id=66806629"), {
            "url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef",
            "keyword": {
                "frames"  : list,
                "date"    : "dt:2018-01-14 15:06:08",
                "date_url": "dt:2018-01-15 04:24:48",
            },
        }),
        # related works (#1237)
        ("https://www.pixiv.net/artworks/966412", {
            "options": (("related", True),),
            "range": "1-10",
            "count": ">= 10",
        }),
        ("https://www.pixiv.net/en/artworks/966412"),
        ("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=96641"),
        ("http://i1.pixiv.net/c/600x600/img-master"
         "/img/2008/06/13/00/29/13/966412_p0_master1200.jpg"),
        ("https://i.pximg.net/img-original"
         "/img/2017/04/25/07/33/29/62568267_p0.png"),
        ("https://www.pixiv.net/i/966412"),
        ("http://img.pixiv.net/img/soundcross/42626136.jpg"),
        ("http://i2.pixiv.net/img76/img/snailrin/42672235.jpg"),
    )

    def __init__(self, match):
        """Store the illustration ID captured by 'pattern'"""
        PixivExtractor.__init__(self, match)
        # only one of the two ID groups participates in any given match
        self.illust_id = match.group(1) or match.group(2)

    def works(self):
        """Return the requested work, optionally followed by related works

        The result is a 1-tuple holding the work's details. When the
        'related' option is enabled, the results of the API's
        'illust_related()' call are chained after it.
        """
        works = (self.api.illust_detail(self.illust_id),)
        if self.config("related", False):
            related = self.api.illust_related(self.illust_id)
            works = itertools.chain(works, related)
        return works
-												[pixiv] add single work/illust extractor

											
										
										
											2015-11-22 02:21:02 +01:00
-												[pixiv] rewrite

- same functionality, better(?) code quality, easier to extend

- added test for the user-tag functionality

- removed the 'artist-id', 'artist-name' and 'artist-nick'
  keywords, which can be replaced with 'user[id]', 'user[name]'
  and 'user[account]' respectively

											
										
										
											2017-06-04 16:33:36 +02:00
class PixivFavoriteExtractor(PixivExtractor):
    """Extractor for all favorites/bookmarks of a pixiv user"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "bookmarks",
                     "{user_bookmark[id]} {user_bookmark[account]}")
    archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
    # Capture groups: (1) user ID, (2) 'bookmarks/artworks' or 'following',
    # (3) optional trailing tag path segment, (4) query string.
    # Legacy 'bookmark.php' URLs only ever fill group 4.
    pattern = (BASE_PATTERN + r"/(?:(?:en/)?"
               r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?"
               r"|bookmark\.php)(?:\?([^#]*))?")
    test = (
        ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
            "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
        }),
        ("https://www.pixiv.net/bookmark.php?id=173530", {
            "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
        }),
        # bookmarks with specific tag
        (("https://www.pixiv.net/en/users/3137110"
          "/bookmarks/artworks/%E3%81%AF%E3%82%93%E3%82%82%E3%82%93"), {
            "url": "379b28275f786d946e01f721e54afe346c148a8c",
        }),
        # bookmarks with specific tag (legacy url)
        (("https://www.pixiv.net/bookmark.php?id=3137110"
          "&tag=%E3%81%AF%E3%82%93%E3%82%82%E3%82%93&p=1"), {
            "url": "379b28275f786d946e01f721e54afe346c148a8c",
        }),
        # own bookmarks
        ("https://www.pixiv.net/bookmark.php", {
            "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
            "keyword": {"tags_bookmark": ["47", "hitman"]},
            "options": (("metadata-bookmark", True),),
        }),
        # own bookmarks with tag (#596)
        ("https://www.pixiv.net/bookmark.php?tag=foobar", {
            "count": 0,
        }),
        # followed users (#515)
        ("https://www.pixiv.net/en/users/173530/following", {
            "pattern": PixivUserExtractor.pattern,
            "count": ">= 12",
        }),
        # followed users (legacy url) (#515)
        ("https://www.pixiv.net/bookmark.php?id=173530&type=user", {
            "pattern": PixivUserExtractor.pattern,
            "count": ">= 12",
        }),
        # touch URLs
        ("https://touch.pixiv.net/bookmark.php?id=173530"),
        ("https://touch.pixiv.net/bookmark.php"),
    )

    def __init__(self, match):
        """Parse the URL and select the 'favorite', 'bookmark' or
        'following' behavior before initializing the base extractor"""
        uid, kind, self.tag, query = match.groups()
        query = text.parse_query(query)

        if not uid:
            # legacy URLs carry the user ID as 'id' query parameter
            uid = query.get("id")
            if not uid:
                # no user ID anywhere: the logged-in user's own bookmarks
                self.subcategory = "bookmark"

        if kind == "following" or query.get("type") == "user":
            # list followed users instead of bookmarked works
            self.subcategory = "following"
            self.items = self._items_following

        # NOTE(review): the subcategory is assigned before the base
        # constructor runs - presumably because it reads it; confirm
        # against Extractor.__init__ before reordering.
        PixivExtractor.__init__(self, match)
        self.query = query
        self.user_id = uid

    def works(self):
        """Return the bookmarked works for 'self.user_id'

        A 'tag' query parameter takes precedence over a tag given as
        trailing path segment; 'rest=hide' selects private bookmarks.
        """
        tag = None
        if "tag" in self.query:
            tag = text.unquote(self.query["tag"])
        elif self.tag:
            tag = text.unquote(self.tag)

        restrict = "public"
        if self.query.get("rest") == "hide":
            restrict = "private"

        return self.api.user_bookmarks_illust(self.user_id, tag, restrict)

    def metadata(self):
        """Return the bookmark owner as 'user_bookmark' metadata

        Without an explicit user ID, log in and use the API's own user
        object; 'self.user_id' is updated from the result either way.
        """
        if self.user_id:
            user = self.api.user_detail(self.user_id)["user"]
        else:
            self.api.login()
            user = self.api.user

        self.user_id = user["id"]
        return {"user_bookmark": user}

    def _items_following(self):
        """Yield a Queue message for every user followed by 'self.user_id'

        Replaces 'items()' for 'following' URLs; 'rest=hide' selects
        privately followed users.
        """
        restrict = "public"
        if self.query.get("rest") == "hide":
            restrict = "private"

        for preview in self.api.user_following(self.user_id, restrict):
            user = preview["user"]
            user["_extractor"] = PixivUserExtractor
            url = "https://www.pixiv.net/users/{}".format(user["id"])
            yield Message.Queue, url, user
-												code adjustments according to pep8 nr2

											
										
										
											2017-02-01 00:53:19 +01:00
-												[pixiv] add extractor for ranking lists

											
										
										
											2017-08-20 20:21:52 +02:00
class PixivRankingExtractor(PixivExtractor):
    """Extractor for pixiv ranking pages"""
    subcategory = "ranking"
    archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
    directory_fmt = ("{category}", "rankings",
                     "{ranking[mode]}", "{ranking[date]}")
    pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?"
    test = (
        ("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"),
        ("https://www.pixiv.net/ranking.php"),
        ("https://touch.pixiv.net/ranking.php"),
        ("https://www.pixiv.net/ranking.php?mode=unknown", {
            "exception": exception.StopExtraction,
        }),
    )

    def __init__(self, match):
        """Keep the raw query string; it is parsed later in metadata()"""
        PixivExtractor.__init__(self, match)
        self.query = match.group(1)
        self.mode = self.date = None

    def works(self):
        """Return the ranking results for 'self.mode' and 'self.date'

        Both attributes are None until metadata() has assigned them.
        """
        return self.api.illust_ranking(self.mode, self.date)

    def metadata(self):
        """Parse 'mode' and 'date' from the query string

        Sets 'self.mode' and 'self.date' and returns them as 'ranking'
        metadata. An unknown mode raises StopExtraction. A date that is
        not exactly 8 decimal digits is logged and replaced, like a
        missing date, by yesterday's date in UTC.
        """
        # function-scope import: the module only imports 'datetime' and
        # 'timedelta' from the datetime package
        from datetime import timezone

        query = text.parse_query(self.query)

        mode = query.get("mode", "daily").lower()
        # website 'mode' parameter -> mode name passed to illust_ranking()
        mode_map = {
            "daily": "day",
            "daily_r18": "day_r18",
            "daily_ai": "day_ai",
            "daily_r18_ai": "day_r18_ai",
            "weekly": "week",
            "weekly_r18": "week_r18",
            "monthly": "month",
            "male": "day_male",
            "male_r18": "day_male_r18",
            "female": "day_female",
            "female_r18": "day_female_r18",
            "original": "week_original",
            "rookie": "week_rookie",
            "r18g": "week_r18g",
        }
        try:
            self.mode = mode = mode_map[mode]
        except KeyError:
            raise exception.StopExtraction("Invalid mode '%s'", mode)

        date = query.get("date")
        if date:
            if len(date) == 8 and date.isdecimal():
                # YYYYMMDD -> YYYY-MM-DD
                date = "{}-{}-{}".format(date[0:4], date[4:6], date[6:8])
            else:
                self.log.warning("invalid date '%s'", date)
                date = None
        if not date:
            # datetime.utcnow() is deprecated; an aware UTC 'now' formats
            # to the same "%Y-%m-%d" string
            yesterday = datetime.now(timezone.utc) - timedelta(days=1)
            date = yesterday.strftime("%Y-%m-%d")
        self.date = date

        return {"ranking": {
            "mode": mode,
            "date": self.date,
        }}
-												[pixiv] implement AppAPI wrapper

											
										
										
											2018-05-07 20:05:44 +02:00
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
+								class PixivSearchExtractor(PixivExtractor):
 								    """Extractor for pixiv search results"""
 								    subcategory = "search"
 								    archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    directory_fmt = ("{category}", "search", "{search[word]}")
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
-												[pixiv] match new search URLs (closes #507)

											
										
										
											2019-12-06 18:10:22 +01:00
+								               r"|search\.php)(?:\?([^#]+))?")
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    test = (
-												[pixiv] match new search URLs (closes #507)

											
										
										
											2019-12-06 18:10:22 +01:00
+								        ("https://www.pixiv.net/en/tags/Original", {
 								            "range": "1-10",
 								            "count": 10,
 								        }),
-												[pixiv] stop with error for invalid search/ranking parameters

instead of falling back to defaults

											
										
										
											2022-11-15 12:17:53 +01:00
+								        ("https://pixiv.net/en/tags/foo/artworks?order=week&s_mode=s_tag", {
 								            "exception": exception.StopExtraction,
 								        }),
 								        ("https://pixiv.net/en/tags/foo/artworks?order=date&s_mode=tag", {
 								            "exception": exception.StopExtraction,
 								        }),
 								        ("https://www.pixiv.net/search.php?s_mode=s_tag&name=Original", {
 								            "exception": exception.StopExtraction,
 								        }),
-												[pixiv] match new search URLs (closes #507)

											
										
										
											2019-12-06 18:10:22 +01:00
+								        ("https://www.pixiv.net/en/tags/foo/artworks?order=date&s_mode=s_tag"),
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								        ("https://www.pixiv.net/search.php?s_mode=s_tag&word=Original"),
 								        ("https://touch.pixiv.net/search.php?word=Original"),
 								    )
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
 								    def __init__(self, match):
 								        """Store the raw search word and query string captured by 'pattern'"""
-												propagate 'match' to base extractor constructor

											
										
										
											2019-02-11 13:31:10 +01:00
+								        PixivExtractor.__init__(self, match)
-												[pixiv] match new search URLs (closes #507)

											
										
										
											2019-12-06 18:10:22 +01:00
+								        self.word, self.query = match.groups()
 								        # placeholders; metadata() assigns the resolved values
 								        self.sort = self.target = None
-												[pixiv] move query parsing out of constructor

better exception handling, among other things

											
										
										
											2018-05-15 13:28:08 +02:00
 								    def works(self):
 								        """Return the result of api.search_illust() for the parsed parameters"""
 								        # date_start/date_end are only assigned in metadata(),
 								        # so metadata() has to run before this method
-												[pixiv] allow setting a date range for search results (#2133)

with the 'scd' and 'ecd' query parameters

											
										
										
											2021-12-23 23:03:39 +01:00
+								        return self.api.search_illust(
 								            self.word, self.sort, self.target,
 								            date_start=self.date_start, date_end=self.date_end)
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
-												[pixiv] reduce calls to '/user/detail'

											
										
										
											2020-02-09 13:54:58 +01:00
+								    def metadata(self):
 								        """Parse and validate the search parameters; return 'search' metadata"""
 								        # Raises exception.StopExtraction when no search term is given or
 								        # when 'order' / 's_mode' hold a value missing from the maps below.
-												[pixiv] move query parsing out of constructor

better exception handling, among other things

											
										
										
											2018-05-15 13:28:08 +02:00
+								        query = text.parse_query(self.query)
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
-												[pixiv] match new search URLs (closes #507)

											
										
										
											2019-12-06 18:10:22 +01:00
 								        # a word captured from the URL path wins over the 'word' parameter
+								        if self.word:
 								            self.word = text.unquote(self.word)
 								        else:
-												[pixiv] stop with error for invalid search/ranking parameters

instead of falling back to defaults

											
										
										
											2022-11-15 12:17:53 +01:00
+								            try:
 								                self.word = query["word"]
 								            except KeyError:
-												[pixiv] match new search URLs (closes #507)

											
										
										
											2019-12-06 18:10:22 +01:00
+								                raise exception.StopExtraction("Missing search term")
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
 								        # translate the URL's 'order' value (default 'date_d') into the
 								        # sort name handed to api.search_illust()
 								        sort = query.get("order", "date_d")
 								        sort_map = {
 								            "date": "date_asc",
 								            "date_d": "date_desc",
-												[pixiv] allow sorting by popularity (requires pixiv premium)

											
										
										
											2023-04-26 22:49:29 +02:00
+								            "popular_d": "popular_desc",
 								            "popular_male_d": "popular_male_desc",
 								            "popular_female_d": "popular_female_desc",
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
+								        }
-												[pixiv] stop with error for invalid search/ranking parameters

instead of falling back to defaults

											
										
										
											2022-11-15 12:17:53 +01:00
+								        try:
 								            self.sort = sort = sort_map[sort]
 								        except KeyError:
 								            raise exception.StopExtraction("Invalid search order '%s'", sort)
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
-												[pixiv] use 'exact_match_for_tags' as default search mode (#3092)

											
										
										
											2022-10-24 14:16:40 +02:00
 								        # same scheme for 's_mode' (default 's_tag_full')
+								        target = query.get("s_mode", "s_tag_full")
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
+								        target_map = {
 								            "s_tag": "partial_match_for_tags",
 								            "s_tag_full": "exact_match_for_tags",
 								            "s_tc": "title_and_caption",
 								        }
-												[pixiv] stop with error for invalid search/ranking parameters

instead of falling back to defaults

											
										
										
											2022-11-15 12:17:53 +01:00
+								        try:
 								            self.target = target = target_map[target]
 								        except KeyError:
 								            raise exception.StopExtraction("Invalid search mode '%s'", target)
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
-												[pixiv] allow setting a date range for search results (#2133)

with the 'scd' and 'ecd' query parameters

											
										
										
											2021-12-23 23:03:39 +01:00
 								        # optional date range; passed through unvalidated
+								        self.date_start = query.get("scd")
 								        self.date_end = query.get("ecd")
-												[pixiv] move query parsing out of constructor

better exception handling, among other things

											
										
										
											2018-05-15 13:28:08 +02:00
+								        return {"search": {
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
+								            "word": self.word,
 								            "sort": self.sort,
 								            "target": self.target,
-												[pixiv] allow setting a date range for search results (#2133)

with the 'scd' and 'ecd' query parameters

											
										
										
											2021-12-23 23:03:39 +01:00
+								            "date_start": self.date_start,
 								            "date_end": self.date_end,
-												[pixiv] move query parsing out of constructor

better exception handling, among other things

											
										
										
											2018-05-15 13:28:08 +02:00
+								        }}
-												[pixiv] add extractor for search results

											
										
										
											2018-05-14 14:46:05 +02:00
-												[pixiv] add extractor for illusts from followed users

											
										
										
											2018-05-15 13:02:49 +02:00
+								class PixivFollowExtractor(PixivExtractor):
 								    """Extractor for new illustrations from your followed artists"""
 								    subcategory = "follow"
 								    archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    directory_fmt = ("{category}", "following")
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = BASE_PATTERN + r"/bookmark_new_illust\.php"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											2019-02-08 13:45:40 +01:00
+								    test = (
 								        ("https://www.pixiv.net/bookmark_new_illust.php"),
 								        ("https://touch.pixiv.net/bookmark_new_illust.php"),
 								    )
-												[pixiv] add extractor for illusts from followed users

											
										
										
											2018-05-15 13:02:49 +02:00
 								    def works(self):
 								        """Return the result of the API's illust_follow() call"""
 								        api = self.api
 								        return api.illust_follow()
-												[pixiv] reduce calls to '/user/detail'

											
										
										
											2020-02-09 13:54:58 +01:00
+								    def metadata(self):
 								        """Call api.login() and expose api.user as 'user_follow' metadata"""
 								        # NOTE(review): presumably login() is what populates api.user;
 								        # confirm in PixivAppAPI
-												[pixiv] add extractor for illusts from followed users

											
										
										
											2018-05-15 13:02:49 +02:00
+								        self.api.login()
 								        return {"user_follow": self.api.user}
-												[pixiv] add extractor for 'pixivision' articles (#1672)

											
										
										
											2021-07-07 02:22:44 +02:00
+								class PixivPixivisionExtractor(PixivExtractor):
 								    """Extractor for illustrations from a pixivision article"""
 								    subcategory = "pixivision"
 								    directory_fmt = ("{category}", "pixivision",
 								                     "{pixivision_id} {pixivision_title}")
 								    archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
 								    pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
 								    test = (
 								        ("https://www.pixivision.net/en/a/2791"),
 								        ("https://pixivision.net/a/2791", {
 								            "count": 7,
 								            "keyword": {
 								                "pixivision_id": "2791",
 								                "pixivision_title": "What's your favorite music? Editor’s "
 								                                    "picks featuring: “CD Covers”!",
 								            },
 								        }),
 								    )
 								    def __init__(self, match):
 								        """Initialize the base extractor and remember the article ID"""
 								        PixivExtractor.__init__(self, match)
 								        article_id = match.group(1)
 								        self.pixivision_id = article_id
 								    def works(self):
 								        """Return a generator of illust_detail() results for linked artworks"""
 								        # reads self.page, which only metadata() assigns
 								        return (
-												[pixiv] fix 'pixivision' extraction

											
										
										
											2023-04-30 15:35:32 +02:00
 								            # keep only the part of the extracted ID before any '?'
+								            self.api.illust_detail(illust_id.partition("?")[0])
-												[pixiv] add extractor for 'pixivision' articles (#1672)

											
										
										
											2021-07-07 02:22:44 +02:00
+								            for illust_id in util.unique_sequence(text.extract_iter(
 								                self.page, '<a href="https://www.pixiv.net/en/artworks/', '"'))
 								        )
 								    def metadata(self):
 								        """Download the article page and return its ID and title"""
 								        url = "https://www.pixivision.net/en/a/" + self.pixivision_id
 								        headers = {"User-Agent": "Mozilla/5.0"}
 								        # keep the HTML on the instance; works() scans it for artwork links
 								        self.page = self.request(url, headers=headers).text
-												replace 'text.extract()' with 'text.extr()' where possible

											
										
										
											2022-11-04 23:39:38 +01:00
+								        title = text.extr(self.page, '<title>', '<')
-												[pixiv] add extractor for 'pixivision' articles (#1672)

											
										
										
											2021-07-07 02:22:44 +02:00
+								        return {
 								            "pixivision_id"   : self.pixivision_id,
 								            "pixivision_title": text.unescape(title),
 								        }
-												[pixiv] add 'series' extractor (#2964)

											
										
										
											2022-09-27 22:57:07 +02:00
+								class PixivSeriesExtractor(PixivExtractor):
 								    """Extractor for illustrations from a Pixiv series"""
 								    subcategory = "series"
 								    directory_fmt = ("{category}", "{user[id]} {user[account]}",
 								                     "{series[id]} {series[title]}")
 								    filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}"
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)"
-												[pixiv] add 'series' extractor (#2964)

											
										
										
											2022-09-27 22:57:07 +02:00
+								    test = ("https://www.pixiv.net/user/10509347/series/21859", {
 								        "range": "1-10",
 								        "count": 10,
 								        "keyword": {
 								            "num_series": int,
 								            "series": {
 								                "canonical": "https://www.pixiv.net/user/10509347"
 								                             "/series/21859",
 								                "description": str,
 								                "ogp": dict,
 								                "title": "先輩がうざい後輩の話",
 								                "total": int,
 								                "twitter": dict,
 								            },
 								        },
 								    })
 								    def __init__(self, match):
 								        """Initialize the base extractor; keep the user and series IDs"""
 								        PixivExtractor.__init__(self, match)
 								        user_id, series_id = match.groups()
 								        self.user_id = user_id
 								        self.series_id = series_id
 								    def works(self):
 								        """Yield illust_detail() results for every entry of the series"""
 								        url = self.root + "/ajax/series/" + self.series_id
 								        params = {"p": 1}
 								        headers = {
 								            "Accept": "application/json",
 								            "Referer": "{}/user/{}/series/{}".format(
 								                self.root, self.user_id, self.series_id),
 								            "Alt-Used": "www.pixiv.net",
 								        }
 								        while True:
 								            data = self.request(url, params=params, headers=headers).json()
 								            body = data["body"]
 								            page = body["page"]
 								            # series metadata is rebuilt from every response page
 								            series = body["extraData"]["meta"]
 								            series["id"] = self.series_id
 								            series["total"] = page["total"]
-												replace 'text.extract()' with 'text.extr()' where possible

											
										
										
											2022-11-04 23:39:38 +01:00
 								            # NOTE(review): presumably reduces the meta title to the
 								            # text between double quotes; confirm text.extr() semantics
+								            series["title"] = text.extr(series["title"], '"', '"')
-												[pixiv] add 'series' extractor (#2964)

											
										
										
											2022-09-27 22:57:07 +02:00
 								            for info in page["series"]:
 								                work = self.api.illust_detail(info["workId"])
 								                work["num_series"] = info["order"]
 								                # all works of one response page share this dict
 								                work["series"] = series
 								                yield work
 								            # a page with fewer than 10 entries is treated as the last one
 								            if len(page["series"]) < 10:
 								                return
 								            params["p"] += 1
-												[pixiv] initial 'novel' support (#1241, #4044)

supported URLs are
- https://www.pixiv.net/novel/show.php?id=<ID>
- https://www.pixiv.net/novel/series/<ID>
- https://www.pixiv.net/en/users/<ID>/novels

											
										
										
											2023-05-12 16:01:19 +02:00
+								class PixivNovelExtractor(PixivExtractor):
 								    """Extractor for pixiv novels"""
 								    subcategory = "novel"
 								    request_interval = 1.0
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
-												[pixiv] support short novel URLs

https://www.pixiv.net/n/<ID>

											
										
										
											2023-05-21 14:26:30 +02:00
+								    test = (
 								        ("https://www.pixiv.net/novel/show.php?id=19612040", {
 								            "count": 1,
-												[pixiv] add 'embeds' option (#1241)

											
										
										
											2023-05-23 12:14:06 +02:00
+								            "content": "8c818474153cbd2f221ee08766e1d634c821d8b4",
-												[pixiv] support short novel URLs

https://www.pixiv.net/n/<ID>

											
										
										
											2023-05-21 14:26:30 +02:00
+								            "keyword": {
 								                "caption": r"re:「無能な名無し」と呼ばれ虐げられて育った鈴\(すず\)は、",
 								                "comment_access_control": 0,
 								                "create_date": "2023-04-02T15:18:58+09:00",
 								                "date": "dt:2023-04-02 06:18:58",
 								                "id": 19612040,
 								                "is_bookmarked": False,
 								                "is_muted": False,
 								                "is_mypixiv_only": False,
 								                "is_original": True,
 								                "is_x_restricted": False,
 								                "novel_ai_type": 1,
 								                "page_count": 1,
 								                "rating": "General",
 								                "restrict": 0,
 								                "series": {
 								                    "id": 10278364,
-												[pixiv] add 'embeds' option (#1241)

											
										
										
											2023-05-23 12:14:06 +02:00
+								                    "title": "龍の贄嫁〜無能な名無しと虐げられていましたが、"
 								                             "どうやら異母妹に霊力を搾取されていたようです〜",
-												[pixiv] support short novel URLs

https://www.pixiv.net/n/<ID>

											
										
										
											2023-05-21 14:26:30 +02:00
+								                },
 								                "tags": ["和風ファンタジー", "溺愛", "神様", "ヤンデレ", "執着",
 								                         "異能", "ざまぁ", "学園", "神嫁"],
-												[pixiv] add 'embeds' option (#1241)

											
										
										
											2023-05-23 12:14:06 +02:00
+								                "text_length": 5974,
-												[pixiv] support short novel URLs

https://www.pixiv.net/n/<ID>

											
										
										
											2023-05-21 14:26:30 +02:00
+								                "title": "異母妹から「無能な名無し」と虐げられていた私、"
 								                         "どうやら異母妹に霊力を搾取されていたようです（１）",
 								                "user": {
 								                    "account": "yukinaga_chifuyu",
 								                    "id": 77055466,
 								                },
 								                "visible": True,
 								                "x_restrict": 0,
-												[pixiv] initial 'novel' support (#1241, #4044)

supported URLs are
- https://www.pixiv.net/novel/show.php?id=<ID>
- https://www.pixiv.net/novel/series/<ID>
- https://www.pixiv.net/en/users/<ID>/novels

											
										
										
											2023-05-12 16:01:19 +02:00
+								            },
-												[pixiv] support short novel URLs

https://www.pixiv.net/n/<ID>

											
										
										
											2023-05-21 14:26:30 +02:00
+								        }),
-												[pixiv] add 'embeds' option (#1241)

											
										
										
											2023-05-23 12:14:06 +02:00
+								        # embeds
 								        ("https://www.pixiv.net/novel/show.php?id=16422450", {
 								            "options": (("embeds", True),),
 								            "count": 3,
 								        }),
-												[pixiv] add 'novel-bookmark' extractor (#4111)

											
										
										
											2023-05-28 16:30:17 +02:00
+								        # short URL
-												[pixiv] support short novel URLs

https://www.pixiv.net/n/<ID>

											
										
										
											2023-05-21 14:26:30 +02:00
+								        ("https://www.pixiv.net/n/19612040"),
 								    )
-												[pixiv] initial 'novel' support (#1241, #4044)

supported URLs are
- https://www.pixiv.net/novel/show.php?id=<ID>
- https://www.pixiv.net/novel/series/<ID>
- https://www.pixiv.net/en/users/<ID>/novels

											
										
										
											2023-05-12 16:01:19 +02:00
 								    def __init__(self, match):
 								        """Initialize the base extractor and keep the first pattern group"""
 								        PixivExtractor.__init__(self, match)
 								        first_group = match.group(1)
 								        self.novel_id = first_group
 								    def items(self):
 								        """Yield Directory, Url and Queue messages for each novel from novels()"""
 								        # choose how each novel's 'tags' list is rewritten ('tags' option)
 								        tags = self.config("tags", "japanese")
 								        if tags == "original":
 								            transform_tags = None
 								        elif tags == "translated":
 								            def transform_tags(work):
 								                # dict.fromkeys() drops duplicate names, keeping first-seen order
 								                work["tags"] = list(dict.fromkeys(
 								                    tag["translated_name"] or tag["name"]
 								                    for tag in work["tags"]))
 								        else:
 								            def transform_tags(work):
 								                work["tags"] = [tag["name"] for tag in work["tags"]]
 								        ratings = {0: "General", 1: "R-18", 2: "R-18G"}
 								        meta_user = self.config("metadata")
 								        meta_bookmark = self.config("metadata-bookmark")
-												[pixiv] add 'embeds' option (#1241)

											
										
										
											2023-05-23 12:14:06 +02:00
+								        embeds = self.config("embeds")
 								        if embeds:
 								            # headers for the novel/show.php request further down
 								            # NOTE(review): a None value presumably removes a session
 								            # default header; confirm in Extractor.request()
 								            headers = {
 								                "User-Agent"    : "Mozilla/5.0",
 								                "App-OS"        : None,
 								                "App-OS-Version": None,
 								                "App-Version"   : None,
 								                "Referer"       : self.root + "/",
 								                "Authorization" : None,
 								            }
-												[pixiv] initial 'novel' support (#1241, #4044)

supported URLs are
- https://www.pixiv.net/novel/show.php?id=<ID>
- https://www.pixiv.net/novel/series/<ID>
- https://www.pixiv.net/en/users/<ID>/novels

											
										
										
											2023-05-12 16:01:19 +02:00
 								        novels = self.novels()
 								        if self.max_posts:
 								            novels = itertools.islice(novels, self.max_posts)
 								        for novel in novels:
 								            if meta_user:
 								                novel.update(self.api.user_detail(novel["user"]["id"]))
 								            if meta_bookmark and novel["is_bookmarked"]:
 								                detail = self.api.novel_bookmark_detail(novel["id"])
 								                novel["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
 								                                          if tag["is_registered"]]
 								            if transform_tags:
 								                transform_tags(novel)
 								            novel["num"] = 0
 								            novel["date"] = text.parse_datetime(novel["create_date"])
 								            novel["rating"] = ratings.get(novel["x_restrict"])
 								            novel["suffix"] = ""
 								            yield Message.Directory, novel
 								            # the novel text itself is emitted as a 'text:' pseudo-URL
 								            novel["extension"] = "txt"
 								            content = self.api.novel_text(novel["id"])["novel_text"]
 								            yield Message.Url, "text:" + content, novel
-												[pixiv] add 'embeds' option (#1241)

											
										
										
											2023-05-23 12:14:06 +02:00
+								            if embeds:
 								                desktop = False
 								                illusts = {}
 								                # scan the '[...]' markers found in the novel text
 								                for marker in text.extract_iter(content, "[", "]"):
 								                    if marker.startswith("[jumpuri:"):
 								                        desktop = True
 								                    elif marker.startswith("pixivimage:"):
 								                        # key: the part after 'pixivimage:' up to the first
 								                        # '-'; dict keys de-duplicate repeated IDs
 								                        illusts[marker[11:].partition("-")[0]] = None
 								                if desktop:
 								                    # embedded images are read from the JSON preload data
 								                    # of the novel/show.php page
 								                    novel_id = str(novel["id"])
 								                    url = "{}/novel/show.php?id={}".format(
 								                        self.root, novel_id)
 								                    data = util.json_loads(text.extr(
 								                        self.request(url, headers=headers).text,
 								                        "id=\"meta-preload-data\" content='", "'"))
 								                    for image in (data["novel"][novel_id]
 								                                  ["textEmbeddedImages"]).values():
 								                        url = image.pop("urls")["original"]
 								                        # the same 'novel' dict is mutated and re-yielded
 								                        # for every embedded file
 								                        novel.update(image)
 								                        novel["date_url"] = self._date_from_url(url)
 								                        novel["num"] += 1
 								                        novel["suffix"] = "_p{:02}".format(novel["num"])
 								                        text.nameext_from_url(url, novel)
 								                        yield Message.Url, url, novel
 								                if illusts:
 								                    # referenced artworks are queued as artwork URLs with
 								                    # PixivWorkExtractor set as '_extractor'
 								                    novel["_extractor"] = PixivWorkExtractor
 								                    novel["date_url"] = None
 								                    for illust_id in illusts:
 								                        novel["num"] += 1
 								                        novel["suffix"] = "_p{:02}".format(novel["num"])
 								                        url = "{}/artworks/{}".format(self.root, illust_id)
 								                        yield Message.Queue, url, novel
-												[pixiv] initial 'novel' support (#1241, #4044)

supported URLs are
- https://www.pixiv.net/novel/show.php?id=<ID>
- https://www.pixiv.net/novel/series/<ID>
- https://www.pixiv.net/en/users/<ID>/novels

											
										
										
											2023-05-12 16:01:19 +02:00
+								    def novels(self):
 								        """Return a 1-tuple holding api.novel_detail() for this novel ID"""
 								        return (self.api.novel_detail(self.novel_id),)
 								class PixivNovelUserExtractor(PixivNovelExtractor):
 								    """Extractor for pixiv users' novels"""
 								    subcategory = "novel-user"
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = USER_PATTERN + r"/novels"
-												[pixiv] initial 'novel' support (#1241, #4044)

supported URLs are
- https://www.pixiv.net/novel/show.php?id=<ID>
- https://www.pixiv.net/novel/series/<ID>
- https://www.pixiv.net/en/users/<ID>/novels

											
										
										
											2023-05-12 16:01:19 +02:00
+								    test = ("https://www.pixiv.net/en/users/77055466/novels", {
 								        "pattern": "^text:",
 								        "range": "1-5",
 								        "count": 5,
 								    })
 								    def novels(self):
 								        """Return api.user_novels() for the ID captured from the URL"""
 								        # the inherited 'novel_id' attribute holds the first pattern group
 								        captured_id = self.novel_id
 								        return self.api.user_novels(captured_id)
 								class PixivNovelSeriesExtractor(PixivNovelExtractor):
 								    """Extractor for pixiv novel series"""
 								    subcategory = "novel-series"
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = BASE_PATTERN + r"/novel/series/(\d+)"
-												[pixiv] initial 'novel' support (#1241, #4044)

supported URLs are
- https://www.pixiv.net/novel/show.php?id=<ID>
- https://www.pixiv.net/novel/series/<ID>
- https://www.pixiv.net/en/users/<ID>/novels

											
										
										
											2023-05-12 16:01:19 +02:00
+								    test = ("https://www.pixiv.net/novel/series/10278364", {
 								        "count": 4,
 								        "content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2",
 								    })
 								    def novels(self):
 								        """Return api.novel_series() for the ID captured from the URL"""
 								        # the inherited 'novel_id' attribute holds the first pattern group
 								        captured_id = self.novel_id
 								        return self.api.novel_series(captured_id)
-												[pixiv] add 'novel-bookmark' extractor (#4111)

											
										
										
											2023-05-28 16:30:17 +02:00
+								class PixivNovelBookmarkExtractor(PixivNovelExtractor):
 								    """Extractor for bookmarked pixiv novels"""
 								    subcategory = "novel-bookmark"
-												[pixiv] use BASE_PATTERN

											
										
										
											2023-05-28 18:06:47 +02:00
+								    pattern = (USER_PATTERN + r"/bookmarks/novels"
-												[pixiv] add 'novel-bookmark' extractor (#4111)

											
										
										
											2023-05-28 16:30:17 +02:00
+								               r"(?:/([^/?#]+))?(?:/?\?([^#]+))?")
 								    test = (
 								        ("https://www.pixiv.net/en/users/77055466/bookmarks/novels", {
 								            "count": 1,
 								            "content": "7194e8faa876b2b536f185ee271a2b6e46c69089",
 								        }),
 								        ("https://www.pixiv.net/en/users/11/bookmarks/novels/TAG?rest=hide"),
 								    )
 								    def __init__(self, match):
 								        """Initialize the novel extractor; keep user ID, tag and query"""
 								        PixivNovelExtractor.__init__(self, match)
 								        user_id, tag, query = match.groups()
 								        self.user_id = user_id
 								        self.tag = tag
 								        self.query = query
 								    def novels(self):
 								        """Return api.user_bookmarks_novel() for the user, tag and visibility"""
 								        # an absent or empty tag is passed on as None
 								        tag = text.unquote(self.tag) if self.tag else None
 								        # only 'rest=hide' selects private bookmarks
 								        hidden = text.parse_query(self.query).get("rest") == "hide"
 								        restrict = "private" if hidden else "public"
 								        return self.api.user_bookmarks_novel(self.user_id, tag, restrict)
-												[pixiv] add 'sketch' extractor (#1497)

											
										
										
											2021-10-12 20:50:11 +02:00
class PixivSketchExtractor(Extractor):
    """Extractor for user pages on sketch.pixiv.net"""
    category = "pixiv"
    subcategory = "sketch"
    directory_fmt = ("{category}", "sketch", "{user[unique_name]}")
    filename_fmt = "{post_id} {id}.{extension}"
    archive_fmt = "S{user[id]}_{id}"
    root = "https://sketch.pixiv.net"
    cookiedomain = ".pixiv.net"
    pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)"
    test = ("https://sketch.pixiv.net/@nicoby", {
        "pattern": r"https://img\-sketch\.pixiv\.net/uploads/medium"
                   r"/file/\d+/\d+\.(jpg|png)",
        "count": ">= 35",
    })

    def __init__(self, match):
        """Remember the user name captured from the '@<name>' URL part"""
        Extractor.__init__(self, match)
        self.username = match.group(1)

    def items(self):
        """Yield one Directory message per post and one Url per photo

        The post dict itself is used as metadata: its own 'id' is moved
        to 'post_id', and 'id', 'width', 'height' plus the filename
        fields are overwritten for every photo before it is yielded.
        """
        profile_url = "{}/@{}".format(self.root, self.username)
        # a single headers dict is shared by all posts of this user
        file_headers = {"Referer": profile_url}

        for post in self.posts():
            photos = post["media"]
            post["post_id"] = post["id"]
            post["date"] = text.parse_datetime(
                post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
            # 'id' is reused below for the ID of each individual photo
            util.delete_items(post, ("id", "media", "_links"))

            yield Message.Directory, post

            # only added after the Directory message has been emitted
            post["_http_headers"] = file_headers

            for photo in photos:
                source = photo["photo"]["original"]
                post["id"] = photo["id"]
                post["width"] = source["width"]
                post["height"] = source["height"]

                file_url = source["url"]
                text.nameext_from_url(file_url, post)
                yield Message.Url, file_url, post

    def posts(self):
        """Yield all public wall posts, following the 'next' page links"""
        page_url = "{}/api/walls/@{}/posts/public.json".format(
            self.root, self.username)
        api_headers = {
            "Accept": "application/vnd.sketch-v4+json",
            "X-Requested-With": "{}/@{}".format(self.root, self.username),
            "Referer": self.root + "/",
        }

        while True:
            page = self.request(page_url, headers=api_headers).json()
            yield from page["data"]["items"]

            following = page["_links"].get("next")
            if not following:
                break
            # the 'href' value is appended to the site root
            page_url = self.root + following["href"]
-												[pixiv] implement AppAPI wrapper

											
										
										
											2018-05-07 20:05:44 +02:00
class PixivAppAPI():
    """Minimal interface for the Pixiv App API for mobile devices

    For a more complete implementation or documentation, see
    - https://github.com/upbit/pixivpy
    - https://gist.github.com/ZipFile/3ba99b47162c23f8aea5d5942bb557b1
    """
    CLIENT_ID = "MOBrBDS8blbauoSck0ZfDbtuzpyT"
    CLIENT_SECRET = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj"
    HASH_SECRET = ("28c1fdd170a5204386cb1313c7077b34"
                   "f83e4aaf4aa829ce78c231e05b0bae2c")

    def __init__(self, extractor):
        """Bind the API to 'extractor' and prepare its HTTP session

        Installs the app-specific request headers on the extractor's
        session and reads 'client-id', 'client-secret' and
        'refresh-token' from the extractor's configuration.
        """
        self.extractor = extractor
        self.log = extractor.log
        self.username = extractor._get_auth_info()[0]
        self.user = None

        extractor.session.headers.update({
            "App-OS"        : "ios",
            "App-OS-Version": "13.1.2",
            "App-Version"   : "7.7.6",
            "User-Agent"    : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)",
            "Referer"       : "https://app-api.pixiv.net/",
        })

        self.client_id = extractor.config(
            "client-id", self.CLIENT_ID)
        self.client_secret = extractor.config(
            "client-secret", self.CLIENT_SECRET)

        # no configured token (or the literal value "cache"):
        # fall back to whatever _refresh_token_cache() returns
        token = extractor.config("refresh-token")
        if token is None or token == "cache":
            token = _refresh_token_cache(self.username)
        self.refresh_token = token

    def login(self):
        """Login and gain an access token"""
        self.user, auth = self._login_impl(self.username)
        self.extractor.session.headers["Authorization"] = auth

    @cache(maxage=3600, keyarg=1)
    def _login_impl(self, username):
        """Exchange the refresh token for a new access token

        Returns a (user, authorization-header-value) tuple.
        Raises AuthenticationError if no refresh token is available
        or if the token endpoint answers with a status code >= 400.
        """
        if not self.refresh_token:
            raise exception.AuthenticationError(
                "'refresh-token' required.\n"
                "Run `gallery-dl oauth:pixiv` to get one.")

        self.log.info("Refreshing access token")

        url = "https://oauth.secure.pixiv.net/auth/token"
        data = {
            "client_id"     : self.client_id,
            "client_secret" : self.client_secret,
            "grant_type"    : "refresh_token",
            "refresh_token" : self.refresh_token,
            "get_secure_url": "1",
        }

        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
        # datetime yields the exact same formatted timestamp
        from datetime import timezone
        client_time = datetime.now(timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%S+00:00")
        headers = {
            "X-Client-Time": client_time,
            # MD5 hex digest of the timestamp followed by HASH_SECRET
            "X-Client-Hash": hashlib.md5(
                (client_time + self.HASH_SECRET).encode()).hexdigest(),
        }

        response = self.extractor.request(
            url, method="POST", headers=headers, data=data, fatal=False)
        if response.status_code >= 400:
            self.log.debug(response.text)
            raise exception.AuthenticationError("Invalid refresh token")

        data = response.json()["response"]
        return data["user"], "Bearer " + data["access_token"]

    def illust_detail(self, illust_id):
        """Return the 'illust' object of /v1/illust/detail"""
        params = {"illust_id": illust_id}
        return self._call("/v1/illust/detail", params)["illust"]

    def illust_bookmark_detail(self, illust_id):
        """Return the 'bookmark_detail' object of an illust"""
        params = {"illust_id": illust_id}
        return self._call(
            "/v2/illust/bookmark/detail", params)["bookmark_detail"]

    def illust_follow(self, restrict="all"):
        """Iterate over the illusts returned by /v2/illust/follow"""
        params = {"restrict": restrict}
        return self._pagination("/v2/illust/follow", params)

    def illust_ranking(self, mode="day", date=None):
        """Iterate over the illusts returned by /v1/illust/ranking"""
        params = {"mode": mode, "date": date}
        return self._pagination("/v1/illust/ranking", params)

    def illust_related(self, illust_id):
        """Iterate over the illusts returned by /v2/illust/related"""
        params = {"illust_id": illust_id}
        return self._pagination("/v2/illust/related", params)

    def novel_bookmark_detail(self, novel_id):
        """Return the 'bookmark_detail' object of a novel"""
        params = {"novel_id": novel_id}
        return self._call(
            "/v2/novel/bookmark/detail", params)["bookmark_detail"]

    def novel_detail(self, novel_id):
        """Return the 'novel' object of /v2/novel/detail"""
        params = {"novel_id": novel_id}
        return self._call("/v2/novel/detail", params)["novel"]

    def novel_series(self, series_id):
        """Iterate over the novels returned by /v1/novel/series"""
        params = {"series_id": series_id}
        return self._pagination("/v1/novel/series", params, "novels")

    def novel_text(self, novel_id):
        """Return the complete response of /v1/novel/text"""
        params = {"novel_id": novel_id}
        return self._call("/v1/novel/text", params)

    def search_illust(self, word, sort=None, target=None, duration=None,
                      date_start=None, date_end=None):
        """Iterate over the illusts returned by /v1/search/illust"""
        params = {"word": word, "search_target": target,
                  "sort": sort, "duration": duration,
                  "start_date": date_start, "end_date": date_end}
        return self._pagination("/v1/search/illust", params)

    def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
        """Return illusts bookmarked by a user"""
        params = {"user_id": user_id, "tag": tag, "restrict": restrict}
        return self._pagination("/v1/user/bookmarks/illust", params)

    def user_bookmarks_novel(self, user_id, tag=None, restrict="public"):
        """Return novels bookmarked by a user"""
        params = {"user_id": user_id, "tag": tag, "restrict": restrict}
        return self._pagination("/v1/user/bookmarks/novel", params, "novels")

    def user_bookmark_tags_illust(self, user_id, restrict="public"):
        """Return bookmark tags defined by a user"""
        params = {"user_id": user_id, "restrict": restrict}
        return self._pagination(
            "/v1/user/bookmark-tags/illust", params, "bookmark_tags")

    @memcache(keyarg=1)
    def user_detail(self, user_id):
        """Return the complete response of /v1/user/detail"""
        params = {"user_id": user_id}
        return self._call("/v1/user/detail", params)

    def user_following(self, user_id, restrict="public"):
        """Iterate over the 'user_previews' of /v1/user/following"""
        params = {"user_id": user_id, "restrict": restrict}
        return self._pagination("/v1/user/following", params, "user_previews")

    def user_illusts(self, user_id):
        """Iterate over the illusts returned by /v1/user/illusts"""
        params = {"user_id": user_id}
        return self._pagination("/v1/user/illusts", params)

    def user_novels(self, user_id):
        """Iterate over the novels returned by /v1/user/novels"""
        params = {"user_id": user_id}
        return self._pagination("/v1/user/novels", params, "novels")

    def ugoira_metadata(self, illust_id):
        """Return the 'ugoira_metadata' object of an illust"""
        params = {"illust_id": illust_id}
        return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]

    def _call(self, endpoint, params=None):
        """Send a request to 'endpoint' and return the decoded JSON data

        Responses whose error message mentions a rate limit are retried
        after waiting 300 seconds.

        Raises NotFoundError for failed requests with status code 404
        and StopExtraction for any other failure, including response
        bodies that are not valid JSON.
        """
        url = "https://app-api.pixiv.net" + endpoint

        while True:
            # called before every attempt; _login_impl() is wrapped in
            # @cache(maxage=3600), so this presumably only hits the
            # network again once that cache entry has expired
            self.login()
            response = self.extractor.request(url, params=params, fatal=False)

            try:
                data = response.json()
            except ValueError:
                # not JSON at all (e.g. an HTML error page); report it
                # like any other API failure instead of a raw ValueError
                self.log.debug(response.text)
                if response.status_code == 404:
                    raise exception.NotFoundError()
                raise exception.StopExtraction(
                    "API request failed: invalid JSON response (HTTP %s)",
                    response.status_code)

            if "error" not in data:
                return data

            self.log.debug(data)

            if response.status_code == 404:
                raise exception.NotFoundError()

            error = data["error"]
            # 'message' may be missing or None
            if "rate limit" in (error.get("message") or "").lower():
                self.extractor.wait(seconds=300)
                continue

            raise exception.StopExtraction("API request failed: %s", error)

    def _pagination(self, endpoint, params, key="illusts"):
        """Yield the items stored under 'key' from all result pages

        The parameters for each following request are taken from the
        query string of the response's 'next_url' value; iteration ends
        when that value is empty.
        """
        while True:
            data = self._call(endpoint, params)
            yield from data[key]

            next_url = data["next_url"]
            if not next_url:
                return
            # only the part after the last '?' is used
            query = next_url.rpartition("?")[2]
            params = text.parse_query(query)
-												[pixiv] use refresh_token based authentication

The first login will still use username and password, but everything
afterwards will use the refresh_token obtained from that.

This will prevent pixiv from sending a "New login to pixiv" email every
time a new access_token is requested.

											
										
										
											2018-10-12 22:26:27 +02:00
-												adjust cache maxage values

											
										
										
											2019-03-14 22:21:49 +01:00
@cache(maxage=10*365*24*3600, keyarg=0)
def _refresh_token_cache(username):
    """Cache slot for the refresh token belonging to 'username'

    The function body only supplies the default value (None); any real
    token is presumably stored into this cache entry from elsewhere
    (e.g. by `gallery-dl oauth:pixiv`) -- TODO confirm against callers.
    """
    return None