gallery-dl/gallery_dl/extractor/patreon.py

# -*- coding: utf-8 -*-

# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.patreon.com/"""

from .common import Extractor, Message
from .. import text, exception
from ..cache import memcache
import collections
import itertools
import json


class PatreonExtractor(Extractor):
    """Base class for patreon extractors"""
    category = "patreon"
    root = "https://www.patreon.com"
    directory_fmt = ("{category}", "{creator[full_name]}")
    filename_fmt = "{id}_{title}_{num:>02}.{extension}"
    archive_fmt = "{id}_{num}"
    browser = "firefox"
    _warning = True

    def items(self):

        if self._warning:
            if not self._check_cookies(("session_id",)):
                self.log.warning("no 'session_id' cookie set")
            PatreonExtractor._warning = False
        generators = self._build_file_generators(self.config("files"))

        for post in self.posts():

            if not post.get("current_user_can_view", True):
                self.log.warning("Not allowed to view post %s", post["id"])
                continue
            yield Message.Directory, post

            post["num"] = 0
            hashes = set()
            for kind, url, name in itertools.chain.from_iterable(
                    g(post) for g in generators):
                fhash = self._filehash(url)
                if fhash not in hashes or not fhash:
                    hashes.add(fhash)
                    post["hash"] = fhash
                    post["type"] = kind
                    post["num"] += 1
                    yield Message.Url, url, text.nameext_from_url(name, post)
                else:
                    self.log.debug("skipping %s (%s %s)", url, fhash, kind)

    @staticmethod
    def _postfile(post):
        postfile = post.get("post_file")
        if postfile:
            return (("postfile", postfile["url"], postfile["name"]),)
        return ()

    def _images(self, post):
        for image in post["images"]:
            url = image.get("download_url")
            if url:
                name = image.get("file_name") or self._filename(url) or url
                yield "image", url, name

    def _attachments(self, post):
        for attachment in post["attachments"]:
            url = self.request(
                attachment["url"], method="HEAD",
                allow_redirects=False, fatal=False,
            ).headers.get("Location")

            if url:
                yield "attachment", url, attachment["name"]

    def _content(self, post):
        content = post.get("content")
        if content:
            for img in text.extract_iter(
                    content, '<img data-media-id="', '>'):
                url = text.extract(img, 'src="', '"')[0]
                if url:
                    yield "content", url, self._filename(url) or url

    def posts(self):
        """Return all relevant post objects"""

    def _pagination(self, url):
        headers = {"Referer": self.root}

        while url:
            url = text.ensure_http_scheme(url)
            posts = self.request(url, headers=headers).json()

            if "included" in posts:
                included = self._transform(posts["included"])
                for post in posts["data"]:
                    yield self._process(post, included)

            if "links" not in posts:
                return
            url = posts["links"].get("next")

    def _process(self, post, included):
        """Process and extend a 'post' object"""
        attr = post["attributes"]
        attr["id"] = text.parse_int(post["id"])

        if attr.get("current_user_can_view", True):

            relationships = post["relationships"]
            attr["images"] = self._files(post, included, "images")
            attr["attachments"] = self._files(post, included, "attachments")
            attr["date"] = text.parse_datetime(
                attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")

            tags = relationships.get("user_defined_tags")
            attr["tags"] = [
                tag["id"].replace("user_defined;", "")
                for tag in tags["data"]
                if tag["type"] == "post_tag"
            ] if tags else []

            user = relationships["user"]
            attr["creator"] = (
                self._user(user["links"]["related"]) or
                included["user"][user["data"]["id"]])

        return attr

    @staticmethod
    def _transform(included):
        """Transform 'included' into an easier to handle format"""
        result = collections.defaultdict(dict)
        for inc in included:
            result[inc["type"]][inc["id"]] = inc["attributes"]
        return result

    @staticmethod
    def _files(post, included, key):
        """Build a list of files"""
        files = post["relationships"].get(key)
        if files and files.get("data"):
            return [
                included[file["type"]][file["id"]]
                for file in files["data"]
            ]
        return []

    @memcache(keyarg=1)
    def _user(self, url):
        """Fetch user information"""
        response = self.request(url, fatal=False)
        if response.status_code >= 400:
            return None
        user = response.json()["data"]
        attr = user["attributes"]
        attr["id"] = user["id"]
        attr["date"] = text.parse_datetime(
            attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
        return attr

    def _filename(self, url):
        """Fetch filename from an URL's Content-Disposition header"""
        response = self.request(url, method="HEAD", fatal=False)
        cd = response.headers.get("Content-Disposition")
        return text.extract(cd, 'filename="', '"')[0]

    @staticmethod
    def _filehash(url):
        """Extract MD5 hash from a download URL"""
        parts = url.partition("?")[0].split("/")
        parts.reverse()

        for part in parts:
            if len(part) == 32:
                return part
        return ""

    @staticmethod
    def _build_url(endpoint, query):
        return (
            "https://www.patreon.com/api/" + endpoint +

            "?include=user,images,attachments,user_defined_tags,campaign,poll."
            "choices,poll.current_user_responses.user,poll.current_user_respon"
            "ses.choice,poll.current_user_responses.poll,access_rules.tier.nul"
            "l"

            "&fields[post]=change_visibility_at,comment_count,content,current_"
            "user_can_delete,current_user_can_view,current_user_has_liked,embe"
            "d,image,is_paid,like_count,min_cents_pledged_to_view,post_file,pu"
            "blished_at,patron_count,patreon_url,post_type,pledge_url,thumbnai"
            "l_url,teaser_text,title,upgrade_url,url,was_posted_by_campaign_ow"
            "ner"
            "&fields[user]=image_url,full_name,url"
            "&fields[campaign]=avatar_photo_url,earnings_visibility,is_nsfw,is"
            "_monthly,name,url"
            "&fields[access_rule]=access_rule_type,amount_cents" + query +

            "&json-api-use-default-includes=false"
            "&json-api-version=1.0"
        )

    def _build_file_generators(self, filetypes):
        if filetypes is None:
            return (self._images, self._attachments,
                    self._postfile, self._content)
        genmap = {
            "images"     : self._images,
            "attachments": self._attachments,
            "postfile"   : self._postfile,
            "content"    : self._content,
        }
        if isinstance(filetypes, str):
            filetypes = filetypes.split(",")
        return [genmap[ft] for ft in filetypes]


class PatreonCreatorExtractor(PatreonExtractor):
    """Extractor for a creator's works"""
    subcategory = "creator"
    pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
               r"/(?!(?:home|join|posts|login|signup)(?:$|[/?#]))"
               r"([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?")
    test = (
        ("https://www.patreon.com/koveliana", {
            "range": "1-25",
            "count": ">= 25",
            "keyword": {
                "attachments"  : list,
                "comment_count": int,
                "content"      : str,
                "creator"      : dict,
                "date"         : "type:datetime",
                "id"           : int,
                "images"       : list,
                "like_count"   : int,
                "post_type"    : str,
                "published_at" : str,
                "title"        : str,
            },
        }),
        ("https://www.patreon.com/koveliana/posts?filters[month]=2020-3", {
            "count": 1,
            "keyword": {"date": "dt:2020-03-30 21:21:44"},
        }),
        ("https://www.patreon.com/kovelianot", {
            "exception": exception.NotFoundError,
        }),
        ("https://www.patreon.com/user?u=2931440"),
        ("https://www.patreon.com/user/posts/?u=2931440"),
    )

    def __init__(self, match):
        PatreonExtractor.__init__(self, match)
        self.creator, self.query = match.groups()

    def posts(self):
        query = text.parse_query(self.query)

        creator_id = query.get("u")
        if creator_id:
            url = "{}/user/posts?u={}".format(self.root, creator_id)
        else:
            url = "{}/{}/posts".format(self.root, self.creator)

        page = self.request(url, notfound="creator").text
        campaign_id = text.extract(page, "/campaign/", "/")[0]
        if not campaign_id:
            raise exception.NotFoundError("creator")

        filters = "".join(
            "&filter[{}={}".format(key[8:], text.escape(value))
            for key, value in query.items()
            if key.startswith("filters[")
        )

        url = self._build_url("posts", (
            "&sort=" + query.get("sort", "-published_at") +
            "&filter[is_draft]=false"
            "&filter[contains_exclusive_posts]=true"
            "&filter[campaign_id]=" + campaign_id + filters
        ))
        return self._pagination(url)


class PatreonUserExtractor(PatreonExtractor):
    """Extractor for media from creators supported by you"""
    subcategory = "user"
    pattern = r"(?:https?://)?(?:www\.)?patreon\.com/home$"
    test = ("https://www.patreon.com/home",)

    def posts(self):
        url = self._build_url("stream", (
            "&page[cursor]=null"
            "&filter[is_following]=true"
        ))
        return self._pagination(url)


class PatreonPostExtractor(PatreonExtractor):
    """Extractor for media from a single post"""
    subcategory = "post"
    pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?#]+)"
    test = (
        # postfile + attachments
        ("https://www.patreon.com/posts/precious-metal-23563293", {
            "count": 4,
        }),
        # postfile + content
        ("https://www.patreon.com/posts/56127163", {
            "count": 3,
            "keyword": {"filename": r"re:^(?!1).+$"},
        }),
        # tags (#1539)
        ("https://www.patreon.com/posts/free-post-12497641", {
            "keyword": {"tags": ["AWMedia"]},
        }),
        ("https://www.patreon.com/posts/not-found-123", {
            "exception": exception.NotFoundError,
        }),
    )

    def __init__(self, match):
        PatreonExtractor.__init__(self, match)
        self.slug = match.group(1)

    def posts(self):
        url = "{}/posts/{}".format(self.root, self.slug)
        page = self.request(url, notfound="post").text
        data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0]
        post = json.loads(data + "}")["post"]

        included = self._transform(post["included"])
        return (self._process(post["data"], included),)
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`# -- coding: utf-8 --`

remove 'Message.Metadata' (#866) 2021-01-31 02:12:37 +01:00			`# Copyright 2019-2021 Mike Fährmann`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

			`"""Extractors for https://www.patreon.com/"""`

			`from .common import Extractor, Message`
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`from .. import text, exception`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`from ..cache import memcache`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00			`import collections`
[patreon] filter duplicate files per post (#590) 2020-02-05 22:47:20 +01:00			`import itertools`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00			`import json`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00

			`class PatreonExtractor(Extractor):`
			`"""Base class for patreon extractors"""`
			`category = "patreon"`
			`root = "https://www.patreon.com"`
[patreon] small fixes and adjustments (#226) - fix datetime parsing - rename 'user' to 'creator' - convert 'id' to integer - improve tests 2019-05-17 13:13:11 +02:00			`directory_fmt = ("{category}", "{creator[full_name]}")`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`filename_fmt = "{id}_{title}_{num:>02}.{extension}"`
			`archive_fmt = "{id}_{num}"`
[patreon] use '"browser": "firefox"' by default (#1117) 2021-02-27 16:26:42 +01:00			`browser = "firefox"`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`_warning = True`

			`def items(self):`

[patreon] small fixes and adjustments (#226) - fix datetime parsing - rename 'user' to 'creator' - convert 'id' to integer - improve tests 2019-05-17 13:13:11 +02:00			`if self._warning:`
use Extractor._check_cookies() for all cookie checks 2021-12-16 02:21:16 +01:00			`if not self._check_cookies(("session_id",)):`
[patreon] small fixes and adjustments (#226) - fix datetime parsing - rename 'user' to 'creator' - convert 'id' to integer - improve tests 2019-05-17 13:13:11 +02:00			`self.log.warning("no 'session_id' cookie set")`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`PatreonExtractor._warning = False`
[patreon] implement 'files' option (#1935) 2021-10-17 04:14:58 +02:00			`generators = self._build_file_generators(self.config("files"))`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00
			`for post in self.posts():`
[patreon] skip posts without view permission (#1316) 2021-02-14 16:03:11 +01:00
			`if not post.get("current_user_can_view", True):`
			`self.log.warning("Not allowed to view post %s", post["id"])`
			`continue`
[patreon] implement 'files' option (#1935) 2021-10-17 04:14:58 +02:00			`yield Message.Directory, post`

[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`post["num"] = 0`
[patreon] filter duplicate files per post (#590) 2020-02-05 22:47:20 +01:00			`hashes = set()`
[patreon] implement 'files' option (#1935) 2021-10-17 04:14:58 +02:00			`for kind, url, name in itertools.chain.from_iterable(`
			`g(post) for g in generators):`
[patreon] improve hash extraction (#693, #713) Instead of accessing a specific part of a download URL, potentially causing an exception if it doesn't exist, we're now searching through all parts for a potential MD5 hash without ever raising an exception. 2020-04-28 21:40:22 +02:00			`fhash = self._filehash(url)`
			`if fhash not in hashes or not fhash:`
[patreon] filter duplicate files per post (#590) 2020-02-05 22:47:20 +01:00			`hashes.add(fhash)`
			`post["hash"] = fhash`
			`post["type"] = kind`
			`post["num"] += 1`
			`yield Message.Url, url, text.nameext_from_url(name, post)`
[patreon] log skipped files (#590) 2020-02-11 19:01:07 +01:00			`else:`
			`self.log.debug("skipping %s (%s %s)", url, fhash, kind)`
[patreon] include image info in API results (#383) 2019-08-18 23:21:57 +02:00
[patreon] filter duplicate files per post (#590) 2020-02-05 22:47:20 +01:00			`@staticmethod`
			`def _postfile(post):`
			`postfile = post.get("post_file")`
			`if postfile:`
			`return (("postfile", postfile["url"], postfile["name"]),)`
			`return ()`

			`def _images(self, post):`
			`for image in post["images"]:`
			`url = image.get("download_url")`
			`if url:`
			`name = image.get("file_name") or self._filename(url) or url`
			`yield "image", url, name`
[patreon] include image info in API results (#383) 2019-08-18 23:21:57 +02:00
[patreon] filter duplicate files per post (#590) 2020-02-05 22:47:20 +01:00			`def _attachments(self, post):`
			`for attachment in post["attachments"]:`
			`url = self.request(`
			`attachment["url"], method="HEAD",`
			`allow_redirects=False, fatal=False,`
			`).headers.get("Location")`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00
[patreon] filter duplicate files per post (#590) 2020-02-05 22:47:20 +01:00			`if url:`
			`yield "attachment", url, attachment["name"]`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00
[patreon] better filenames for 'content' images (#1954) 2021-10-16 22:58:28 +02:00			`def _content(self, post):`
[patreon] filter duplicate files per post (#590) 2020-02-05 22:47:20 +01:00			`content = post.get("content")`
			`if content:`
			`for img in text.extract_iter(`
			`content, '<img data-media-id="', '>'):`
			`url = text.extract(img, 'src="', '"')[0]`
			`if url:`
[patreon] better filenames for 'content' images (#1954) 2021-10-16 22:58:28 +02:00			`yield "content", url, self._filename(url) or url`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`def posts(self):`
			`"""Return all relevant post objects"""`

			`def _pagination(self, url):`
			`headers = {"Referer": self.root}`

			`while url:`
add 'text.ensure_http_scheme()' 2020-05-19 21:25:07 +02:00			`url = text.ensure_http_scheme(url)`
[patreon] use file extensions from original filenames (#268) 2019-05-20 15:46:59 +02:00			`posts = self.request(url, headers=headers).json()`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00			`if "included" in posts:`
			`included = self._transform(posts["included"])`
			`for post in posts["data"]:`
			`yield self._process(post, included)`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00
			`if "links" not in posts:`
			`return`
			`url = posts["links"].get("next")`

[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00			`def _process(self, post, included):`
			`"""Process and extend a 'post' object"""`
			`attr = post["attributes"]`
			`attr["id"] = text.parse_int(post["id"])`
[patreon] skip posts without view permission (#1316) 2021-02-14 16:03:11 +01:00
[patreon] extract user defined 'tags' (#1539, closes #1540) 2021-05-18 00:31:01 +02:00			`if attr.get("current_user_can_view", True):`

			`relationships = post["relationships"]`
[patreon] skip posts without view permission (#1316) 2021-02-14 16:03:11 +01:00			`attr["images"] = self._files(post, included, "images")`
			`attr["attachments"] = self._files(post, included, "attachments")`
			`attr["date"] = text.parse_datetime(`
			`attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")`
[patreon] extract user defined 'tags' (#1539, closes #1540) 2021-05-18 00:31:01 +02:00
			`tags = relationships.get("user_defined_tags")`
			`attr["tags"] = [`
			`tag["id"].replace("user_defined;", "")`
			`for tag in tags["data"]`
			`if tag["type"] == "post_tag"`
			`] if tags else []`

			`user = relationships["user"]`
[patreon] skip posts without view permission (#1316) 2021-02-14 16:03:11 +01:00			`attr["creator"] = (`
			`self._user(user["links"]["related"]) or`
			`included["user"][user["data"]["id"]])`

[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00			`return attr`

			`@staticmethod`
			`def _transform(included):`
			`"""Transform 'included' into an easier to handle format"""`
			`result = collections.defaultdict(dict)`
			`for inc in included:`
			`result[inc["type"]][inc["id"]] = inc["attributes"]`
			`return result`

			`@staticmethod`
			`def _files(post, included, key):`
			`"""Build a list of files"""`
			`files = post["relationships"].get(key)`
			`if files and files.get("data"):`
			`return [`
			`included[file["type"]][file["id"]]`
			`for file in files["data"]`
			`]`
update extractor test results 2021-11-01 02:58:53 +01:00			`return []`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`@memcache(keyarg=1)`
			`def _user(self, url):`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00			`"""Fetch user information"""`
[patreon] make retrieving user info nonfatal (#508) … and fall back to the included data if an error occurs. 2019-12-12 00:31:35 +01:00			`response = self.request(url, fatal=False)`
			`if response.status_code >= 400:`
			`return None`
			`user = response.json()["data"]`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`attr = user["attributes"]`
			`attr["id"] = user["id"]`
[patreon] small fixes and adjustments (#226) - fix datetime parsing - rename 'user' to 'creator' - convert 'id' to integer - improve tests 2019-05-17 13:13:11 +02:00			`attr["date"] = text.parse_datetime(`
			`attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`return attr`

[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00			`def _filename(self, url):`
[patreon] improve hash extraction (#693, #713) Instead of accessing a specific part of a download URL, potentially causing an exception if it doesn't exist, we're now searching through all parts for a potential MD5 hash without ever raising an exception. 2020-04-28 21:40:22 +02:00			`"""Fetch filename from an URL's Content-Disposition header"""`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00			`response = self.request(url, method="HEAD", fatal=False)`
			`cd = response.headers.get("Content-Disposition")`
			`return text.extract(cd, 'filename="', '"')[0]`

[patreon] improve hash extraction (#693, #713) Instead of accessing a specific part of a download URL, potentially causing an exception if it doesn't exist, we're now searching through all parts for a potential MD5 hash without ever raising an exception. 2020-04-28 21:40:22 +02:00			`@staticmethod`
			`def _filehash(url):`
			`"""Extract MD5 hash from a download URL"""`
			`parts = url.partition("?")[0].split("/")`
			`parts.reverse()`

			`for part in parts:`
			`if len(part) == 32:`
			`return part`
			`return ""`

[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`@staticmethod`
			`def _build_url(endpoint, query):`
			`return (`
			`"https://www.patreon.com/api/" + endpoint +`

[patreon] include image info in API results (#383) 2019-08-18 23:21:57 +02:00			`"?include=user,images,attachments,user_defined_tags,campaign,poll."`
			`"choices,poll.current_user_responses.user,poll.current_user_respon"`
			`"ses.choice,poll.current_user_responses.poll,access_rules.tier.nul"`
			`"l"`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00
			`"&fields[post]=change_visibility_at,comment_count,content,current_"`
			`"user_can_delete,current_user_can_view,current_user_has_liked,embe"`
			`"d,image,is_paid,like_count,min_cents_pledged_to_view,post_file,pu"`
			`"blished_at,patron_count,patreon_url,post_type,pledge_url,thumbnai"`
			`"l_url,teaser_text,title,upgrade_url,url,was_posted_by_campaign_ow"`
			`"ner"`
			`"&fields[user]=image_url,full_name,url"`
			`"&fields[campaign]=avatar_photo_url,earnings_visibility,is_nsfw,is"`
			`"_monthly,name,url"`
			`"&fields[access_rule]=access_rule_type,amount_cents" + query +`

			`"&json-api-use-default-includes=false"`
			`"&json-api-version=1.0"`
			`)`

[patreon] implement 'files' option (#1935) 2021-10-17 04:14:58 +02:00			`def _build_file_generators(self, filetypes):`
			`if filetypes is None:`
			`return (self._images, self._attachments,`
			`self._postfile, self._content)`
			`genmap = {`
			`"images" : self._images,`
			`"attachments": self._attachments,`
			`"postfile" : self._postfile,`
			`"content" : self._content,`
			`}`
			`if isinstance(filetypes, str):`
			`filetypes = filetypes.split(",")`
			`return [genmap[ft] for ft in filetypes]`

[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00
			`class PatreonCreatorExtractor(PatreonExtractor):`
			`"""Extractor for a creator's works"""`
			`subcategory = "creator"`
			`pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"`
remove '&' from URL patterns '/?&#' -> '/?#' and '?&#' -> '?#' According to https://www.ietf.org/rfc/rfc3986.txt, URLs are "organized hierarchically" by using "the slash ("/"), question mark ("?"), and number sign ("#") characters to delimit components" 2020-10-22 23:12:59 +02:00			`r"/(?!(?:home\|join\|posts\|login\|signup)(?:$\|[/?#]))"`
			`r"([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?")`
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`test = (`
			`("https://www.patreon.com/koveliana", {`
			`"range": "1-25",`
			`"count": ">= 25",`
			`"keyword": {`
			`"attachments" : list,`
			`"comment_count": int,`
			`"content" : str,`
			`"creator" : dict,`
			`"date" : "type:datetime",`
			`"id" : int,`
			`"images" : list,`
			`"like_count" : int,`
			`"post_type" : str,`
			`"published_at" : str,`
			`"title" : str,`
			`},`
			`}),`
[patreon] respect filters and sort order in query params (#711) 2020-04-28 23:56:48 +02:00			`("https://www.patreon.com/koveliana/posts?filters[month]=2020-3", {`
			`"count": 1,`
			`"keyword": {"date": "dt:2020-03-30 21:21:44"},`
			`}),`
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`("https://www.patreon.com/kovelianot", {`
			`"exception": exception.NotFoundError,`
			`}),`
[patreon] recognize URLs with creator IDs (#711) e.g. https://www.patreon.com/user/posts?u=… 2020-04-26 22:19:10 +02:00			`("https://www.patreon.com/user?u=2931440"),`
			`("https://www.patreon.com/user/posts/?u=2931440"),`
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`)`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00
			`def __init__(self, match):`
			`PatreonExtractor.__init__(self, match)`
[patreon] respect filters and sort order in query params (#711) 2020-04-28 23:56:48 +02:00			`self.creator, self.query = match.groups()`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00
			`def posts(self):`
[patreon] respect filters and sort order in query params (#711) 2020-04-28 23:56:48 +02:00			`query = text.parse_query(self.query)`

			`creator_id = query.get("u")`
			`if creator_id:`
[patreon] reduce redirects when fetching campaign ID 2021-02-27 16:37:41 +01:00			`url = "{}/user/posts?u={}".format(self.root, creator_id)`
[patreon] recognize URLs with creator IDs (#711) e.g. https://www.patreon.com/user/posts?u=… 2020-04-26 22:19:10 +02:00			`else:`
[patreon] reduce redirects when fetching campaign ID 2021-02-27 16:37:41 +01:00			`url = "{}/{}/posts".format(self.root, self.creator)`
[patreon] recognize URLs with creator IDs (#711) e.g. https://www.patreon.com/user/posts?u=… 2020-04-26 22:19:10 +02:00
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`page = self.request(url, notfound="creator").text`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`campaign_id = text.extract(page, "/campaign/", "/")[0]`
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`if not campaign_id:`
			`raise exception.NotFoundError("creator")`

[patreon] respect filters and sort order in query params (#711) 2020-04-28 23:56:48 +02:00			`filters = "".join(`
			`"&filter[{}={}".format(key[8:], text.escape(value))`
			`for key, value in query.items()`
			`if key.startswith("filters[")`
			`)`

[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`url = self._build_url("posts", (`
[patreon] respect filters and sort order in query params (#711) 2020-04-28 23:56:48 +02:00			`"&sort=" + query.get("sort", "-published_at") +`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`"&filter[is_draft]=false"`
			`"&filter[contains_exclusive_posts]=true"`
[patreon] respect filters and sort order in query params (#711) 2020-04-28 23:56:48 +02:00			`"&filter[campaign_id]=" + campaign_id + filters`
[patreon] add extractors (#226) 2019-05-16 23:56:48 +02:00			`))`
			`return self._pagination(url)`


			`class PatreonUserExtractor(PatreonExtractor):`
			`"""Extractor for media from creators supported by you"""`
			`subcategory = "user"`
			`pattern = r"(?:https?://)?(?:www\.)?patreon\.com/home$"`
			`test = ("https://www.patreon.com/home",)`

			`def posts(self):`
			`url = self._build_url("stream", (`
			`"&page[cursor]=null"`
			`"&filter[is_following]=true"`
			`))`
			`return self._pagination(url)`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00

			`class PatreonPostExtractor(PatreonExtractor):`
			`"""Extractor for media from a single post"""`
			`subcategory = "post"`
remove '&' from URL patterns '/?&#' -> '/?#' and '?&#' -> '?#' According to https://www.ietf.org/rfc/rfc3986.txt, URLs are "organized hierarchically" by using "the slash ("/"), question mark ("?"), and number sign ("#") characters to delimit components" 2020-10-22 23:12:59 +02:00			`pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?#]+)"`
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`test = (`
[patreon] filter duplicate files per post (#590) 2020-02-05 22:47:20 +01:00			`# postfile + attachments`
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`("https://www.patreon.com/posts/precious-metal-23563293", {`
			`"count": 4,`
			`}),`
[patreon] filter duplicate files per post (#590) 2020-02-05 22:47:20 +01:00			`# postfile + content`
[patreon] better filenames for 'content' images (#1954) 2021-10-16 22:58:28 +02:00			`("https://www.patreon.com/posts/56127163", {`
			`"count": 3,`
			`"keyword": {"filename": r"re:^(?!1).+$"},`
[patreon] fix regex pattern for posts The previous one would match the first number in the URL slug as post ID, which would fail for posts with numbers in their title. 2019-12-14 22:06:08 +01:00			`}),`
[patreon] extract user defined 'tags' (#1539, closes #1540) 2021-05-18 00:31:01 +02:00			`# tags (#1539)`
			`("https://www.patreon.com/posts/free-post-12497641", {`
			`"keyword": {"tags": ["AWMedia"]},`
			`}),`
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`("https://www.patreon.com/posts/not-found-123", {`
			`"exception": exception.NotFoundError,`
			`}),`
			`)`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00
			`def __init__(self, match):`
			`PatreonExtractor.__init__(self, match)`
[patreon] fix regex pattern for posts The previous one would match the first number in the URL slug as post ID, which would fail for posts with numbers in their title. 2019-12-14 22:06:08 +01:00			`self.slug = match.group(1)`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00
			`def posts(self):`
[patreon] fix regex pattern for posts The previous one would match the first number in the URL slug as post ID, which would fail for posts with numbers in their title. 2019-12-14 22:06:08 +01:00			`url = "{}/posts/{}".format(self.root, self.slug)`
[patreon] raise proper exception if creator/post doesn't exist 2019-12-12 01:14:32 +01:00			`page = self.request(url, notfound="post").text`
[patreon] support multi image posts and post URLs (#383) 2019-08-17 23:20:26 +02:00			`data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0]`
			`post = json.loads(data + "}")["post"]`

			`included = self._transform(post["included"])`
			`return (self._process(post["data"], included),)`