gallery-dl/gallery_dl/extractor/gelbooru.py

# -*- coding: utf-8 -*-

# Copyright 2014-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract images from https://gelbooru.com/"""

from .common import SharedConfigExtractor, Message
from .. import text, util, exception
import xml.etree.ElementTree as ET


class GelbooruExtractor(SharedConfigExtractor):
    """Base class for gelbooru extractors

    Subclasses supply the posts to process through get_posts() and may
    provide directory metadata through get_metadata(); items() turns the
    posts into download messages.
    """
    basecategory = "booru"
    category = "gelbooru"
    filename_fmt = "{category}_{id}_{md5}.{extension}"
    archive_fmt = "{id}"
    api_url = "https://gelbooru.com/index.php?page=dapi&s=post&q=index"

    def __init__(self):
        SharedConfigExtractor.__init__(self)
        # number of leading posts items() drops from get_posts()
        self.start_post = 0
        # the "api" option selects between API requests and HTML scraping
        self.use_api = self.config("api", True)
        if self.use_api:
            # shadow the HTML-scraping method on this instance
            self.get_post_data = self.get_post_data_api

    def items(self):
        """Yield a version, a directory and one URL message per post"""
        yield Message.Version, 1
        yield Message.Directory, self.get_metadata()

        for post in util.advance(self.get_posts(), self.start_post):
            # a plain string is a post ID whose metadata is still missing
            if isinstance(post, str):
                post = self.get_post_data(post)
            for key in ("id", "width", "height", "score", "change"):
                post[key] = util.safe_int(post[key])
            url = post["file_url"]
            yield Message.Url, url, text.nameext_from_url(url, post)

    def skip(self, num):
        """Skip the first 'num' posts; return the number of skipped posts"""
        self.start_post += num
        return num

    def get_metadata(self):
        """Return general metadata"""
        return {}

    def get_posts(self):
        """Return an iterable containing all relevant post objects"""

    def get_post_data(self, post_id):
        """Extract metadata of a single post

        Scrapes the post's HTML page and returns a dict with the keys
        listed in the extraction rules below plus 'md5'.
        Raises exception.NotFoundError if no file URL could be extracted.
        """
        page = self.request("https://gelbooru.com/index.php?page=post&s=view"
                            "&id=" + post_id).text
        data = text.extract_all(page, (
            (None        , '<meta name="keywords"', ''),
            ("tags"      , ' imageboard, ', '"'),
            ("id"        , '<li>Id: ', '<'),
            ("created_at", '<li>Posted: ', '<'),
            ("width"     , '<li>Size: ', 'x'),
            ("height"    , '', '<'),
            ("source"    , '<li>Source: <a href="', '"'),
            ("rating"    , '<li>Rating: ', '<'),
            (None        , '<li>Score: ', ''),
            ("score"     , '>', '<'),
            ("file_url"  , '<li><a href="http', '"'),
            ("change"    , ' id="lupdated" value="', '"'),
        ))[0]

        # NOTE(review): assumes extract_all() leaves a falsy value (or no
        # entry at all) for markers it cannot find -- confirm in 'text'.
        # Without a file URL there is nothing to download, so report the
        # post as missing instead of failing on the string operations below.
        if not data.get("file_url"):
            raise exception.NotFoundError("post")

        # the rule above consumed the leading "http"; restore it and
        # collapse the first "m//" into "m/"
        data["file_url"] = "http" + data["file_url"].replace("m//", "m/", 1)
        # filename without its extension
        data["md5"] = data["file_url"].rpartition("/")[2].partition(".")[0]
        # keep only the lowercased first letter; "?" when no rating was found
        data["rating"] = (data["rating"] or "?")[0].lower()
        # ", "-separated keywords -> space-separated tags with underscores
        data["tags"] = " ".join(
            [tag.replace(" ", "_") for tag in data["tags"].split(", ")])
        return data

    def get_post_data_api(self, post_id):
        """Request metadata of a single post from Gelbooru's API

        Returns the attribute dict of the first element in the response.
        Raises exception.NotFoundError if the response contains no post.
        """
        root = ET.fromstring(
            self.request(self.api_url + "&id=" + post_id).text)
        # a root element without children means no post was returned;
        # indexing it would only produce a bare IndexError
        if len(root) == 0:
            raise exception.NotFoundError("post")
        return root[0].attrib


class GelbooruTagExtractor(GelbooruExtractor):
    """Extractor for images from gelbooru.com based on search-tags"""
    subcategory = "tag"
    directory_fmt = ["{category}", "{tags}"]
    pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
               r"\?page=post&s=list&tags=([^&]+)"]
    test = [
        ("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
            "count": 5,
        }),
        ("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
            "options": (("api", False),),
            "count": 5,
        }),
    ]

    def __init__(self, match):
        GelbooruExtractor.__init__(self)
        raw_tags = match.group(1)
        self.tags = text.unquote(raw_tags.replace("+", " "))
        # page size differs between API results and HTML listings
        if self.use_api:
            self.per_page = 100
        else:
            self.per_page = 42
        self.start_page = 0

    def skip(self, num):
        """Skip 'num' posts by whole pages first, then by single posts"""
        whole_pages, leftover = divmod(num, self.per_page)
        self.start_page += whole_pages
        self.start_post += leftover
        return num

    def get_metadata(self):
        """Return the search tags as directory metadata"""
        return {"tags": self.tags}

    def get_posts(self):
        """Return a post generator matching the configured access method"""
        if not self.use_api:
            return self._get_posts_manual()
        return self._get_posts_api()

    def _get_posts_api(self):
        """Yield attribute dicts of all posts returned by the API"""
        # 'pid' is page-id; first page has index 0
        query = {
            "tags": self.tags,
            "limit": self.per_page,
            "pid": self.start_page,
        }
        while True:
            response = self.request(self.api_url, params=query)
            root = ET.fromstring(response.text)
            yield from (node.attrib for node in root)
            # a page that is not full is the last one
            if len(root) < self.per_page:
                break
            query["pid"] += 1

    def _get_posts_manual(self):
        """Yield post IDs collected from the HTML listing pages"""
        list_url = "https://gelbooru.com/index.php?page=post&s=list"
        # 'pid' is post-id; values for 'pid' must be multiples of 42
        query = {"tags": self.tags, "pid": self.start_page * self.per_page}

        while True:
            html = self.request(list_url, params=query).text
            post_ids = list(text.extract_iter(html, '<a id="p', '"'))
            for post_id in post_ids:
                yield post_id
            # a page that is not full is the last one
            if len(post_ids) < self.per_page:
                break
            query["pid"] += self.per_page


class GelbooruPoolExtractor(GelbooruExtractor):
    """Extractor for image-pools from gelbooru.com"""
    subcategory = "pool"
    directory_fmt = ["{category}", "pool", "{pool}"]
    pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
               r"\?page=pool&s=show&id=(\d+)"]
    test = [("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
        "count": 6,
    })]

    def __init__(self, match):
        GelbooruExtractor.__init__(self)
        self.pool_id = match.group(1)
        # filled in by get_metadata()
        self.posts = None

    def get_metadata(self):
        """Fetch the pool page; store its post IDs and return pool metadata

        Raises exception.NotFoundError if the page has no pool title.
        """
        pool_url = ("https://gelbooru.com/index.php?page=pool&s=show"
                    "&id=" + self.pool_id)
        html = self.request(pool_url).text

        title, offset = text.extract(html, "<h3>Now Viewing: ", "</h3>")
        # post IDs are searched for from the position extract() returned
        post_ids = text.extract_iter(html, 'id="p', '"', offset)
        self.posts = list(post_ids)

        if title:
            return {
                "pool": util.safe_int(self.pool_id),
                "pool_name": text.unescape(title),
                "count": len(self.posts),
            }
        raise exception.NotFoundError("pool")

    def get_posts(self):
        """Return the post IDs collected by get_metadata()"""
        return self.posts


class GelbooruPostExtractor(GelbooruExtractor):
    """Extractor for single images from gelbooru.com"""
    subcategory = "post"
    pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
               r"\?page=post&s=view&id=(\d+)"]
    test = [("https://gelbooru.com/index.php?page=post&s=view&id=313638", {
        "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
        "count": 1,
    })]

    def __init__(self, match):
        GelbooruExtractor.__init__(self)
        # the pattern's only capture group is the numeric post ID
        self.post_id = match.group(1)

    def get_posts(self):
        """Return a one-element tuple holding the requested post ID"""
        only_post = self.post_id
        return (only_post,)
[gelbooru] update to new extractor interface 2015-04-11 00:17:43 +02:00			`# -- coding: utf-8 --`
initial commit 2014-10-12 21:56:44 +02:00
set 'archive_fmt' values These are going to be used to create an unique id for each image. 2018-01-30 22:49:16 +01:00			`# Copyright 2014-2018 Mike Fährmann`
[gelbooru] update to new extractor interface 2015-04-11 00:17:43 +02:00			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

update some extractors to use https 2017-04-20 13:20:41 +02:00			`"""Extract images from https://gelbooru.com/"""`
[gelbooru] update to new extractor interface 2015-04-11 00:17:43 +02:00
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`from .common import SharedConfigExtractor, Message`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`from .. import text, util, exception`
			`import xml.etree.ElementTree as ET`
[gelbooru] update to new extractor interface 2015-04-11 00:17:43 +02:00
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`class GelbooruExtractor(SharedConfigExtractor):`
[gelbooru] update to new format 2015-11-21 02:40:30 +01:00			`"""Base class for gelbooru extractors"""`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`basecategory = "booru"`
[gelbooru] update to new format 2015-11-21 02:40:30 +01:00			`category = "gelbooru"`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`filename_fmt = "{category}_{id}_{md5}.{extension}"`
set 'archive_fmt' values These are going to be used to create an unique id for each image. 2018-01-30 22:49:16 +01:00			`archive_fmt = "{id}"`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`api_url = "https://gelbooru.com/index.php?page=dapi&s=post&q=index"`
[gelbooru] update to new format 2015-11-21 02:40:30 +01:00
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`def __init__(self):`
			`SharedConfigExtractor.__init__(self)`
			`self.start_post = 0`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`self.use_api = self.config("api", True)`
			`if self.use_api:`
			`self.get_post_data = self.get_post_data_api`
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`def items(self):`
			`yield Message.Version, 1`
			`yield Message.Directory, self.get_metadata()`
initial commit 2014-10-12 21:56:44 +02:00
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`for post in util.advance(self.get_posts(), self.start_post):`
			`if isinstance(post, str):`
			`post = self.get_post_data(post)`
			`for key in ("id", "width", "height", "score", "change"):`
			`post[key] = util.safe_int(post[key])`
			`url = post["file_url"]`
			`yield Message.Url, url, text.nameext_from_url(url, post)`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`def skip(self, num):`
			`self.start_post += num`
			`return num`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00
			`def get_metadata(self):`
			`"""Return general metadata"""`
			`return {}`

[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`def get_posts(self):`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`"""Return an iterable containing all relevant post objects"""`
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`def get_post_data(self, post_id):`
			`"""Extract metadata of a single post"""`
			`page = self.request("https://gelbooru.com/index.php?page=post&s=view"`
			`"&id=" + post_id).text`
			`data = text.extract_all(page, (`
			`(None , '<meta name="keywords"', ''),`
			`("tags" , ' imageboard, ', '"'),`
			`("id" , '<li>Id: ', '<'),`
			`("created_at", '<li>Posted: ', '<'),`
			`("width" , '<li>Size: ', 'x'),`
			`("height" , '', '<'),`
			`("source" , '<li>Source: <a href="', '"'),`
			`("rating" , '<li>Rating: ', '<'),`
			`(None , '<li>Score: ', ''),`
			`("score" , '>', '<'),`
			`("file_url" , '<li><a href="http', '"'),`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`("change" , ' id="lupdated" value="', '"'),`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`))[0]`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`data["file_url"] = "http" + data["file_url"].replace("m//", "m/", 1)`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`data["md5"] = data["file_url"].rpartition("/")[2].partition(".")[0]`
various smaller changes/additions 2017-12-05 23:29:11 +01:00			`data["rating"] = (data["rating"] or "?")[0].lower()`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`data["tags"] = " ".join(`
			`[tag.replace(" ", "_") for tag in data["tags"].split(", ")])`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`return data`

[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`def get_post_data_api(self, post_id):`
			`"""Request metadata of a single post from Gelbooru's API"""`
			`return ET.fromstring(`
			`self.request(self.api_url + "&id=" + post_id).text)[0].attrib`

[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00
			`class GelbooruTagExtractor(GelbooruExtractor):`
consistent extractor naming scheme + docstrings 2016-09-12 10:20:57 +02:00			`"""Extractor for images from gelbooru.com based on search-tags"""`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`subcategory = "tag"`
			`directory_fmt = ["{category}", "{tags}"]`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00			`pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"`
			`r"\?page=post&s=list&tags=([^&]+)"]`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`test = [`
			`("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {`
			`"count": 5,`
			`}),`
			`("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {`
			`"options": (("api", False),),`
			`"count": 5,`
			`}),`
			`]`
[gelbooru] update to new format 2015-11-21 02:40:30 +01:00
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`def __init__(self, match):`
			`GelbooruExtractor.__init__(self)`
			`self.tags = text.unquote(match.group(1).replace("+", " "))`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`self.per_page = 100 if self.use_api else 42`
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`self.start_page = 0`

			`def skip(self, num):`
			`pages, posts = divmod(num, self.per_page)`
			`self.start_page += pages`
			`self.start_post += posts`
			`return num`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`def get_metadata(self):`
			`return {"tags": self.tags}`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`def get_posts(self):`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`if self.use_api:`
			`return self._get_posts_api()`
			`return self._get_posts_manual()`

			`def _get_posts_api(self):`
			`params = {`
			`# 'pid' is page-id; first page has index 0`
			`"tags": self.tags, "limit": self.per_page, "pid": self.start_page}`
			`while True:`
			`root = ET.fromstring(`
			`self.request(self.api_url, params=params).text)`
			`for item in root:`
			`yield item.attrib`
			`if len(root) < self.per_page:`
			`return`
			`params["pid"] += 1`

			`def _get_posts_manual(self):`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`url = "https://gelbooru.com/index.php?page=post&s=list"`
[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`# 'pid' is post-id; values for 'pid' must be multiples of 42`
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`params = {"tags": self.tags, "pid": self.start_page * self.per_page}`
[gelbooru] update to new format 2015-11-21 02:40:30 +01:00
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`while True:`
			`page = self.request(url, params=params).text`
			`ids = list(text.extract_iter(page, '<a id="p', '"'))`
			`yield from ids`
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`if len(ids) < self.per_page:`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`return`
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`params["pid"] += self.per_page`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00

			`class GelbooruPoolExtractor(GelbooruExtractor):`
			`"""Extractor for image-pools from gelbooru.com"""`
			`subcategory = "pool"`
			`directory_fmt = ["{category}", "pool", "{pool}"]`
			`pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"`
			`r"\?page=pool&s=show&id=(\d+)"]`
			`test = [("https://gelbooru.com/index.php?page=pool&s=show&id=761", {`
			`"count": 6,`
			`})]`

			`def __init__(self, match):`
			`GelbooruExtractor.__init__(self)`
			`self.pool_id = match.group(1)`
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`self.posts = None`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00
			`def get_metadata(self):`
			`page = self.request("https://gelbooru.com/index.php?page=pool&s=show"`
			`"&id=" + self.pool_id).text`
[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")`
			`self.posts = list(text.extract_iter(page, 'id="p', '"', pos))`

[gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. 2017-12-21 21:42:40 +01:00			`if not name:`
			`raise exception.NotFoundError("pool")`

[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`return {`
			`"pool": util.safe_int(self.pool_id),`
			`"pool_name": text.unescape(name),`
			`"count": len(self.posts),`
			`}`

			`def get_posts(self):`
			`return self.posts`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00

			`class GelbooruPostExtractor(GelbooruExtractor):`
consistent extractor naming scheme + docstrings 2016-09-12 10:20:57 +02:00			`"""Extractor for single images from gelbooru.com"""`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`subcategory = "post"`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00			`pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"`
			`r"\?page=post&s=view&id=(\d+)"]`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`test = [("https://gelbooru.com/index.php?page=post&s=view&id=313638", {`
update booru testdata 2015-12-22 03:10:52 +01:00			`"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`"count": 1,`
more extractor test-cases 2015-12-14 03:00:58 +01:00			`})]`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00
			`def __init__(self, match):`
			`GelbooruExtractor.__init__(self)`
			`self.post_id = match.group(1)`

[gelbooru] various improvements - better metadata for pools - map ratings to s/q/e like other boorus do - skip() support 2017-12-03 01:41:30 +01:00			`def get_posts(self):`
[gelbooru] use manual extraction ... to compensate for their disabled API. (https://gelbooru.com/index.php?page=forum&s=view&id=3875) This also adds an extractor for image-pools. 2017-11-29 20:48:17 +01:00			`return (self.post_id,)`