Mirror of https://github.com/mikf/gallery-dl.git
[booru] add generalized extractors for *booru sites

similar to cc15fbe7

parent 5f23441e12
commit a3a863fc13
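The rewritten booru.py below drives everything through generate_extractors() from extractor/common.py: one EXTRACTORS dict entry per site, expanded into per-site subclasses of the shared tag/pool/post base classes. As a rough, self-contained sketch of that mechanism (simplified and hypothetical in its details; the real function also wires up test cases and further attributes), each generated class combines a base class's pattern_fmt with the site's root domain:

    # Minimal sketch of the generate_extractors() idea -- NOT the actual
    # implementation from gallery_dl/extractor/common.py.
    import re

    class TagExtractor:
        """Stand-in for BooruTagExtractor; generated subclasses fill in the rest."""
        subcategory = "tag"
        pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)"

    def generate_extractors_sketch(extractor_data, namespace, classes):
        # one generated class per (site, base class) pair
        for category, info in extractor_data.items():
            root = info["root"]
            domain = root.rpartition("/")[2]  # e.g. "rule34.xxx"
            for cls in classes:
                # build e.g. "Rule34TagExtractor" from category + subcategory
                name = (category.capitalize()
                        + cls.subcategory.capitalize() + "Extractor")
                pattern = (r"(?:https?://)?(?:www\.)?"
                           + re.escape(domain) + cls.pattern_fmt)
                namespace[name] = type(name, (cls,), {
                    "category": category,
                    "root": root,
                    "pattern": pattern,
                })

    EXTRACTORS = {"rule34": {"root": "https://rule34.xxx"}}
    generate_extractors_sketch(EXTRACTORS, globals(), (TagExtractor,))
    print(Rule34TagExtractor.pattern)  # class was injected into globals()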
gallery_dl/extractor/__init__.py

@@ -92,11 +92,8 @@ modules = [
     "pururin",
     "reactor",
     "readcomiconline",
-    "realbooru",
     "reddit",
     "redgifs",
-    "rule34",
-    "safebooru",
     "sankaku",
     "sankakucomplex",
     "seiga",
@@ -122,6 +119,7 @@ modules = [
     "xhamster",
     "xvideos",
     "yuki",
+    "booru",
     "moebooru",
     "foolfuuka",
     "foolslide",
gallery_dl/extractor/booru.py

@@ -1,247 +1,248 @@
 # -*- coding: utf-8 -*-

-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2020 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Base classes for extractors for danbooru and co"""
+"""Extractors for *booru sites"""

-from .common import Extractor, Message
-from .. import text, exception
+from .common import Extractor, Message, generate_extractors
+from .. import text, util, exception
 from xml.etree import ElementTree
 import collections
-import datetime
-import operator
 import re


 class BooruExtractor(Extractor):
-    """Base class for all booru extractors"""
+    """Base class for *booru extractors"""
     basecategory = "booru"
     filename_fmt = "{category}_{id}_{md5}.{extension}"
-    api_url = ""
-    post_url = ""
-    per_page = 50
-    page_start = 1
-    page_limit = None
-    sort = False
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params = {}
-        self.extags = self.post_url and self.config("tags", False)
+    page_start = 0
+    per_page = 100
+
+    def items(self):
+        self.login()
+        extended_tags = self.config("tags", False)
+        data = self.metadata()
+        for post in self.posts():
+            try:
+                url = self._prepare_post(post, extended_tags)
+            except KeyError:
+                continue
+            post.update(data)
+            text.nameext_from_url(url, post)
+            yield Message.Directory, post
+            yield Message.Url, url, post

     def skip(self, num):
         pages = num // self.per_page
-        if self.page_limit and pages + self.page_start > self.page_limit:
-            pages = self.page_limit - self.page_start
         self.page_start += pages
         return pages * self.per_page

-    def items(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
+    def login(self):
+        """Login and set necessary cookies"""

-        self.reset_page()
-        while True:
-            images = self.parse_response(
-                self.request(self.api_url, params=self.params))
+    def metadata(self):
+        """Return a dict with general metadata"""
+        return ()

-            for image in images:
-                try:
-                    url = self.get_file_url(image)
-                except KeyError:
-                    continue
-                if url.startswith("/"):
-                    url = text.urljoin(self.api_url, url)
-                image.update(data)
-                text.nameext_from_url(url, image)
-                if self.extags:
-                    self.extended_tags(image)
-                yield Message.Directory, image
-                yield Message.Url, url, image
+    def posts(self):
+        """Return an iterable with post objects"""
+        return ()

-            if len(images) < self.per_page:
-                return
-            self.update_page(image)
+    def _prepare_post(self, post, extended_tags=False):
+        url = post["file_url"]
+        if url[0] == "/":
+            url = self.root + url
+        if extended_tags:
+            self._fetch_extended_tags(post)
+        post["date"] = text.parse_datetime(
+            post["created_at"], "%a %b %d %H:%M:%S %z %Y")
+        return url

-    def reset_page(self):
-        """Initialize params to point to the first page"""
-        self.params["page"] = self.page_start
-
-    def update_page(self, data):
-        """Update params to point to the next page"""
-
-    def parse_response(self, response):
-        """Parse JSON API response"""
-        images = response.json()
-        if self.sort:
-            images.sort(key=operator.itemgetter("score", "id"),
-                        reverse=True)
-        return images
-
-    def get_metadata(self):
-        """Collect metadata for extractor-job"""
-        return {}
-
-    @staticmethod
-    def get_file_url(image):
-        return image["file_url"]
-
-    def extended_tags(self, image, page=None):
-        """Retrieve extended tag information"""
+    def _fetch_extended_tags(self, post, page=None):
         if not page:
-            url = self.post_url.format(image["id"])
+            url = "{}/index.php?page=post&s=view&id={}".format(
+                self.root, post["id"])
             page = self.request(url).text
-        tags = collections.defaultdict(list)
-        tags_html = text.extract(page, '<ul id="tag-', '</ul>')[0]
-        pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"']+)", re.S)
-        for tag_type, tag_name in pattern.findall(tags_html or ""):
-            tags[tag_type].append(text.unquote(tag_name))
-        for key, value in tags.items():
-            image["tags_" + key] = " ".join(value)
+        html = text.extract(page, '<ul id="tag-', '</ul>')[0]
+        if html:
+            tags = collections.defaultdict(list)
+            pattern = re.compile(
+                r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S)
+            for tag_type, tag_name in pattern.findall(html):
+                tags[tag_type].append(text.unquote(tag_name))
+            for key, value in tags.items():
+                post["tags_" + key] = " ".join(value)
+
+    def _api_request(self, params):
+        url = self.root + "/index.php?page=dapi&s=post&q=index"
+        return ElementTree.fromstring(self.request(url, params=params).text)
+
+    def _pagination(self, params):
+        params["pid"] = self.page_start
+        params["limit"] = self.per_page
+
+        while True:
+            root = self._api_request(params)
+            for post in root:
+                yield post.attrib
+
+            if len(root) < self.per_page:
+                return
+            params["pid"] += 1


-class XmlParserMixin():
-    """Mixin for XML based API responses"""
-    def parse_response(self, response):
-        root = ElementTree.fromstring(response.text)
-        return [post.attrib for post in root]
+class BooruPostExtractor(BooruExtractor):
+    subcategory = "post"
+    archive_fmt = "{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=view&id=(\d+)"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self, match)
+        self.post_id = match.group(1)
+
+    def posts(self):
+        return self._pagination({"id": self.post_id})


-class MoebooruPageMixin():
-    """Pagination for Moebooru and Danbooru v1"""
-    def update_page(self, data):
-        if self.page_limit:
-            self.params["page"] = None
-            self.params["before_id"] = data["id"]
-        else:
-            self.params["page"] += 1
-
-
-class GelbooruPageMixin():
-    """Pagination for Gelbooru-like sites"""
-    page_start = 0
-
-    def reset_page(self):
-        self.params["pid"] = self.page_start
-
-    def update_page(self, data):
-        self.params["pid"] += 1
-
-
-class TagMixin():
-    """Extraction of images based on search-tags"""
+class BooruTagExtractor(BooruExtractor):
     subcategory = "tag"
     directory_fmt = ("{category}", "{search_tags}")
     archive_fmt = "t_{search_tags}_{id}"
+    pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)"

     def __init__(self, match):
-        super().__init__(match)
-        self.tags = text.unquote(match.group("tags").replace("+", " "))
-        self.params["tags"] = self.tags
-        self.params["limit"] = self.per_page
+        BooruExtractor.__init__(self, match)
+        self.tags = text.unquote(match.group(1).replace("+", " "))

-    def get_metadata(self):
+    def metadata(self):
         return {"search_tags": self.tags}

+    def posts(self):
+        return self._pagination({"tags" : self.tags})
+

-class PoolMixin():
-    """Extraction of image-pools"""
+class BooruPoolExtractor(BooruExtractor):
     subcategory = "pool"
     directory_fmt = ("{category}", "pool", "{pool}")
     archive_fmt = "p_{pool}_{id}"
+    pattern_fmt = r"/index\.php\?page=pool&s=show&id=(\d+)"

     def __init__(self, match):
-        super().__init__(match)
-        self.pool = match.group("pool")
-        self.params["tags"] = "pool:" + self.pool
-        self.params["limit"] = self.per_page
-
-    def get_metadata(self):
-        return {"pool": text.parse_int(self.pool)}
-
-
-class GelbooruPoolMixin(PoolMixin):
-    """Image-pool extraction for Gelbooru-like sites"""
-    per_page = 1
-
-    def get_metadata(self):
-        page = self.request(self.pool_url.format(self.pool)).text
-        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
-        if not name:
-            name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
+        BooruExtractor.__init__(self, match)
+        self.pool_id = match.group(1)
+        self.post_ids = ()
+
+    def skip(self, num):
+        self.page_start += num
+        return num
+
+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+
+        name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
         if not name:
             raise exception.NotFoundError("pool")
-        self.posts = list(text.extract_iter(
-            page, 'class="thumb" id="p', '"', pos))
+        self.post_ids = text.extract_iter(
+            page, 'class="thumb" id="p', '"', pos)

         return {
-            "pool": text.parse_int(self.pool),
+            "pool": text.parse_int(self.pool_id),
             "pool_name": text.unescape(name),
-            "count": len(self.posts),
         }

-    def reset_page(self):
-        self.index = self.page_start
-        self.update_page(None)
-
-    def update_page(self, data):
-        try:
-            post = self.posts[self.index]
-            self.index += 1
-        except IndexError:
-            post = "0"
-        self.params["tags"] = "id:" + post
+    def posts(self):
+        params = {}
+        for params["id"] in util.advance(self.post_ids, self.page_start):
+            for post in self._api_request(params):
+                yield post.attrib


-class PostMixin():
-    """Extraction of a single image-post"""
-    subcategory = "post"
-    archive_fmt = "{id}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.post = match.group("post")
-        self.params["tags"] = "id:" + self.post
-
-
-class MoebooruPopularMixin():
-    """Extraction and metadata handling for Moebooru and Danbooru v1"""
-    subcategory = "popular"
-    directory_fmt = ("{category}", "popular", "{scale}", "{date}")
-    archive_fmt = "P_{scale[0]}_{date}_{id}"
-    page_start = None
-    sort = True
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update(text.parse_query(match.group("query")))
-        self.scale = match.group("scale")
-
-    def get_metadata(self, fmt="%Y-%m-%d"):
-        date = self.get_date() or datetime.date.today().isoformat()
-        scale = self.get_scale() or "day"
-
-        if scale == "week":
-            date = datetime.date.fromisoformat(date)
-            date = (date - datetime.timedelta(days=date.weekday())).isoformat()
-        elif scale == "month":
-            date = date[:-3]
-
-        return {"date": date, "scale": scale}
-
-    def get_date(self):
-        if "year" in self.params:
-            return "{:>04}-{:>02}-{:>02}".format(
-                self.params["year"],
-                self.params.get("month", "01"),
-                self.params.get("day", "01"))
-        return None
-
-    def get_scale(self):
-        if self.scale and self.scale.startswith("by_"):
-            return self.scale[3:]
-        return self.scale
+EXTRACTORS = {
+    "rule34": {
+        "root": "https://rule34.xxx",
+        "test-tag": (
+            ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
+                "count": 1,
+            }),
+        ),
+        "test-pool": (
+            ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
+                "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "danraku",
+                    "tags_character": "kashima_(kantai_collection)",
+                    "tags_copyright": "kantai_collection",
+                    "tags_general": str,
+                    "tags_metadata": str,
+                },
+            }),
+        ),
+    },
+    "safebooru": {
+        "root": "https://safebooru.org",
+        "test-tag": (
+            ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
+                "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
+                "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
+            }),
+        ),
+        "test-pool": (
+            ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
+                "count": 5,
+            }),
+        ),
+        "test-post": (
+            ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
+                "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
+                "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
+                "options": (("tags", True),),
+                "keyword": {
+                    "tags_artist": "kawanakajima",
+                    "tags_character": "heath_ledger ronald_mcdonald the_joker",
+                    "tags_copyright": "dc_comics mcdonald's the_dark_knight",
+                    "tags_general": str,
+                },
+            }),
+        ),
+    },
+    "realbooru": {
+        "root": "https://realbooru.com",
+        "test-tag": (
+            ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
+                "count": ">= 64",
+            }),
+        ),
+        "test-pool": (
+            ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
+                "count": 3,
+            }),
+        ),
+        "test-post": (
+            ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
+                "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
+                "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
+            }),
+        ),
+    },
+}
+
+generate_extractors(EXTRACTORS, globals(), (
+    BooruTagExtractor,
+    BooruPoolExtractor,
+    BooruPostExtractor,
+))
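For reference, the XML "dapi" endpoint that the new _api_request()/_pagination() methods consume can be exercised on its own. A minimal standalone sketch (assuming the third-party requests package and that safebooru.org is reachable; endpoint and parameter names are taken from the diff above):

    # Fetch one page of posts from a Gelbooru-style "dapi" XML API,
    # the same request _api_request() builds and _pagination() loops over.
    from xml.etree import ElementTree
    import requests

    url = "https://safebooru.org/index.php"
    params = {
        "page": "dapi", "s": "post", "q": "index",  # API mode
        "tags": "bonocho",                          # search tags
        "pid": 0,                                   # page number
        "limit": 100,                               # posts per page (per_page)
    }
    root = ElementTree.fromstring(requests.get(url, params=params).text)
    for post in root:
        # each <post> element's attributes form the post dict yielded above
        print(post.attrib["id"], post.attrib["file_url"])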
|
||||
|
gallery_dl/extractor/gelbooru.py

@@ -6,98 +6,27 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extract images from https://gelbooru.com/"""
+"""Extractors for https://gelbooru.com/"""

 from . import booru
-from .common import Message
-from .. import text
+from .. import text, exception


-class GelbooruExtractor(booru.XmlParserMixin,
-                        booru.GelbooruPageMixin,
-                        booru.BooruExtractor):
+class GelbooruBase():
     """Base class for gelbooru extractors"""
     category = "gelbooru"
-    api_url = "https://gelbooru.com/index.php"
-    post_url = "https://gelbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://gelbooru.com/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-
-        self.use_api = self.config("api", True)
-        if self.use_api:
-            self.params.update({"page": "dapi", "s": "post", "q": "index"})
-        else:
-            self.items = self.items_noapi
-            self.session.cookies["fringeBenefits"] = "yup"
-            self.per_page = 42
-
-    @staticmethod
-    def get_file_url(image):
-        url = image["file_url"]
+    root = "https://gelbooru.com"
+
+    def _prepare_post(self, post, extended_tags=False):
+        url = booru.BooruExtractor._prepare_post(self, post, extended_tags)
         if url.startswith("https://mp4.gelbooru.com/"):
-            ihash = image["md5"]
+            md5 = post["md5"]
             return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
-                ihash[0:2], ihash[2:4], ihash)
+                md5[0:2], md5[2:4], md5)
         return url

-    def items_noapi(self):
-        yield Message.Version, 1
-        data = self.get_metadata()
-
-        for post in self.get_posts():
-            post = self.get_post_data(post)
-            url = post["file_url"]
-            post.update(data)
-            text.nameext_from_url(url, post)
-            yield Message.Directory, post
-            yield Message.Url, url, post
-
-    def get_posts(self):
-        """Return an iterable containing all relevant post objects"""
-        url = "https://gelbooru.com/index.php?page=post&s=list"
-        params = {
-            "tags": self.params["tags"],
-            "pid" : self.page_start * self.per_page
-        }
-
-        while True:
-            page = self.request(url, params=params).text
-            ids = list(text.extract_iter(page, '<span id="s', '"'))
-            yield from ids
-            if len(ids) < self.per_page:
-                return
-            params["pid"] += self.per_page
-
-    def get_post_data(self, post_id):
-        """Extract metadata of a single post"""
-        page = self.request(self.post_url.format(post_id)).text
-        data = text.extract_all(page, (
-            (None        , '<meta name="keywords"', ''),
-            ("tags"      , ' imageboard- ', '"'),
-            ("id"        , '<li>Id: ', '<'),
-            ("created_at", '<li>Posted: ', '<'),
-            ("width"     , '<li>Size: ', 'x'),
-            ("height"    , '', '<'),
-            ("source"    , '<li>Source: <a href="', '"'),
-            ("rating"    , '<li>Rating: ', '<'),
-            (None        , '<li>Score: ', ''),
-            ("score"     , '>', '<'),
-            ("file_url"  , '<li><a href="http', '"'),
-            ("change"    , ' id="lupdated" value="', '"'),
-        ))[0]
-        data["file_url"] = "http" + data["file_url"].replace("m//", "m/", 1)
-        data["md5"] = data["file_url"].rpartition("/")[2].partition(".")[0]
-        data["rating"] = (data["rating"] or "?")[0].lower()
-        data["tags"] = " ".join(
-            [tag.replace(" ", "_") for tag in data["tags"].split(", ")])
-        if self.extags:
-            self.extended_tags(data, page)
-        return data
-
-
-class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
+
+class GelbooruTagExtractor(GelbooruBase, booru.BooruTagExtractor):
     """Extractor for images from gelbooru.com based on search-tags"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
@@ -112,7 +41,7 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
     )


-class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
+class GelbooruPoolExtractor(GelbooruBase, booru.BooruPoolExtractor):
     """Extractor for image-pools from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=pool&s=show&id=(?P<pool>\d+)")
@@ -126,8 +55,23 @@ class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor):
         }),
     )

+    def metadata(self):
+        url = "{}/index.php?page=pool&s=show&id={}".format(
+            self.root, self.pool_id)
+        page = self.request(url).text
+
+        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+        if not name:
+            raise exception.NotFoundError("pool")
+        self.post_ids = text.extract_iter(page, 'class="" id="p', '"', pos)
+
+        return {
+            "pool": text.parse_int(self.pool_id),
+            "pool_name": text.unescape(name),
+        }
+

-class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
+class GelbooruPostExtractor(GelbooruBase, booru.BooruPostExtractor):
     """Extractor for single images from gelbooru.com"""
     pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
                r"\?page=post&s=view&id=(?P<post>\d+)")
@@ -135,6 +79,3 @@ class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):
         "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
         "count": 1,
     })
-
-    def get_posts(self):
-        return (self.post,)
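One detail worth isolating from the Gelbooru hunk above: _prepare_post() rewrites mp4.gelbooru.com video URLs into md5-addressed .webm paths on img2.gelbooru.com. A minimal sketch of that mapping, pulled out as a plain function (the example hash is a made-up placeholder):

    def webm_url_from_md5(md5):
        # Gelbooru shards files into two directory levels derived from the md5
        return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
            md5[0:2], md5[2:4], md5)

    print(webm_url_from_md5("d41d8cd98f00b204e9800998ecf8427e"))
    # -> https://img2.gelbooru.com/images/d4/1d/d41d8cd98f00b204e9800998ecf8427e.webm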
|
||||
|
gallery_dl/extractor/realbooru.py

@@ -1,59 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://realbooru.com/"""
-
-from . import booru
-
-
-class RealbooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for realbooru extractors"""
-    category = "realbooru"
-    api_url = "https://realbooru.com/index.php"
-    post_url = "https://realbooru.com/index.php?page=post&s=view&id={}"
-    pool_url = "https://realbooru.com/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class RealbooruTagExtractor(booru.TagMixin, RealbooruExtractor):
-    """Extractor for images from realbooru.com based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
-        "count": ">= 64",
-    })
-
-
-class RealbooruPoolExtractor(booru.GelbooruPoolMixin, RealbooruExtractor):
-    """Extractor for image-pools from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
-        "count": 3,
-    })
-
-
-class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
-    """Extractor for single images from realbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?realbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
-        "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
-        "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
-        # "options": (("tags", True),),
-        # "keyword": {
-        #     "tags_general" : str,
-        #     "tags_metadata": str,
-        #     "tags_model"   : "jennifer_lawrence",
-        # },
-    })
gallery_dl/extractor/rule34.py

@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2016-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://rule34.xxx/"""
-
-from . import booru
-
-
-class Rule34Extractor(booru.XmlParserMixin,
-                      booru.GelbooruPageMixin,
-                      booru.BooruExtractor):
-    """Base class for rule34 extractors"""
-    category = "rule34"
-    api_url = "https://rule34.xxx/index.php"
-    post_url = "https://rule34.xxx/index.php?page=post&s=view&id={}"
-    pool_url = "https://rule34.xxx/index.php?page=pool&s=show&id={}"
-    page_limit = 4000
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
-    """Extractor for images from rule34.xxx based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "pattern": r"https?://([^.]+\.)?rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
-        "count": 1,
-    })
-
-
-class Rule34PoolExtractor(booru.GelbooruPoolMixin, Rule34Extractor):
-    """Extractor for image-pools from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
-        "count": 3,
-    })
-
-
-class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
-    """Extractor for single images from rule34.xxx"""
-    pattern = (r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", {
-        "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "danraku",
-            "tags_character": "kashima_(kantai_collection)",
-            "tags_copyright": "kantai_collection",
-            "tags_general": str,
-            "tags_metadata": str,
-        },
-    })
gallery_dl/extractor/safebooru.py

@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from https://safebooru.org/"""
-
-from . import booru
-
-
-class SafebooruExtractor(booru.XmlParserMixin,
-                         booru.GelbooruPageMixin,
-                         booru.BooruExtractor):
-    """Base class for safebooru extractors"""
-    category = "safebooru"
-    api_url = "https://safebooru.org/index.php"
-    post_url = "https://safebooru.org/index.php?page=post&s=view&id={}"
-    pool_url = "https://safebooru.org/index.php?page=pool&s=show&id={}"
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.params.update({"page": "dapi", "s": "post", "q": "index"})
-
-
-class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor):
-    """Extractor for images from safebooru.org based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
-    test = ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
-        "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
-        "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
-    })
-
-
-class SafebooruPoolExtractor(booru.GelbooruPoolMixin, SafebooruExtractor):
-    """Extractor for image-pools from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
-    test = ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
-        "count": 5,
-    })
-
-
-class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
-    """Extractor for single images from safebooru.org"""
-    pattern = (r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
-               r"\?page=post&s=view&id=(?P<post>\d+)")
-    test = ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
-        "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
-        "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
-        "options": (("tags", True),),
-        "keyword": {
-            "tags_artist": "kawanakajima",
-            "tags_character": "heath_ledger ronald_mcdonald the_joker",
-            "tags_copyright": "dc_comics mcdonald's the_dark_knight",
-            "tags_general": str,
-        },
-    })