mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-26 12:42:29 +01:00
[realbooru] fix extraction
get file URLs from HTML pages
This commit is contained in:
parent
75666cf9c3
commit
ac97aca99c
@ -30,7 +30,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
|||||||
self.api_root = self.root
|
self.api_root = self.root
|
||||||
|
|
||||||
if self.category == "realbooru":
|
if self.category == "realbooru":
|
||||||
self._file_url = self._file_url_realbooru
|
self.items = self._items_realbooru
|
||||||
self._tags = self._tags_realbooru
|
self._tags = self._tags_realbooru
|
||||||
|
|
||||||
def _api_request(self, params):
|
def _api_request(self, params):
|
||||||
@ -129,6 +129,28 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
|||||||
self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2])
|
self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2])
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
def _items_realbooru(self):
    """Yield a Directory and a Url message for each realbooru post.

    The file URL is taken from the post's HTML page (looked up relative
    to the ">Original<" label) and stored in post["file_url"].  Posts
    for which that lookup raises are logged at debug level and skipped.
    """
    from .common import Message

    metadata = self.metadata()

    for post in self.posts():
        try:
            page = self._html(post)
            # str.index() raises ValueError when the label is missing,
            # which sends this post to the skip branch below.
            anchor = page.index(">Original<")
            # presumably rextract() searches backwards from 'anchor'
            # for the enclosing href attribute -- verify in text module
            url = text.rextract(page, 'href="', '"', anchor)[0]
            post["file_url"] = url
        except Exception:
            self.log.debug(
                "Unable to fetch download URL for post %s "
                "(md5: %s)",
                post.get("id"),
                post.get("md5"),
            )
            continue

        text.nameext_from_url(url, post)
        post.update(metadata)
        self._prepare(post)
        # the already fetched page is reused for tag extraction
        self._tags(post, page)

        yield Message.Directory, post
        yield Message.Url, url, post
|
||||||
|
|
||||||
def _tags_realbooru(self, post, page):
|
def _tags_realbooru(self, post, page):
|
||||||
tag_container = text.extr(page, 'id="tagLink"', '</div>')
|
tag_container = text.extr(page, 'id="tagLink"', '</div>')
|
||||||
tags = collections.defaultdict(list)
|
tags = collections.defaultdict(list)
|
||||||
@ -404,7 +426,7 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
|
|||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
|
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
|
||||||
"pattern": r"https://realbooru\.com/images/dc/b5"
|
"pattern": r"https://realbooru\.com//?images/dc/b5"
|
||||||
r"/dcb5c0ce9ec0bf74a6930608985f4719\.jpeg",
|
r"/dcb5c0ce9ec0bf74a6930608985f4719\.jpeg",
|
||||||
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
|
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
|
||||||
"options": (("tags", True),),
|
"options": (("tags", True),),
|
||||||
|
Loading…
Reference in New Issue
Block a user