mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
[hitomi] fall back to /reader/ page if main page returns 404
Some galleries return a "404: Not Found" error when accessed through the main gallery URL, but their content is still available on the respective /reader/ page.
This commit is contained in:
parent
8af59a4bba
commit
15af2f8464
@ -34,15 +34,36 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
||||
"url": "c2a84185f467450b8b9b72fbe40c0649029ce007",
|
||||
"count": 210,
|
||||
}),
|
||||
("https://hitomi.la/galleries/1045954.html", {
|
||||
# fallback for galleries only available through /reader/ URLs
|
||||
"url": "055c898a36389719799d6bce76889cc4ea4421fc",
|
||||
"count": 1413,
|
||||
}),
|
||||
("https://hitomi.la/reader/867789.html"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
    """Set up the extractor from a URL match.

    Stores the first match group as 'gallery_id', clears the
    'fallback' flag, and passes the gallery's main page URL to the
    GalleryExtractor initializer.
    """
    gallery_id = match.group(1)
    self.gallery_id = gallery_id
    # switched on by request() once the main page answers with a 404
    self.fallback = False
    GalleryExtractor.__init__(
        self, match,
        "{}/galleries/{}.html".format(self.root, gallery_id))
|
||||
|
||||
def request(self, url, **kwargs):
    """Request 'url', retrying on the /reader/ page after a 404.

    The first attempt is sent with fatal=False (presumably so that an
    error status is returned instead of raised -- confirm against
    GalleryExtractor.request).  If its status code is 404,
    'self.fallback' is set to True and the request is repeated with
    "/galleries/" in the URL replaced by "/reader/", this time using
    the caller's keyword arguments unchanged.

    Returns the response of the last request that was made.
    """
    # Override 'fatal' inside a copy of kwargs instead of passing it
    # next to **kwargs: the latter raises TypeError ("multiple values
    # for keyword argument") when the caller supplies its own 'fatal'.
    probe_kwargs = dict(kwargs, fatal=False)
    response = GalleryExtractor.request(self, url, **probe_kwargs)
    if response.status_code == 404:
        self.fallback = True
        url = url.replace("/galleries/", "/reader/")
        # the retry honours whatever 'fatal' the caller asked for
        response = GalleryExtractor.request(self, url, **kwargs)
    return response
|
||||
|
||||
def metadata(self, page):
|
||||
if self.fallback:
|
||||
return {
|
||||
"gallery_id": text.parse_int(self.gallery_id),
|
||||
"title": text.unescape(text.extract(
|
||||
page, "<title>", "<")[0].rpartition(" | ")[0]),
|
||||
}
|
||||
|
||||
extr = text.extract_from(page, page.index('<h1><a href="/reader/'))
|
||||
data = {
|
||||
"gallery_id": text.parse_int(self.gallery_id),
|
||||
@ -76,6 +97,8 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
||||
url = "{}/reader/{}.html".format(self.root, self.gallery_id)
|
||||
page = self.request(url).text
|
||||
begin, end = ">//g.hitomi.la/galleries/", "</div>"
|
||||
elif self.fallback:
|
||||
begin, end = ">//g.hitomi.la/galleries/", "</div>"
|
||||
else:
|
||||
begin, end = "'//tn.hitomi.la/smalltn/", ".jpg',"
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user