1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 10:42:34 +01:00

[hitomi] fall back to /reader/ page if main page returns 404

Some galleries return a 404: Not Found error when trying to access
them through the main gallery URL, but their content is still
available on the respective /reader/ page.
This commit is contained in:
Mike Fährmann 2019-10-11 18:25:54 +02:00
parent 8af59a4bba
commit 15af2f8464
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -34,15 +34,36 @@ class HitomiGalleryExtractor(GalleryExtractor):
"url": "c2a84185f467450b8b9b72fbe40c0649029ce007",
"count": 210,
}),
("https://hitomi.la/galleries/1045954.html", {
# fallback for galleries only available through /reader/ URLs
"url": "055c898a36389719799d6bce76889cc4ea4421fc",
"count": 1413,
}),
("https://hitomi.la/reader/867789.html"),
)
def __init__(self, match):
    """Set up the extractor for the gallery captured by *match*.

    The first capture group of *match* is kept as ``gallery_id`` and
    used to build the main ``/galleries/<id>.html`` URL, which is then
    handed to ``GalleryExtractor.__init__``.
    """
    # Stays False until request() sees a 404 and switches to the
    # /reader/ page instead.
    self.fallback = False
    self.gallery_id = match.group(1)
    gallery_url = "{root}/galleries/{gid}.html".format(
        root=self.root, gid=self.gallery_id)
    GalleryExtractor.__init__(self, match, gallery_url)
def request(self, url, **kwargs):
    """Fetch *url*, retrying on the /reader/ page after a 404.

    The first attempt is made with ``fatal=False`` (presumably so the
    base class hands back the 404 response instead of raising -- confirm
    against ``GalleryExtractor.request``).  Any non-404 response is
    returned unchanged.  On a 404, ``self.fallback`` is set to True and
    the request is repeated with ``/galleries/`` in the URL replaced by
    ``/reader/``, this time without overriding ``fatal``.
    """
    first_try = GalleryExtractor.request(self, url, fatal=False, **kwargs)
    if first_try.status_code != 404:
        return first_try

    # Main gallery page is gone; remember that so later steps know they
    # are working with the /reader/ page.
    self.fallback = True
    reader_url = url.replace("/galleries/", "/reader/")
    return GalleryExtractor.request(self, reader_url, **kwargs)
def metadata(self, page):
if self.fallback:
return {
"gallery_id": text.parse_int(self.gallery_id),
"title": text.unescape(text.extract(
page, "<title>", "<")[0].rpartition(" | ")[0]),
}
extr = text.extract_from(page, page.index('<h1><a href="/reader/'))
data = {
"gallery_id": text.parse_int(self.gallery_id),
@ -76,6 +97,8 @@ class HitomiGalleryExtractor(GalleryExtractor):
url = "{}/reader/{}.html".format(self.root, self.gallery_id)
page = self.request(url).text
begin, end = ">//g.hitomi.la/galleries/", "</div>"
elif self.fallback:
begin, end = ">//g.hitomi.la/galleries/", "</div>"
else:
begin, end = "'//tn.hitomi.la/smalltn/", ".jpg',"