mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-26 04:32:51 +01:00
[hitomi] fallback to /reader/ page if main page returns 404
Some galleries return a 404: Not Found error when trying to access them through the main gallery URL, but their content is still available on the respective /reader/ page.
This commit is contained in:
parent
8af59a4bba
commit
15af2f8464
@ -34,15 +34,36 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
|||||||
"url": "c2a84185f467450b8b9b72fbe40c0649029ce007",
|
"url": "c2a84185f467450b8b9b72fbe40c0649029ce007",
|
||||||
"count": 210,
|
"count": 210,
|
||||||
}),
|
}),
|
||||||
|
("https://hitomi.la/galleries/1045954.html", {
|
||||||
|
# fallback for galleries only available through /reader/ URLs
|
||||||
|
"url": "055c898a36389719799d6bce76889cc4ea4421fc",
|
||||||
|
"count": 1413,
|
||||||
|
}),
|
||||||
("https://hitomi.la/reader/867789.html"),
|
("https://hitomi.la/reader/867789.html"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, match):
    """Set up the extractor for the gallery named by the matched URL.

    The first capture group of *match* is stored as ``gallery_id``, the
    ``fallback`` flag starts out False, and the base class is initialised
    with the gallery's main ``/galleries/<id>.html`` page URL.
    """
    gallery_id = match.group(1)
    self.gallery_id = gallery_id
    # Flipped to True by request() once the main page answers with a 404.
    self.fallback = False
    GalleryExtractor.__init__(
        self, match,
        "{}/galleries/{}.html".format(self.root, gallery_id))
|
||||||
|
|
||||||
|
def request(self, url, **kwargs):
    """Fetch *url*, retrying on the /reader/ page when the answer is a 404.

    The first attempt is made with ``fatal=False`` -- presumably so the
    base class hands back the 404 response instead of raising; confirm
    against GalleryExtractor.request. Any status other than 404 is
    returned to the caller unchanged.

    On a 404 the ``fallback`` flag is set and the request is repeated with
    "/galleries/" replaced by "/reader/" in the URL, this time without
    overriding ``fatal``.
    """
    first_try = GalleryExtractor.request(self, url, fatal=False, **kwargs)
    if first_try.status_code != 404:
        return first_try

    # Main gallery page is gone; remember that so later parsing steps can
    # switch to the /reader/ page layout.
    self.fallback = True
    return GalleryExtractor.request(
        self, url.replace("/galleries/", "/reader/"), **kwargs)
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
|
if self.fallback:
|
||||||
|
return {
|
||||||
|
"gallery_id": text.parse_int(self.gallery_id),
|
||||||
|
"title": text.unescape(text.extract(
|
||||||
|
page, "<title>", "<")[0].rpartition(" | ")[0]),
|
||||||
|
}
|
||||||
|
|
||||||
extr = text.extract_from(page, page.index('<h1><a href="/reader/'))
|
extr = text.extract_from(page, page.index('<h1><a href="/reader/'))
|
||||||
data = {
|
data = {
|
||||||
"gallery_id": text.parse_int(self.gallery_id),
|
"gallery_id": text.parse_int(self.gallery_id),
|
||||||
@ -76,6 +97,8 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
|||||||
url = "{}/reader/{}.html".format(self.root, self.gallery_id)
|
url = "{}/reader/{}.html".format(self.root, self.gallery_id)
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
begin, end = ">//g.hitomi.la/galleries/", "</div>"
|
begin, end = ">//g.hitomi.la/galleries/", "</div>"
|
||||||
|
elif self.fallback:
|
||||||
|
begin, end = ">//g.hitomi.la/galleries/", "</div>"
|
||||||
else:
|
else:
|
||||||
begin, end = "'//tn.hitomi.la/smalltn/", ".jpg',"
|
begin, end = "'//tn.hitomi.la/smalltn/", ".jpg',"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user