1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-21 18:22:30 +01:00

fix imagechest extractor

This commit is contained in:
Achim 2024-11-14 16:50:06 +01:00
parent a3276e3b5d
commit b2fa149598
No known key found for this signature in database
GPG Key ID: 1B2C9B73479BF0CD

View File

@ -11,6 +11,7 @@
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, exception from .. import text, exception
import json
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com"
@ -36,32 +37,60 @@ class ImagechestGalleryExtractor(GalleryExtractor):
self.images = self._images_api self.images = self._images_api
def metadata(self, page):
    """Build the gallery metadata dict from the post data embedded in 'page'.

    The result always contains "gallery_id". Every listed post attribute
    that is present under ``props.post`` of the decoded page data is
    copied over unchanged, and "tags" is stored as one comma-joined
    string. Attributes that are missing, or post data that does not have
    the expected shape, are skipped rather than treated as errors.

    Raises exception.NotFoundError when 'page' contains "Not Found".
    """
    if "Not Found" in page:
        raise exception.NotFoundError("gallery")

    page_data = self._retrieve_page_data(page)

    # Resolve the post object once; anything that is not a JSON object
    # is handled like an empty post so only "gallery_id" is returned.
    try:
        post = page_data["props"]["post"]
    except (KeyError, TypeError):
        post = None
    if not isinstance(post, dict):
        post = {}

    metadata = {
        "gallery_id": self.gallery_id,
    }

    for attribute in (
        "id",
        "slug",
        "status",
        "title",
        "nsfw",
        "score",
        "comments",
        "upvotes",
        "downvotes",
        "favorites",
        "views",
        "created",
    ):
        if attribute in post:
            metadata[attribute] = post[attribute]

    try:
        metadata["tags"] = ",".join(post["tags"])
    except (KeyError, TypeError):
        # no "tags" entry, or it is not an iterable of strings
        pass

    return metadata
def images(self, page):
    """Return a list of ``(url, None)`` tuples, one per file of the post.

    The file list is read from ``props.post.files`` of the data returned
    by ``self._retrieve_page_data(page)``; each entry's "link" value is
    used as the URL. If that data cannot be parsed or does not have the
    expected shape, an empty list is returned.
    """
    try:
        files = self._retrieve_page_data(page)["props"]["post"]["files"]
        return [(file["link"], None) for file in files]
    except (KeyError, TypeError, ValueError):
        # KeyError/TypeError: missing keys or unexpected value types;
        # ValueError: covers json.JSONDecodeError from invalid JSON.
        # Other exceptions indicate real bugs and are not suppressed.
        return []
def _retrieve_page_data(self, page):
    """Decode the JSON stored in the 'data-page' attribute of 'page'.

    The text between 'data-page="' and the following '"' is extracted,
    HTML-unescaped and parsed with json.loads. '{}' is passed as the
    extraction default, so a page without that attribute presumably
    yields an empty dict (depends on text.extr's handling of 'default').
    """
    raw = text.extr(page, begin='data-page="', end='"', default='{}')
    decoded = text.unescape(raw)
    return json.loads(decoded)
def _metadata_api(self, page): def _metadata_api(self, page):
post = self.api.post(self.gallery_id) post = self.api.post(self.gallery_id)