1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 04:02:32 +01:00

fix imagechest extractor

This commit is contained in:
Achim 2024-11-14 16:50:06 +01:00
parent a3276e3b5d
commit b2fa149598
No known key found for this signature in database
GPG Key ID: 1B2C9B73479BF0CD

View File

@ -11,6 +11,7 @@
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, exception from .. import text, exception
import json
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com"
@ -36,32 +37,60 @@ class ImagechestGalleryExtractor(GalleryExtractor):
self.images = self._images_api self.images = self._images_api
def metadata(self, page):
    """Build the gallery metadata dict from the gallery's HTML page.

    Post attributes are read from the object returned by
    ``self._retrieve_page_data(page)`` under ``["props"]["post"]``.
    Extraction is best-effort: attributes that are missing, or a page
    data object without the expected nesting, are skipped rather than
    raising, so the result always contains at least ``"gallery_id"``.

    Args:
        page: HTML text of the gallery page.

    Returns:
        dict with ``"gallery_id"`` plus every available post attribute;
        ``"tags"`` is stored as one comma-joined string.

    Raises:
        exception.NotFoundError: if ``page`` contains ``"Not Found"``.
    """
    if "Not Found" in page:
        raise exception.NotFoundError("gallery")

    page_data = self._retrieve_page_data(page)
    data = {"gallery_id": self.gallery_id}

    # Resolve the nested post object once instead of on every attribute.
    try:
        post = page_data["props"]["post"]
    except (KeyError, TypeError):
        return data
    if not isinstance(post, dict):
        # Nothing can be looked up by name in a non-mapping post value.
        return data

    for attribute in (
        "id",
        "slug",
        "status",
        "title",
        "nsfw",
        "score",
        "comments",
        "upvotes",
        "downvotes",
        "favorites",
        "views",
        "created",
    ):
        if attribute in post:
            data[attribute] = post[attribute]

    try:
        data["tags"] = ",".join(post["tags"])
    except (KeyError, TypeError):
        # No tags present, or tags is not an iterable of strings.
        pass

    return data
def images(self, page):
    """Return the gallery's files as a list of ``(url, None)`` tuples.

    File entries are read from the object returned by
    ``self._retrieve_page_data(page)`` under
    ``["props"]["post"]["files"]``; each entry's ``"link"`` value is
    used as the URL.

    Args:
        page: HTML text of the gallery page.

    Returns:
        list of ``(link, None)`` tuples, or an empty list when the page
        data cannot be decoded or lacks the expected structure.
    """
    try:
        files = self._retrieve_page_data(page)["props"]["post"]["files"]
        return [(entry["link"], None) for entry in files]
    except (KeyError, TypeError, ValueError):
        # KeyError/TypeError: missing or malformed nesting or entries.
        # ValueError: covers json.JSONDecodeError from undecodable data.
        return []
def _retrieve_page_data(self, page):
    """Decode the JSON stored in the page's ``data-page`` attribute.

    The attribute value is extracted from ``page``, HTML-unescaped and
    parsed as JSON. When the attribute cannot be extracted, the
    ``'{}'`` default is parsed instead.
    """
    raw_attribute = text.extr(
        page,
        begin='data-page="',
        end='"',
        default='{}',
    )
    decoded = text.unescape(raw_attribute)
    return json.loads(decoded)
def _metadata_api(self, page): def _metadata_api(self, page):
post = self.api.post(self.gallery_id) post = self.api.post(self.gallery_id)