From ab11b1c896a8f81959813e3a8340740497b97037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 18 May 2020 19:11:26 +0200 Subject: [PATCH] [imagechest] simplify code (#750) --- docs/supportedsites.rst | 1 + gallery_dl/extractor/imagechest.py | 34 +++++++----------------------- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 5de1d580..0530cac4 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -53,6 +53,7 @@ Hitomi.la https://hitomi.la/ Galleries, Tag Searches Hypnohub https://hypnohub.net/ Pools, Popular Images, Posts, Tag Searches Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag Searches Optional ImageBam http://www.imagebam.com/ Galleries, individual Images +ImageChest https://imgchest.com/ Galleries ImageFap https://imagefap.com/ Galleries, individual Images, User Profiles ImgBB https://imgbb.com/ Albums, individual Images, User Profiles Optional imgbox https://imgbox.com/ Galleries, individual Images diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py index b2e8068d..a1ba0c30 100644 --- a/gallery_dl/extractor/imagechest.py +++ b/gallery_dl/extractor/imagechest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2020 Leonid "Bepis" Pavel +# Copyright 2020 Leonid "Bepis" Pavel # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,15 +10,12 @@ from .common import GalleryExtractor from .. import text, exception -import re class ImagechestGalleryExtractor(GalleryExtractor): """Extractor for image galleries from imgchest.com""" - category = "imagechest" root = "https://imgchest.com" - pattern = r"(?:https?://)?(?:www\.)?imgchest\.com/p/([A-Za-z0-9]{11})" test = ( ("https://imgchest.com/p/3na7kr3by8d", { @@ -29,38 +26,23 @@ class ImagechestGalleryExtractor(GalleryExtractor): ) def __init__(self, match): - self.gallery_id = match.group(1) url = self.root + "/p/" + self.gallery_id - GalleryExtractor.__init__(self, match, url) def metadata(self, page): - """Return a dict with general metadata""" - if "Sorry, but the page you requested could not be found." in page: raise exception.NotFoundError("gallery") - title_match = re.search( - r'', - page) - - title = title_match.group(1).strip() - return { "gallery_id": self.gallery_id, - "title": text.unescape(title) + "title": text.unescape(text.extract( + page, 'property="og:title" content="', '"')[0].strip()) } def images(self, page): - """Return a list of all (image-url, metadata)-tuples""" - - image_keys = re.findall( - r'', - page) - - for imgurl in image_keys: - - data = text.nameext_from_url(imgurl) - - yield imgurl, data + return [ + (url, None) + for url in text.extract_iter( + page, 'property="og:image" content="', '"') + ]