[imgur] distinguish album and gallery URLs (#380)

A gallery can be either an album or a single image.
2024-11-22 10:42:34 +01:00 · 2019-08-14 21:20:58 +02:00 · 2019-08-14 21:20:58 +02:00 · 829b1ccf04
commit 829b1ccf04
parent 23251356cb
2 changed files with 49 additions and 22 deletions
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -51,7 +51,7 @@ ImageFap             https://imagefap.com/               Images from Users, Gall
 ImgBB                https://imgbb.com/                  Images from Users, Albums, individual Images       Optional
 imgbox               https://imgbox.com/                 Galleries, individual Images
 imgth                https://imgth.com/                  Galleries
-imgur                https://imgur.com/                  Albums, individual Images
+imgur                https://imgur.com/                  Albums, Galleries, individual Images
 Instagram            https://www.instagram.com/          |instagram-C|                                      Optional
 Jaimini's Box        https://jaiminisbox.com/reader/     Chapters, Manga
 Joyreactor           http://joyreactor.cc/               |joyreactor-C|
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -20,13 +20,19 @@ class ImgurExtractor(Extractor):

    def __init__(self, match):
        Extractor.__init__(self, match)
-        self.item_id = match.group(1)
+        self.key = match.group(1)
        self.mp4 = self.config("mp4", True)

-    def _get_data(self, path):
+    def _extract_data(self, path):
        response = self.request(self.root + path, notfound=self.subcategory)
-        data = text.extract(response.text, "image               : ", ",\n")[0]
-        return self._clean(json.loads(data))
+        data = json.loads(text.extract(
+            response.text, "image               : ", ",\n")[0])
+        try:
+            del data["adConfig"]
+            del data["isAd"]
+        except KeyError:
+            pass
+        return data

    def _prepare(self, image):
        image["ext"] = image["ext"].partition("?")[0]
@@ -37,18 +43,9 @@ class ImgurExtractor(Extractor):
        image["extension"] = image["ext"][1:]
        return url

-    @staticmethod
-    def _clean(data):
-        try:
-            del data["adConfig"]
-            del data["isAd"]
-        except KeyError:
-            pass
-        return data
-

 class ImgurImageExtractor(ImgurExtractor):
-    """Extractor for individual images from imgur.com"""
+    """Extractor for individual images on imgur.com"""
    subcategory = "image"
    filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
    archive_fmt = "{hash}"
@@ -101,22 +98,21 @@ class ImgurImageExtractor(ImgurExtractor):
    )

    def items(self):
-        image = self._get_data("/" + self.item_id)
+        image = self._extract_data("/" + self.key)
        url = self._prepare(image)
-
        yield Message.Version, 1
        yield Message.Directory, image
        yield Message.Url, url, image


 class ImgurAlbumExtractor(ImgurExtractor):
-    """Extractor for image albums from imgur.com"""
+    """Extractor for imgur albums"""
    subcategory = "album"
    directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}")
    filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}"
    archive_fmt = "{album[hash]}_{hash}"
    pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
-               r"/(?:a|gallery|t/unmuted)/(\w{7}|\w{5})")
+               r"/(?:a|t/unmuted)/(\w{7}|\w{5})")
    test = (
        ("https://imgur.com/a/TcBmP", {
            "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
@@ -147,7 +143,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
                "width": int,
            },
        }),
-        ("https://imgur.com/gallery/eD9CT", {  # large album
+        ("https://imgur.com/a/eD9CT", {  # large album
            "url": "4ee94de31ff26be416271bc0b1ea27b9349c9937",
        }),
        ("https://imgur.com/a/RhJXhVT/all", {  # 7 character album hash
@@ -164,13 +160,14 @@ class ImgurAlbumExtractor(ImgurExtractor):
    )

    def items(self):
-        album = self._get_data("/a/" + self.item_id + "/all")
+        album = self._extract_data("/a/" + self.key + "/all")
+        album["title"] = album["title"].strip()
        images = album["album_images"]["images"]
        del album["album_images"]

        if int(album["num_images"]) > len(images):
            url = "{}/ajaxalbums/getimages/{}/hit.json".format(
-                self.root, self.item_id)
+                self.root, self.key)
            images = self.request(url).json()["data"]["images"]

        yield Message.Version, 1
@@ -180,3 +177,33 @@ class ImgurAlbumExtractor(ImgurExtractor):
            image["num"] = num
            image["album"] = album
            yield Message.Url, url, image
+
+
+class ImgurGalleryExtractor(ImgurExtractor):
+    """Extractor for imgur galleries"""
+    subcategory = "gallery"
+    pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
+               r"/gallery/(\w{7}|\w{5})")
+    test = (
+        ("https://imgur.com/gallery/zf2fIms", {  # non-album gallery (#380)
+            "pattern": "https://imgur.com/zf2fIms",
+        }),
+        ("https://imgur.com/gallery/eD9CT", {
+            "pattern": "https://imgur.com/a/eD9CT",
+        }),
+    )
+
+    def items(self):
+        url = self.root + "/a/" + self.key
+        response = self.request(url, method="HEAD", fatal=False)
+
+        if response.status_code < 400:
+            extr = ImgurAlbumExtractor
+            path = "/a/"
+        else:
+            extr = ImgurImageExtractor
+            path = "/"
+
+        response.close()
+        yield Message.Version, 1
+        yield Message.Queue, self.root + path + self.key, {"_extractor": extr}