1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-23 11:12:40 +01:00
gallery-dl/gallery_dl/extractor/imgur.py

245 lines
8.3 KiB
Python
Raw Normal View History

2015-10-11 16:22:38 +02:00
# -*- coding: utf-8 -*-
# Copyright 2015-2019 Mike Fährmann
2015-10-11 16:22:38 +02:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images from https://imgur.com/"""
2015-10-11 16:22:38 +02:00
from .common import Extractor, Message
2016-10-07 00:13:51 +02:00
from .. import text, exception
import json
2015-10-11 16:22:38 +02:00
2017-02-01 00:53:19 +01:00
class ImgurExtractor(Extractor):
    """Base class for imgur extractors"""
    category = "imgur"
    root = "https://imgur.com"

    def __init__(self, match):
        """Store the key captured by the URL pattern and read the 'mp4' option.

        'match' is the regex match object for the subclass 'pattern';
        its first group is kept as 'self.key'.
        """
        Extractor.__init__(self, match)
        self.key = match.group(1)
        # "mp4" option: consulted in _prepare(); may be a truthy/falsy value
        # or the string "always"
        self.mp4 = self.config("mp4", True)

    def _extract_data(self, path):
        """Fetch 'root + path' and return the JSON object embedded in it.

        The object is taken from the text between the marker "image : "
        and the next ",\\n" in the response body.  The ad-related keys
        "adConfig" and "isAd" are stripped from the result.
        """
        response = self.request(self.root + path, notfound=self.subcategory)
        raw = text.extract(response.text, "image : ", ",\n")[0]
        data = json.loads(raw)

        # Remove each key independently: a shared try/except around two
        # 'del' statements would skip "isAd" whenever "adConfig" is absent.
        data.pop("adConfig", None)
        data.pop("isAd", None)
        return data

    def _prepare(self, image):
        """Normalize 'image' in place and return its direct download URL.

        Strips any query string from image["ext"], switches GIFs to ".mp4"
        depending on the 'mp4' option, and sets image["extension"] to the
        extension without its leading dot.
        """
        # drop a trailing query string such as '.jpg?1'
        image["ext"] = image["ext"].partition("?")[0]

        if image["ext"] == ".gif" and (
                (self.mp4 and image["prefer_video"]) or self.mp4 == "always"):
            image["ext"] = ".mp4"

        url = "https://i.imgur.com/" + image["hash"] + image["ext"]
        image["extension"] = image["ext"][1:]
        return url
class ImgurImageExtractor(ImgurExtractor):
    """Extractor for individual images on imgur.com"""
    subcategory = "image"
    filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
    archive_fmt = "{hash}"
    pattern = (r"(?:https?://)?(?:www\.|[im]\.|)?imgur\.com"
               r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?")
    test = (
        ("https://imgur.com/21yMxCS", {
            "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
            "content": "0c8768055e4e20e7c7259608b67799171b691140",
            "keyword": {
                "animated": False,
                "datetime": "2016-11-10 14:24:35",
                "description": str,
                "ext": ".png",
                "extension": "png",
                "hash": "21yMxCS",
                "height": "32",
                "is_moderated": False,
                "is_safe": False,
                "is_viral": 0,
                "looping": False,
                "mimetype": "image/png",
                "name": None,
                "prefer_video": False,
                "size": 182,
                "source": "",
                "title": "Test",
                "video_host": None,
                "video_source": None,
                "width": "64",
            },
        }),
        ("http://imgur.com/0gybAXR", {  # gifv/mp4 video
            "url": "a2220eb265a55b0c95e0d3d721ec7665460e3fd7",
            "content": "a3c080e43f58f55243ab830569ba02309d59abfc",
        }),
        ("https://imgur.com/HjoXJAd", {  # url ends with '.jpg?1'
            "url": "73f361b50753ab25da64160aa50bc5d139480d45",
        }),
        ("https://imgur.com/zzzzzzz", {  # not found
            "exception": exception.NotFoundError,
        }),
        ("https://www.imgur.com/21yMxCS"),  # www
        ("https://m.imgur.com/21yMxCS"),  # mobile
        ("https://imgur.com/zxaY6"),  # 5 character key
        ("https://i.imgur.com/21yMxCS.png"),  # direct link
        ("https://i.imgur.com/21yMxCSh.png"),  # direct link thumbnail
        ("https://i.imgur.com/zxaY6.gif"),  # direct link (short)
        ("https://i.imgur.com/zxaY6s.gif"),  # direct link (short; thumb)
    )

    def items(self):
        """Yield the version, directory and URL messages for one image.

        The metadata dict extracted from the image page is used both as
        directory metadata and as the metadata attached to the file URL.
        """
        metadata = self._extract_data("/" + self.key)
        # _prepare() updates 'metadata' in place and returns the file URL
        file_url = self._prepare(metadata)

        yield Message.Version, 1
        yield Message.Directory, metadata
        yield Message.Url, file_url, metadata
class ImgurAlbumExtractor(ImgurExtractor):
    """Extractor for imgur albums"""
    subcategory = "album"
    directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}")
    filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}"
    archive_fmt = "{album[hash]}_{hash}"
    pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
               r"/(?:a|t/unmuted)/(\w{7}|\w{5})")
    test = (
        ("https://imgur.com/a/TcBmP", {
            "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
            "keyword": {
                "album": {
                    "album_cover": "693j2Kr",
                    "album_description": None,
                    "cover": "693j2Kr",
                    "datetime": "2015-10-09 10:37:50",
                    "description": None,
                    "hash": "TcBmP",
                    "id": "TcBmP",
                    "is_album": True,
                    "num_images": "19",
                    "title": "138",
                    "title_clean": "TcBmP",
                    "views": str,
                },
                "animated": bool,
                "datetime": str,
                "extension": str,
                "hash": str,
                "height": int,
                "num": int,
                "prefer_video": bool,
                "size": int,
                "title": str,
                "width": int,
            },
        }),
        ("https://imgur.com/a/eD9CT", {  # large album
            "url": "4ee94de31ff26be416271bc0b1ea27b9349c9937",
        }),
        ("https://imgur.com/a/RhJXhVT/all", {  # 7 character album hash
            "url": "695ef0c950023362a0163ee5041796300db76674",
        }),
        ("https://imgur.com/t/unmuted/YMqBcua", {  # unmuted URL
            "url": "86b4747f8147cec7602f0214e267309af73a8655",
        }),
        ("https://imgur.com/a/TcBmQ", {
            "exception": exception.NotFoundError,
        }),
        ("https://www.imgur.com/a/TcBmP"),  # www
        ("https://m.imgur.com/a/TcBmP"),  # mobile
    )

    def items(self):
        """Yield version, directory and one URL message per album image.

        Every image dict gets its 1-based position as "num" and the album
        metadata as "album" before being yielded.
        """
        album = self._extract_data("/a/" + self.key + "/all")
        # detach the embedded image list from the album metadata
        images = album.pop("album_images")["images"]

        # The page data holds fewer images than the album reports:
        # request the image list from the 'getimages' endpoint instead.
        if len(images) < int(album["num_images"]):
            listing_url = "{}/ajaxalbums/getimages/{}/hit.json".format(
                self.root, self.key)
            images = self.request(listing_url).json()["data"]["images"]

        yield Message.Version, 1
        yield Message.Directory, {"album": album, "count": len(images)}

        for position, image in enumerate(images, 1):
            file_url = self._prepare(image)
            image["num"] = position
            image["album"] = album
            yield Message.Url, file_url, image
class ImgurGalleryExtractor(ImgurExtractor):
    """Extractor for imgur galleries"""
    subcategory = "gallery"
    pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
               r"/gallery/(\w{7}|\w{5})")
    test = (
        ("https://imgur.com/gallery/zf2fIms", {  # non-album gallery (#380)
            "pattern": "https://imgur.com/zf2fIms",
        }),
        ("https://imgur.com/gallery/eD9CT", {
            "pattern": "https://imgur.com/a/eD9CT",
        }),
    )

    def items(self):
        """Queue the gallery key as either an album or a single image.

        A HEAD request is sent to the '/a/<key>' URL; a status code below
        400 selects the album extractor for that URL, anything else selects
        the image extractor for '/<key>'.
        """
        album_url = self.root + "/a/" + self.key
        with self.request(album_url, method="HEAD", fatal=False) as response:
            is_album = response.status_code < 400

        if is_album:
            target, extractor = album_url, ImgurAlbumExtractor
        else:
            target = self.root + "/" + self.key
            extractor = ImgurImageExtractor

        yield Message.Version, 1
        yield Message.Queue, target, {"_extractor": extractor}
2019-09-17 22:58:18 +02:00
class ImgurUserExtractor(ImgurExtractor):
    """Extractor for all images posted by a user"""
    subcategory = "user"
    pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
               r"/user/([^/?&#]+)(?:/submitted|/posts)?/?")
    test = (
        ("https://imgur.com/user/Miguenzo", {
        }),
        ("https://imgur.com/user/Miguenzo/submitted"),
        ("https://imgur.com/user/Miguenzo/submitted/newest"),
        ("https://imgur.com/user/Miguenzo/posts"),
    )

    def items(self):
        """Queue every link found on the user's 'submitted' pages.

        Pages are requested one after another, starting at page 0, and
        each '<a href="...">' target is queued for the gallery extractor.
        Iteration ends after the first page with fewer than 60 links
        (presumably the size of a full page; not confirmed here).
        """
        submitted = "{}/user/{}/submitted".format(self.root, self.key)
        queue_data = {"_extractor": ImgurGalleryExtractor}
        request_headers = {
            "Referer": submitted,
            "X-Requested-With": "XMLHttpRequest",
        }

        page_num = 0
        while True:
            page_url = "{}/page/{}?scrolling".format(submitted, page_num)
            html = self.request(page_url, headers=request_headers).text

            found = 0  # stays 0 when the page contains no links at all
            links = text.extract_iter(html, '<a href="', '"')
            for found, path in enumerate(links, 1):
                yield Message.Queue, self.root + path, queue_data

            if found < 60:
                return
            page_num += 1