From 93e5d8cba3ef39455cea87d92cdf4c2f01fbe0ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 31 May 2017 17:31:51 +0200 Subject: [PATCH] [flickr] add album extractor --- gallery_dl/extractor/flickr.py | 102 ++++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 13 deletions(-) diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index 013829f1..21aeab8f 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -15,6 +15,13 @@ from .. import text, exception class FlickrExtractor(Extractor): """Base class for flickr extractors""" category = "flickr" + filename_fmt = "{category}_{id}.{extension}" + + def __init__(self, match): + Extractor.__init__(self) + self.api = FlickrAPI(self) + self.item_id = match.group(1) + self.metadata = self.config("metadata", False) @staticmethod def _clean(photo): @@ -34,34 +41,30 @@ class FlickrExtractor(Extractor): class FlickrImageExtractor(FlickrExtractor): """Extractor for individual images from flickr.com""" subcategory = "image" - filename_fmt = "{category}_{id}.{extension}" pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/]+/(\d+)", - r"(?:https?://)?[^.]+\.staticflickr\.com/d+/\d+/(\d+)"] + r"(?:https?://)?[^.]+\.staticflickr\.com/(?:\d+/)+(\d+)_"] test = [ ("https://www.flickr.com/photos/departingyyz/16089302239", { "url": "7f0887f5953f61c8b79a695cb102ea309c0346b0", "keyword": "5ecdaf0192802451b7daca9b81f393f207ff7ee9", "content": "6aaad7512d335ca93286fe2046e7fe3bb93d808e", }), + ("http://c2.staticflickr.com/2/1475/24531000464_9a7503ae68_b.jpg", { + "url": "40f5163488522ca5d918750ed7bd7fcf437982fe", + }), ("https://www.flickr.com/photos/zzz/16089302238", { "exception": exception.NotFoundError, - }) + }), ] - def __init__(self, match): - FlickrExtractor.__init__(self) - self.api = FlickrAPI(self) - self.photo_id = match.group(1) - self.metadata = self.config("metadata", False) - def items(self): - size = self.api.photos_getSizes(self.photo_id)["size"][-1] + size = self.api.photos_getSizes(self.item_id)[-1] if self.metadata: - info = self.api.photos_getInfo(self.photo_id) + info = self.api.photos_getInfo(self.item_id) self._clean(info) else: - info = {"id": self.photo_id} + info = {"id": self.item_id} info["photo"] = size url = size["source"] @@ -72,8 +75,34 @@ class FlickrImageExtractor(FlickrExtractor): yield Message.Url, url, info +class FlickrAlbumExtractor(FlickrExtractor): + """Extractor for photo albums from flickr.com""" + subcategory = "album" + directory_fmt = ["{category}", "{id} - {title}"] + pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/" + r"photos/[^/]+/(?:album|set)s/(\d+)"] + test = [("https://www.flickr.com/photos/flickr/albums/72157656845052880", { + "url": "517db3faa55e88686f1d00a379f8f0daf4c7b837", + "keyword": "504ca926fe520dc6e4a98e7ee590c3498a3c3392", + })] + + def items(self): + first = True + yield Message.Version, 1 + + for photo in self.api.photosets_getPhotos(self.item_id): + if first: + first = False + yield Message.Directory, photo["photoset"].copy() + url = photo["photo"]["source"] + yield Message.Url, url, text.nameext_from_url(url, photo) + + class FlickrAPI(): + """Minimal interface for the flickr API""" api_url = "https://api.flickr.com/services/rest/" + formats = [("o", "Original"), ("k", "Large 2048"), + ("h", "Large 1600"), ("l", "Large")] def __init__(self, extractor, api_key="ac4fd7aa98585b9eee1ba761c209de68"): self.session = extractor.session @@ -86,7 +115,54 @@ class FlickrAPI(): def photos_getSizes(self, photo_id): params = {"photo_id": photo_id} - return self._call("photos.getSizes", params)["sizes"] + return self._call("photos.getSizes", params)["sizes"]["size"] + + def photosets_getPhotos(self, photoset_id): + method = "photosets.getPhotos" + params = {"photoset_id": photoset_id, "page": 1, + "extras": "url_o,url_k,url_h,url_l"} + while True: + photoset = self._call(method, params)["photoset"] + + photos = photoset["photo"] + del photoset["photo"] + del photoset["page"] + del photoset["perpage"] + del photoset["per_page"] + + for photo in photos: + + for fmt, fmtname in self.formats: + key = "url_" + fmt + if key in photo: + # generate photo info + photo["photo"] = { + "source": photo[key], + "width" : photo["width_" + fmt], + "height": photo["height_" + fmt], + "label" : fmtname, + "media" : "photo", + } + # remove excess data + keys = [ + key for key in photo.keys() + if key.startswith(("url_", "width_", "height_")) + ] + for key in keys: + del photo[key] + break + + else: + # extra API call to get photo url and size + print(photo["id"]) + photo["photo"] = self.photos_getSizes(photo["id"])[-1] + + photo["photoset"] = photoset + yield photo + + if params["page"] == photoset["pages"]: + break + params["page"] += 1 def _call(self, method, params): params["method"] = "flickr." + method