From 93e5d8cba3ef39455cea87d92cdf4c2f01fbe0ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Wed, 31 May 2017 17:31:51 +0200
Subject: [PATCH] [flickr] add album extractor

---
 gallery_dl/extractor/flickr.py | 102 ++++++++++++++++++++++++++++-----
 1 file changed, 89 insertions(+), 13 deletions(-)

diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index 013829f1..21aeab8f 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -15,6 +15,13 @@ from .. import text, exception
 class FlickrExtractor(Extractor):
     """Base class for flickr extractors"""
     category = "flickr"
+    filename_fmt = "{category}_{id}.{extension}"
+
+    def __init__(self, match):
+        Extractor.__init__(self)
+        self.item_id = match.group(1)
+        self.metadata = self.config("metadata", False)
+        self.api = FlickrAPI(self)
 
     @staticmethod
     def _clean(photo):
@@ -34,34 +41,30 @@ class FlickrExtractor(Extractor):
 class FlickrImageExtractor(FlickrExtractor):
     """Extractor for individual images from flickr.com"""
     subcategory = "image"
-    filename_fmt = "{category}_{id}.{extension}"
     pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/]+/(\d+)",
-               r"(?:https?://)?[^.]+\.staticflickr\.com/d+/\d+/(\d+)"]
+               r"(?:https?://)?[^.]+\.staticflickr\.com/(?:\d+/)+(\d+)_"]
     test = [
         ("https://www.flickr.com/photos/departingyyz/16089302239", {
             "url": "7f0887f5953f61c8b79a695cb102ea309c0346b0",
             "keyword": "5ecdaf0192802451b7daca9b81f393f207ff7ee9",
             "content": "6aaad7512d335ca93286fe2046e7fe3bb93d808e",
         }),
+        ("http://c2.staticflickr.com/2/1475/24531000464_9a7503ae68_b.jpg", {
+            "url": "40f5163488522ca5d918750ed7bd7fcf437982fe",
+        }),
         ("https://www.flickr.com/photos/zzz/16089302238", {
             "exception": exception.NotFoundError,
-        })
+        }),
     ]
 
-    def __init__(self, match):
-        FlickrExtractor.__init__(self)
-        self.api = FlickrAPI(self)
-        self.photo_id = match.group(1)
-        self.metadata = self.config("metadata", False)
-
     def items(self):
-        size = self.api.photos_getSizes(self.photo_id)["size"][-1]
+        size = self.api.photos_getSizes(self.item_id)[-1]
 
         if self.metadata:
-            info = self.api.photos_getInfo(self.photo_id)
+            info = self.api.photos_getInfo(self.item_id)
             self._clean(info)
         else:
-            info = {"id": self.photo_id}
+            info = {"id": self.item_id}
 
         info["photo"] = size
         url = size["source"]
@@ -72,8 +75,34 @@ class FlickrImageExtractor(FlickrExtractor):
         yield Message.Url, url, info
 
 
+class FlickrAlbumExtractor(FlickrExtractor):
+    """Extractor for photo albums from flickr.com"""
+    subcategory = "album"
+    directory_fmt = ["{category}", "{id} - {title}"]
+    pattern = [r"(?:https?://)?(?:www\.)?flickr\.com/"
+               r"photos/[^/]+/(?:album|set)s/(\d+)"]
+    test = [("https://www.flickr.com/photos/flickr/albums/72157656845052880", {
+        "url": "517db3faa55e88686f1d00a379f8f0daf4c7b837",
+        "keyword": "504ca926fe520dc6e4a98e7ee590c3498a3c3392",
+    })]
+
+    def items(self):
+        """Yield the version, the album directory once, and each photo URL"""
+        yield Message.Version, 1
+
+        photos = self.api.photosets_getPhotos(self.item_id)
+        for index, photo in enumerate(photos):
+            if index == 0:
+                yield Message.Directory, photo["photoset"].copy()
+            source = photo["photo"]["source"]
+            yield Message.Url, source, text.nameext_from_url(source, photo)
+
+
 class FlickrAPI():
+    """Minimal interface for the flickr API"""
     api_url = "https://api.flickr.com/services/rest/"
+    formats = [("o", "Original"), ("k", "Large 2048"),
+               ("h", "Large 1600"), ("l", "Large")]
 
     def __init__(self, extractor, api_key="ac4fd7aa98585b9eee1ba761c209de68"):
         self.session = extractor.session
@@ -86,7 +115,54 @@ class FlickrAPI():
 
     def photos_getSizes(self, photo_id):
         params = {"photo_id": photo_id}
-        return self._call("photos.getSizes", params)["sizes"]
+        return self._call("photos.getSizes", params)["sizes"]["size"]
+
+    def photosets_getPhotos(self, photoset_id):
+        """Yield every photo of a photoset, following all result pages
+
+        Each photo gets a "photo" entry describing the image to download
+        and a "photoset" entry with the remaining album metadata.
+        """
+        params = {"photoset_id": photoset_id, "page": 1,
+                  "extras": "url_o,url_k,url_h,url_l"}
+        while True:
+            photoset = self._call("photosets.getPhotos", params)["photoset"]
+            photos = photoset.pop("photo")
+            # drop paging details; tolerate a response lacking one of them
+            for name in ("page", "perpage", "per_page"):
+                photoset.pop(name, None)
+
+            for photo in photos:
+                for fmt, fmtname in self.formats:
+                    url_key = "url_" + fmt
+                    if url_key not in photo:
+                        continue
+                    # first size in 'formats' order that the extras provide
+                    photo["photo"] = {
+                        "source": photo[url_key],
+                        "width" : photo["width_" + fmt],
+                        "height": photo["height_" + fmt],
+                        "label" : fmtname,
+                        "media" : "photo",
+                    }
+                    # remove the now redundant per-size fields
+                    for name in [
+                        name for name in photo
+                        if name.startswith(("url_", "width_", "height_"))
+                    ]:
+                        del photo[name]
+                    break
+                else:
+                    # no size in the extras: extra API call for url and size
+                    photo["photo"] = self.photos_getSizes(photo["id"])[-1]
+
+                photo["photoset"] = photoset
+                yield photo
+
+            # ">=" also stops on empty sets (0 pages) or string counts
+            if params["page"] >= int(photoset["pages"]):
+                break
+            params["page"] += 1
 
     def _call(self, method, params):
         params["method"] = "flickr." + method