2021-01-19 02:23:39 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2023-09-11 16:30:55 +02:00
|
|
|
# Copyright 2021-2023 Mike Fährmann
|
2021-01-19 02:23:39 +01:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
"""Extractors for https://unsplash.com/"""
|
|
|
|
|
|
|
|
from .common import Extractor, Message
|
|
|
|
from .. import text, util
|
|
|
|
|
|
|
|
# Shared regex prefix (optional http/https scheme + unsplash.com host);
# the extractor classes in this module append their path regex to it
# to build their 'pattern' attribute.
BASE_PATTERN = r"(?:https?://)?unsplash\.com"
|
|
|
|
|
|
|
|
|
|
|
|
class UnsplashExtractor(Extractor):
    """Base class for unsplash extractors"""
    category = "unsplash"
    directory_fmt = ("{category}", "{user[username]}")
    filename_fmt = "{id}.{extension}"
    archive_fmt = "{id}"
    root = "https://unsplash.com"
    # pagination settings: first page to request and entries per request
    page_start = 1
    per_page = 20

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first capture group of the matching 'pattern'
        self.item = match.group(1)

    def items(self):
        """Yield a Directory and a Url message for each photo"""
        # key into each photo's "urls" dict; "raw" unless configured
        size = self.config("format")
        if not size:
            size = "raw"
        extra = self.metadata()
        unwanted = ("current_user_collections", "related_collections")

        for photo in self.photos():
            util.delete_items(photo, unwanted)
            url = photo["urls"][size]
            text.nameext_from_url(url, photo)

            # extractor-specific metadata is merged in before the
            # fixed fields below are written
            if extra:
                photo.update(extra)
            photo["extension"] = "jpg"
            photo["date"] = text.parse_datetime(photo["created_at"])
            if "tags" in photo:
                # reduce tag objects to their "title" values
                photo["tags"] = [tag["title"] for tag in photo["tags"]]

            yield Message.Directory, photo
            yield Message.Url, url, photo

    @staticmethod
    def metadata():
        """Return additional metadata for each photo (none by default)"""
        return None

    def skip(self, num):
        """Advance 'page_start' by the whole pages contained in 'num'

        Returns the number of entries covered by the skipped pages.
        """
        whole_pages = num // self.per_page
        self.page_start += whole_pages
        return whole_pages * self.per_page

    def _pagination(self, url, params, results=False):
        """Yield photo objects from successive pages of 'url'

        'params' is updated in place with the "per_page" and "page"
        request parameters. If 'results' is true, the photos are read
        from the "results" entry of each response. Iteration ends with
        the first page containing fewer than 'per_page' entries.
        """
        params["per_page"] = self.per_page
        params["page"] = self.page_start

        while True:
            payload = self.request(url, params=params).json()
            photos = payload["results"] if results else payload
            yield from photos

            if len(photos) >= self.per_page:
                params["page"] += 1
            else:
                # short page: nothing left to fetch
                break
|
|
|
|
|
|
|
|
|
|
|
|
class UnsplashImageExtractor(UnsplashExtractor):
    """Extractor for a single unsplash photo"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
    example = "https://unsplash.com/photos/ID"

    def photos(self):
        """Return a 1-tuple holding the JSON object of the photo"""
        endpoint = "{}/napi/photos/{}".format(self.root, self.item)
        photo = self.request(endpoint).json()
        return (photo,)
|
|
|
|
|
|
|
|
|
|
|
|
class UnsplashUserExtractor(UnsplashExtractor):
    """Extractor for all photos of an unsplash user"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/@(\w+)/?$"
    example = "https://unsplash.com/@USER"

    def photos(self):
        """Return the paginated photos of the user, requested as 'latest'"""
        endpoint = "{}/napi/users/{}/photos".format(self.root, self.item)
        return self._pagination(endpoint, {"order_by": "latest"})
|
|
|
|
|
|
|
|
|
|
|
|
class UnsplashFavoriteExtractor(UnsplashExtractor):
    """Extractor for all likes of an unsplash user"""
    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/@(\w+)/likes"
    example = "https://unsplash.com/@USER/likes"

    def photos(self):
        """Return the paginated likes of the user, requested as 'latest'"""
        endpoint = "{}/napi/users/{}/likes".format(self.root, self.item)
        return self._pagination(endpoint, {"order_by": "latest"})
|
|
|
|
|
|
|
|
|
2021-01-21 22:27:43 +01:00
|
|
|
class UnsplashCollectionExtractor(UnsplashExtractor):
    """Extractor for an unsplash collection"""
    subcategory = "collection"
    pattern = BASE_PATTERN + r"/collections/([^/?#]+)(?:/([^/?#]+))?"
    example = "https://unsplash.com/collections/12345/TITLE"

    def __init__(self, match):
        UnsplashExtractor.__init__(self, match)
        # the title path segment is optional in 'pattern'
        title = match.group(2)
        self.title = title if title else ""

    def metadata(self):
        """Return the collection ID and title taken from the URL"""
        return {
            "collection_id": self.item,
            "collection_title": self.title,
        }

    def photos(self):
        """Return the paginated photos of the collection"""
        endpoint = "{}/napi/collections/{}/photos".format(
            self.root, self.item)
        return self._pagination(endpoint, {"order_by": "latest"})
|
|
|
|
|
|
|
|
|
2021-01-19 02:23:39 +01:00
|
|
|
class UnsplashSearchExtractor(UnsplashExtractor):
    """Extractor for unsplash search results"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?"
    example = "https://unsplash.com/s/photos/QUERY"

    def __init__(self, match):
        UnsplashExtractor.__init__(self, match)
        # optional URL query string (second capture group); may be None
        self.query = match.group(2)

    def photos(self):
        """Return the paginated search results for the URL's search term"""
        endpoint = self.root + "/napi/search/photos"

        # the search term is the URL path segment with '-' turned into ' '
        search_term = text.unquote(self.item.replace('-', ' '))
        params = {"query": search_term}

        # parameters from the URL's query string are added on top
        if self.query:
            url_params = text.parse_query(self.query)
            params.update(url_params)

        return self._pagination(endpoint, params, True)
|