1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-23 11:12:40 +01:00
gallery-dl/gallery_dl/extractor/unsplash.py

145 lines
4.5 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://unsplash.com/"""
from .common import Extractor, Message
from .. import text, util
BASE_PATTERN = r"(?:https?://)?unsplash\.com"
class UnsplashExtractor(Extractor):
"""Base class for unsplash extractors"""
category = "unsplash"
directory_fmt = ("{category}", "{user[username]}")
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
root = "https://unsplash.com"
2021-01-23 16:33:20 +01:00
page_start = 1
per_page = 20
def __init__(self, match):
Extractor.__init__(self, match)
self.item = match.group(1)
def items(self):
2021-01-21 22:41:49 +01:00
fmt = self.config("format") or "raw"
metadata = self.metadata()
for photo in self.photos():
util.delete_items(
2021-01-20 22:51:02 +01:00
photo, ("current_user_collections", "related_collections"))
2021-01-21 22:41:49 +01:00
url = photo["urls"][fmt]
text.nameext_from_url(url, photo)
if metadata:
photo.update(metadata)
photo["extension"] = "jpg"
photo["date"] = text.parse_datetime(photo["created_at"])
if "tags" in photo:
photo["tags"] = [t["title"] for t in photo["tags"]]
yield Message.Directory, photo
yield Message.Url, url, photo
@staticmethod
def metadata():
return None
2021-01-23 16:33:20 +01:00
def skip(self, num):
pages = num // self.per_page
self.page_start += pages
return pages * self.per_page
def _pagination(self, url, params, results=False):
2021-01-23 16:33:20 +01:00
params["per_page"] = self.per_page
params["page"] = self.page_start
while True:
photos = self.request(url, params=params).json()
if results:
photos = photos["results"]
yield from photos
2021-01-23 16:33:20 +01:00
if len(photos) < self.per_page:
return
params["page"] += 1
class UnsplashImageExtractor(UnsplashExtractor):
"""Extractor for a single unsplash photo"""
subcategory = "image"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
example = "https://unsplash.com/photos/ID"
def photos(self):
url = "{}/napi/photos/{}".format(self.root, self.item)
return (self.request(url).json(),)
class UnsplashUserExtractor(UnsplashExtractor):
"""Extractor for all photos of an unsplash user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/@(\w+)/?$"
example = "https://unsplash.com/@USER"
def photos(self):
url = "{}/napi/users/{}/photos".format(self.root, self.item)
params = {"order_by": "latest"}
return self._pagination(url, params)
class UnsplashFavoriteExtractor(UnsplashExtractor):
"""Extractor for all likes of an unsplash user"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/@(\w+)/likes"
example = "https://unsplash.com/@USER/likes"
def photos(self):
url = "{}/napi/users/{}/likes".format(self.root, self.item)
params = {"order_by": "latest"}
return self._pagination(url, params)
class UnsplashCollectionExtractor(UnsplashExtractor):
"""Extractor for an unsplash collection"""
subcategory = "collection"
pattern = BASE_PATTERN + r"/collections/([^/?#]+)(?:/([^/?#]+))?"
example = "https://unsplash.com/collections/12345/TITLE"
def __init__(self, match):
UnsplashExtractor.__init__(self, match)
self.title = match.group(2) or ""
def metadata(self):
return {"collection_id": self.item, "collection_title": self.title}
def photos(self):
url = "{}/napi/collections/{}/photos".format(self.root, self.item)
params = {"order_by": "latest"}
return self._pagination(url, params)
class UnsplashSearchExtractor(UnsplashExtractor):
"""Extractor for unsplash search results"""
subcategory = "search"
2022-10-02 19:02:05 +02:00
pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?"
example = "https://unsplash.com/s/photos/QUERY"
def __init__(self, match):
UnsplashExtractor.__init__(self, match)
self.query = match.group(2)
def photos(self):
url = self.root + "/napi/search/photos"
params = {"query": text.unquote(self.item.replace('-', ' '))}
if self.query:
params.update(text.parse_query(self.query))
return self._pagination(url, params, True)