mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
7916c8bf77
partially revert ce54b8c04c
343 lines
12 KiB
Python
343 lines
12 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2019-2023 Mike Fährmann
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
"""Extractors for Mastodon instances"""
|
|
|
|
from .common import BaseExtractor, Message
|
|
from .. import text, exception
|
|
from ..cache import cache
|
|
|
|
|
|
class MastodonExtractor(BaseExtractor):
    """Shared base for all Mastodon extractors"""
    basecategory = "mastodon"
    directory_fmt = ("mastodon", "{instance}", "{account[username]}")
    filename_fmt = "{category}_{id}_{media[id]}.{extension}"
    archive_fmt = "{media[id]}"

    def __init__(self, match):
        BaseExtractor.__init__(self, match)
        # the last matched capture group identifies what to extract
        self.item = match.group(match.lastindex)

    def _init(self):
        """Derive the instance name and read the extractor options"""
        # everything after "://" in the root URL
        self.instance = self.root.partition("://")[2]
        self.reblogs = self.config("reblogs", False)
        self.replies = self.config("replies", True)
        self.cards = self.config("cards", False)

    def items(self):
        """Yield a Directory message per status and a Url message per file

        Reblogs and replies are skipped unless the respective option is
        enabled; card images are added as extra files when 'cards' is set.
        """
        for status in self.statuses():

            # _check_moved() replaces itself with None after its first call
            if self._check_moved:
                self._check_moved(status["account"])

            if not self.reblogs and status["reblog"]:
                self.log.debug("Skipping %s (reblog)", status["id"])
                continue
            if not self.replies and status["in_reply_to_id"]:
                self.log.debug("Skipping %s (reply)", status["id"])
                continue

            # detach the attachment list from the status metadata
            files = status.pop("media_attachments")

            reblog = status["reblog"]
            if reblog:
                files.extend(reblog["media_attachments"])

            card = status.get("card") if self.cards else None
            image = card.get("image") if card else None
            if image:
                # keep the linked page as 'weburl' and make the preview
                # image the downloadable 'url'
                card["weburl"] = card.get("url")
                card["url"] = image
                card_id = "".join(image.split("/")[6:-2]).lstrip("0")
                card["id"] = "card" + card_id
                files.append(card)

            status["instance"] = self.instance
            acct = status["account"]["acct"]
            if "@" in acct:
                status["instance_remote"] = acct.rpartition("@")[2]
            else:
                status["instance_remote"] = None

            status["count"] = len(files)
            status["tags"] = [entry["name"] for entry in status["tags"]]
            status["date"] = text.parse_datetime(
                status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")

            yield Message.Directory, status
            for num, media in enumerate(files, 1):
                status["num"] = num
                status["media"] = media
                url = media["url"]
                yield Message.Url, url, text.nameext_from_url(url, status)

    def statuses(self):
        """Return an iterable of Status objects; overridden by subclasses"""
        return ()

    def _check_moved(self, account):
        """Log a warning if 'account' reports having moved"""
        # shadow this method on the instance so the check runs only once
        self._check_moved = None
        # Some fediverse software (such as Iceshrimp and Sharkey) sends a
        # null "moved" field instead of omitting it entirely, so test
        # whether the value is truthy rather than whether the key exists.
        moved = account.get("moved")
        if moved:
            self.log.warning("Account '%s' moved to '%s'",
                             account["acct"], moved["acct"])
|
|
|
|
|
|
# Built-in instances: root URL, host pattern, and the API credentials
# shipped for each of them. The mapping is handed to
# MastodonExtractor.update(); its result, followed by an optional "/web"
# path segment, is the common prefix of every extractor pattern below.
BASE_PATTERN = MastodonExtractor.update({
    "mastodon.social": {
        "root": "https://mastodon.social",
        "pattern": r"mastodon\.social",
        "access-token": "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
        "client-id": "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
        "client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
    },
    "pawoo": {
        "root": "https://pawoo.net",
        "pattern": r"pawoo\.net",
        "access-token": ("c12c9d275050bce0dc92169a28db09d7"
                         "0d62d0a75a8525953098c167eacd3668"),
        "client-id": ("978a25f843ec01e53d09be2c290cd75c"
                      "782bc3b7fdbd7ea4164b9f3c3780c8ff"),
        "client-secret": ("9208e3d4a7997032cf4f1b0e12e5df38"
                          "8428ef1fadb446dcfeb4f5ed6872d97b"),
    },
    "baraag": {
        "root": "https://baraag.net",
        "pattern": r"baraag\.net",
        "access-token": "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
        "client-id": "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
        "client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
    },
}) + "(?:/web)?"
|
|
|
|
|
|
class MastodonUserExtractor(MastodonExtractor):
    """Extractor for the statuses of a single account/user"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)(?:/media)?/?$"
    example = "https://mastodon.social/@USER"

    def statuses(self):
        """Return the account's statuses as provided by the API"""
        api = MastodonAPI(self)
        account_id = api.account_id_by_username(self.item)

        # Restrict the request to media posts only when none of the
        # options that need non-media statuses is enabled.
        needs_all_posts = (
            self.reblogs or
            self.cards or
            self.config("text-posts", False)
        )

        return api.account_statuses(
            account_id,
            only_media=not needs_all_posts,
            exclude_replies=not self.replies,
        )
|
|
|
|
|
|
class MastodonBookmarkExtractor(MastodonExtractor):
    """Extractor for bookmarked mastodon statuses"""
    subcategory = "bookmark"
    pattern = BASE_PATTERN + r"/bookmarks"
    example = "https://mastodon.social/bookmarks"

    def statuses(self):
        """Return the statuses from the API's bookmarks endpoint"""
        api = MastodonAPI(self)
        return api.account_bookmarks()
|
|
|
|
|
|
class MastodonFavoriteExtractor(MastodonExtractor):
    """Extractor for favourited mastodon statuses"""
    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/favourites"
    example = "https://mastodon.social/favourites"

    def statuses(self):
        """Return the statuses from the API's favourites endpoint"""
        api = MastodonAPI(self)
        return api.account_favorites()
|
|
|
|
|
|
class MastodonListExtractor(MastodonExtractor):
    """Extractor for the statuses of a mastodon list"""
    subcategory = "list"
    pattern = BASE_PATTERN + r"/lists/(\w+)"
    example = "https://mastodon.social/lists/12345"

    def statuses(self):
        """Return the list timeline for the list ID taken from the URL"""
        api = MastodonAPI(self)
        return api.timelines_list(self.item)
|
|
|
|
|
|
class MastodonHashtagExtractor(MastodonExtractor):
    """Extractor for the statuses of a mastodon hashtag"""
    subcategory = "hashtag"
    pattern = BASE_PATTERN + r"/tags/(\w+)"
    example = "https://mastodon.social/tags/NAME"

    def statuses(self):
        """Return the tag timeline for the hashtag taken from the URL"""
        api = MastodonAPI(self)
        return api.timelines_tag(self.item)
|
|
|
|
|
|
class MastodonFollowingExtractor(MastodonExtractor):
    """Extractor for the accounts a mastodon user follows"""
    subcategory = "following"
    pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)/following"
    example = "https://mastodon.social/@USER/following"

    def items(self):
        """Yield a Queue message for every followed account"""
        api = MastodonAPI(self)
        followed = api.account_following(
            api.account_id_by_username(self.item))

        for user in followed:
            # mark each queued URL for handling by the user extractor
            user["_extractor"] = MastodonUserExtractor
            yield Message.Queue, user["url"], user
|
|
|
|
|
|
class MastodonStatusExtractor(MastodonExtractor):
    """Extractor for the files of a single status"""
    subcategory = "status"
    pattern = BASE_PATTERN + r"/@[^/?#]+/(?!following)([^/?#]+)"
    example = "https://mastodon.social/@USER/12345"

    def statuses(self):
        """Return a 1-tuple holding the status named in the URL"""
        status = MastodonAPI(self).status(self.item)
        return (status,)
|
|
|
|
|
|
class MastodonAPI():
    """Minimal interface for the Mastodon API

    https://docs.joinmastodon.org/
    https://github.com/tootsuite/mastodon
    """

    def __init__(self, extractor):
        """Bind to 'extractor' and prepare the authorization header

        The access token is looked up in this order: the extractor's
        'access-token' option, the token cache (when the option is unset
        or set to "cache"), and finally the per-instance configuration.
        """
        self.root = extractor.root
        self.extractor = extractor

        access_token = extractor.config("access-token")
        if access_token is None or access_token == "cache":
            access_token = _access_token_cache(extractor.instance)
        if not access_token:
            access_token = extractor.config_instance("access-token")

        if access_token:
            self.headers = {"Authorization": "Bearer " + access_token}
        else:
            # no token available: send requests without extra headers
            self.headers = None

    def account_id_by_username(self, username):
        """Return the account ID for 'username'

        A value of the form "id:<ID>" is returned as-is without its
        prefix. Otherwise the lookup endpoint is tried first and the
        account search is used as fallback.

        Raises exception.NotFoundError if no matching account is found.
        """
        if username.startswith("id:"):
            return username[3:]

        try:
            return self.account_lookup(username)["id"]
        except Exception:
            # deliberate best-effort: fall back to account search
            pass

        if "@" in username:
            handle = "@" + username
        else:
            handle = "@{}@{}".format(username, self.extractor.instance)

        for account in self.account_search(handle, 1):
            if account["acct"] == username:
                self.extractor._check_moved(account)
                return account["id"]
        raise exception.NotFoundError("account")

    def account_bookmarks(self):
        """Statuses the user has bookmarked"""
        endpoint = "/v1/bookmarks"
        return self._pagination(endpoint, None)

    def account_favorites(self):
        """Statuses the user has favourited"""
        endpoint = "/v1/favourites"
        return self._pagination(endpoint, None)

    def account_following(self, account_id):
        """Accounts which the given account is following"""
        endpoint = "/v1/accounts/{}/following".format(account_id)
        return self._pagination(endpoint, None)

    def account_lookup(self, username):
        """Quickly lookup a username to see if it is available"""
        endpoint = "/v1/accounts/lookup"
        params = {"acct": username}
        return self._call(endpoint, params).json()

    def account_search(self, query, limit=40):
        """Search for matching accounts by username or display name"""
        endpoint = "/v1/accounts/search"
        params = {"q": query, "limit": limit}
        return self._call(endpoint, params).json()

    def account_statuses(self, account_id, only_media=True,
                         exclude_replies=False):
        """Statuses posted to the given account"""
        endpoint = "/v1/accounts/{}/statuses".format(account_id)
        params = {"only_media"     : "true" if only_media else "false",
                  "exclude_replies": "true" if exclude_replies else "false"}
        return self._pagination(endpoint, params)

    def status(self, status_id):
        """Obtain information about a status"""
        endpoint = "/v1/statuses/" + status_id
        return self._call(endpoint).json()

    def timelines_list(self, list_id):
        """View statuses in the given list timeline"""
        endpoint = "/v1/timelines/list/" + list_id
        return self._pagination(endpoint, None)

    def timelines_tag(self, hashtag):
        """View public statuses containing the given hashtag"""
        endpoint = "/v1/timelines/tag/" + hashtag
        return self._pagination(endpoint, None)

    def _call(self, endpoint, params=None):
        """Send a request to 'endpoint' and return the response object

        'endpoint' is either an API path, which gets prefixed with
        '<root>/api', or an absolute URL (as found in pagination links).

        Responses with a status code below 400 are returned.
        401 raises exception.StopExtraction with a hint about the token,
        404 raises exception.NotFoundError, and 429 waits until the time
        given in the 'x-ratelimit-reset' header before retrying.
        Any other status raises exception.StopExtraction.
        """
        if endpoint.startswith("http"):
            url = endpoint
        else:
            url = self.root + "/api" + endpoint

        while True:
            response = self.extractor.request(
                url, params=params, headers=self.headers, fatal=None)
            code = response.status_code

            if code < 400:
                return response
            if code == 401:
                raise exception.StopExtraction(
                    "Invalid or missing access token.\n"
                    "Run 'gallery-dl oauth:mastodon:%s' to obtain one.",
                    self.extractor.instance)
            if code == 404:
                raise exception.NotFoundError()
            if code == 429:
                self.extractor.wait(until=text.parse_datetime(
                    response.headers["x-ratelimit-reset"],
                    "%Y-%m-%dT%H:%M:%S.%fZ",
                ))
                continue
            raise exception.StopExtraction(self._error_message(response))

    @staticmethod
    def _error_message(response):
        """Return a printable error description for a failed response

        Uses the "error" field of a JSON object body when there is one.
        Error pages that are not JSON (for example HTML sent by a reverse
        proxy), JSON bodies that are not objects, and objects without an
        "error" value fall back to '<status code> <reason>', so that the
        caller never fails while building its error message.
        """
        try:
            data = response.json()
        except ValueError:
            # body is not valid JSON
            data = None

        if isinstance(data, dict):
            error = data.get("error")
            if error:
                return error

        reason = getattr(response, "reason", None)
        return "{} {}".format(response.status_code, reason or "error")

    def _pagination(self, endpoint, params):
        """Yield the items of every page, following 'next' links

        'params' is only sent with the first request; the URL from the
        'next' link is used unchanged for all following pages.
        """
        url = endpoint
        while url:
            response = self._call(url, params)
            yield from response.json()

            url = response.links.get("next")
            if not url:
                return
            url = url["url"]
            params = None
|
|
|
|
|
|
@cache(maxage=100*365*86400, keyarg=0)
def _access_token_cache(instance):
    """Cache slot for access tokens, keyed by 'instance'

    The function body itself always yields None; any other value comes
    from the cache decorator (presumably filled in elsewhere, e.g. after
    an OAuth run -- confirm against the cache module).
    """
    return None
|