[mastodon] implement 'text-posts' option (#1569)

similar to Twitter's 'text-tweets'
2024-11-22 02:32:33 +01:00 · 2021-07-02 22:12:41 +02:00 · 2021-07-02 22:12:41 +02:00 · 93d356712c
commit 93d356712c
parent 414bdc95a3
2 changed files with 33 additions and 23 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -1269,6 +1269,16 @@ Description
    Provide ``artist``, ``author``, and ``group`` metadata fields.


+extractor.mastodon.text-posts
+-----------------------------
+Type
+    ``bool``
+Default
+    ``false``
+Description
+    Also emit metadata for text-only posts without media content.
+
+
 extractor.newgrounds.flash
 --------------------------
 Type
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extractors for mastodon instances"""
+"""Extractors for Mastodon instances"""

 from .common import BaseExtractor, Message
 from .. import text, exception
@ -29,26 +29,23 @@ class MastodonExtractor(BaseExtractor):
    def items(self):
        for status in self.statuses():
            attachments = status["media_attachments"]
-            if attachments:
-                self.prepare(status)
-                yield Message.Directory, status
-                for media in attachments:
-                    status["media"] = media
-                    url = media["url"]
-                    yield Message.Url, url, text.nameext_from_url(url, status)
+            del status["media_attachments"]
+
+            status["instance"] = self.instance
+            status["tags"] = [tag["name"] for tag in status["tags"]]
+            status["date"] = text.parse_datetime(
+                status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
+
+            yield Message.Directory, status
+            for media in attachments:
+                status["media"] = media
+                url = media["url"]
+                yield Message.Url, url, text.nameext_from_url(url, status)

    def statuses(self):
-        """Return an iterable containing all relevant Status-objects"""
+        """Return an iterable containing all relevant Status objects"""
        return ()

-    def prepare(self, status):
-        """Prepare a status object"""
-        del status["media_attachments"]
-        status["instance"] = self.instance
-        status["tags"] = [tag["name"] for tag in status["tags"]]
-        status["date"] = text.parse_datetime(
-            status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
-

 INSTANCES = {
    "mastodon.social": {
@ -97,6 +94,7 @@ class MastodonUserExtractor(MastodonExtractor):

    def statuses(self):
        api = MastodonAPI(self)
+
        username = self.item
        handle = "@{}@{}".format(username, self.instance)
        for account in api.account_search(handle, 1):
@ -104,7 +102,9 @@ class MastodonUserExtractor(MastodonExtractor):
                break
        else:
            raise exception.NotFoundError("account")
-        return api.account_statuses(account["id"])
+
+        return api.account_statuses(
+            account["id"], not self.config("text-posts", False))


 class MastodonStatusExtractor(MastodonExtractor):
@ -130,8 +130,8 @@ class MastodonStatusExtractor(MastodonExtractor):
 class MastodonAPI():
    """Minimal interface for the Mastodon API

+    https://docs.joinmastodon.org/
    https://github.com/tootsuite/mastodon
-    https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md
    """

    def __init__(self, extractor):
@ -153,15 +153,15 @@ class MastodonAPI():
        self.headers = {"Authorization": "Bearer " + access_token}

    def account_search(self, query, limit=40):
-        """Search for content"""
+        """Search for accounts"""
        endpoint = "/v1/accounts/search"
        params = {"q": query, "limit": limit}
        return self._call(endpoint, params).json()

-    def account_statuses(self, account_id):
-        """Get an account's statuses"""
+    def account_statuses(self, account_id, only_media=True):
+        """Fetch an account's statuses"""
        endpoint = "/v1/accounts/{}/statuses".format(account_id)
-        params = {"only_media": "1"}
+        params = {"only_media": "1"} if only_media else None
        return self._pagination(endpoint, params)

    def status(self, status_id):