2023-03-01 12:30:56 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2023-03-03 15:52:50 +01:00
|
|
|
"""Extractors for Misskey instances"""
|
|
|
|
|
2023-03-01 12:30:56 +01:00
|
|
|
from .common import BaseExtractor, Message
|
2023-04-23 10:24:25 +02:00
|
|
|
from .. import text, exception
|
2023-03-01 12:30:56 +01:00
|
|
|
|
|
|
|
|
|
|
|
class MisskeyExtractor(BaseExtractor):
    """Shared implementation for all Misskey extractors"""
    basecategory = "misskey"
    directory_fmt = ("misskey", "{instance}", "{user[username]}")
    filename_fmt = "{category}_{id}_{file[id]}.{extension}"
    archive_fmt = "{id}_{file[id]}"

    def __init__(self, match):
        BaseExtractor.__init__(self, match)
        # the last group that took part in the match identifies
        # the requested item (user name or note ID in the subclasses)
        self.item = match.group(match.lastindex)

    def _init(self):
        """Set up the API client and read the extractor options"""
        self.api = MisskeyAPI(self)
        # root URL without its scheme, e.g. "https://host" -> "host"
        _, _, self.instance = self.root.rpartition("://")
        self.renotes = self.config("renotes", False)
        self.replies = self.config("replies", True)

    def items(self):
        """Yield a Directory message per note and a Url message per file"""
        date_fmt = "%Y-%m-%dT%H:%M:%S.%f%z"

        for note in self.notes():
            # some results wrap the actual note in a "note" field
            # (presumably entries returned by the favorites endpoint)
            if "note" in note:
                note = note["note"]

            files = self._collect_files(note)
            if files is None:
                continue

            note["instance"] = self.instance
            note["instance_remote"] = note["user"]["host"]
            note["count"] = len(files)
            note["date"] = text.parse_datetime(note["createdAt"], date_fmt)
            yield Message.Directory, note

            for num, file in enumerate(files, 1):
                note["num"] = num
                file["date"] = text.parse_datetime(
                    file["createdAt"], date_fmt)
                note["file"] = file
                url = file["url"]
                yield Message.Url, url, text.nameext_from_url(url, note)

    def _collect_files(self, note):
        """Return all files belonging to 'note', or None to skip the note

        The note's own "files" entry gets removed from 'note'. Files of
        a renoted or replied-to note are appended, unless the respective
        option is disabled, in which case the whole note is skipped.
        """
        files = note.pop("files") or []

        related_notes = (
            ("renote", self.renotes, "Skipping %s (renote)"),
            ("reply", self.replies, "Skipping %s (reply)"),
        )
        for key, enabled, skip_msg in related_notes:
            related = note.get(key)
            if not related:
                continue
            if not enabled:
                self.log.debug(skip_msg, note["id"])
                return None
            files.extend(related.get("files") or ())

        return files

    def notes(self):
        """Return an iterable of Note objects; overridden by subclasses"""
        return ()
|
|
|
|
|
|
|
|
|
|
|
|
# Instance definitions handed to MisskeyExtractor.update().
# Each entry maps an instance name to its root URL and to a regex
# fragment matching its domain. The resulting BASE_PATTERN is used
# as the leading part of the extractor URL patterns below.
BASE_PATTERN = MisskeyExtractor.update({
    "misskey.io": {
        "root": "https://misskey.io",
        "pattern": r"misskey\.io",
    },
    "lesbian.energy": {
        "root": "https://lesbian.energy",
        "pattern": r"lesbian\.energy",
    },
    "sushi.ski": {
        "root": "https://sushi.ski",
        "pattern": r"sushi\.ski",
    },
})
|
|
|
|
|
|
|
|
|
|
|
|
class MisskeyUserExtractor(MisskeyExtractor):
    """Extractor for all images of a Misskey user"""
    subcategory = "user"
    # matches "/@name" and "/@name@remote.host", optional trailing slash
    pattern = BASE_PATTERN + r"/@([^/?#]+)/?$"
    test = (
        ("https://misskey.io/@lithla", {
            "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
            "range": "1-50",
            "count": 50,
        }),
        ("https://misskey.io/@blooddj@pawoo.net", {
            "range": "1-50",
            "count": 50,
        }),
        ("https://lesbian.energy/@rerorero", {
            # the dot is escaped so that it only matches a literal "."
            "pattern": r"https://lesbian\.energy/files/\w+",
            "range": "1-50",
            "count": 50,
        }),
        ("https://lesbian.energy/@nano@mk.yopo.work"),
        ("https://sushi.ski/@ui@misskey.04.si"),
    )

    def notes(self):
        """Return the notes of the user named in the URL"""
        user_id = self.api.user_id_by_username(self.item)
        return self.api.users_notes(user_id)
|
|
|
|
|
|
|
|
|
|
|
|
class MisskeyFollowingExtractor(MisskeyExtractor):
    """Extractor for the accounts a Misskey user is following"""
    subcategory = "following"
    pattern = BASE_PATTERN + r"/@([^/?#]+)/following"
    test = (
        ("https://misskey.io/@blooddj@pawoo.net/following", {
            "extractor": False,
            "count": ">= 6",
        }),
        ("https://sushi.ski/@hatusimo_sigure/following"),
    )

    def items(self):
        """Yield a Queue message with the profile URL of each followee"""
        user_id = self.api.user_id_by_username(self.item)

        for entry in self.api.users_following(user_id):
            followee = entry["followee"]

            profile_url = self.root + "/@" + followee["username"]
            remote_host = followee["host"]
            if remote_host is not None:
                # append the host part when the API reports one
                profile_url = profile_url + "@" + remote_host

            # have the queued URL handled by the user extractor
            followee["_extractor"] = MisskeyUserExtractor
            yield Message.Queue, profile_url, followee
|
|
|
|
|
|
|
|
|
|
|
|
class MisskeyNoteExtractor(MisskeyExtractor):
    """Extractor for the images of a single Note"""
    subcategory = "note"
    pattern = BASE_PATTERN + r"/notes/(\w+)"
    test = (
        ("https://misskey.io/notes/9bhqfo835v", {
            "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
            "count": 4,
        }),
        ("https://misskey.io/notes/9brq7z1re6"),
        ("https://sushi.ski/notes/9bm3x4ksqw", {
            "pattern": r"https://media\.sushi\.ski/files/[\w-]+\.png",
            "count": 1,
        }),
        ("https://lesbian.energy/notes/995ig09wqy", {
            "count": 1,
        }),
        ("https://lesbian.energy/notes/96ynd9w5kc"),
    )

    def notes(self):
        """Return a 1-tuple holding the Note with the ID from the URL"""
        note = self.api.notes_show(self.item)
        return (note,)
|
|
|
|
|
|
|
|
|
2023-05-23 22:15:20 +02:00
|
|
|
class MisskeyFavoriteExtractor(MisskeyExtractor):
    """Extractor for the notes listed under "favorites\""""
    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/(?:my|api/i)/favorites"
    test = (
        ("https://misskey.io/my/favorites"),
        ("https://misskey.io/api/i/favorites"),
        ("https://lesbian.energy/my/favorites"),
        ("https://sushi.ski/my/favorites"),
    )

    def notes(self):
        """Return the results of the API's favorites listing"""
        favorites = self.api.i_favorites()
        return favorites
|
2023-04-23 10:24:25 +02:00
|
|
|
|
|
|
|
|
2023-03-01 12:30:56 +01:00
|
|
|
class MisskeyAPI:
    """Thin client for the Misskey HTTP API

    https://github.com/misskey-dev/misskey
    https://misskey-hub.net/en/docs/api/
    https://misskey-hub.net/docs/api/endpoints.html
    """

    def __init__(self, extractor):
        self.root = extractor.root
        self.extractor = extractor
        self.headers = {"Content-Type": "application/json"}
        self.access_token = extractor.config("access-token")

    def user_id_by_username(self, username):
        """Return the "id" of the user 'username' ("name" or "name@host")"""
        name, separator, host = username.partition("@")
        params = {"username": name}
        if separator:
            # only send a "host" value when the name contains an "@"
            params["host"] = host
        return self._call("/users/show", params)["id"]

    def users_following(self, user_id):
        """Iterate over the results of /users/following for 'user_id'"""
        return self._pagination("/users/following", {"userId": user_id})

    def users_notes(self, user_id):
        """Iterate over the results of /users/notes for 'user_id'"""
        return self._pagination("/users/notes", {"userId": user_id})

    def notes_show(self, note_id):
        """Return the response of /notes/show for 'note_id'"""
        return self._call("/notes/show", {"noteId": note_id})

    def i_favorites(self):
        """Iterate over the results of /i/favorites

        Raises an AuthenticationError when no 'access-token' is configured.
        """
        if not self.access_token:
            raise exception.AuthenticationError()
        return self._pagination("/i/favorites", {"i": self.access_token})

    def _call(self, endpoint, data):
        """POST 'data' as JSON to 'endpoint' and return the decoded reply"""
        response = self.extractor.request(
            self.root + "/api" + endpoint,
            method="POST", headers=self.headers, json=data)
        return response.json()

    def _pagination(self, endpoint, data):
        """Yield the results of 'endpoint' until an empty page is returned

        'data' is modified in place: "limit" is set to 100 and "untilId"
        to the "id" of the last result of the most recent page.
        """
        data["limit"] = 100
        page = self._call(endpoint, data)
        while page:
            yield from page
            data["untilId"] = page[-1]["id"]
            page = self._call(endpoint, data)
|