1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-10-26 06:22:32 +02:00

[bluesky] add 'quoted' option (#6323)

This commit is contained in:
Mike Fährmann 2024-10-25 17:18:30 +02:00
parent d34e2d56aa
commit 8f396cfc57
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 106 additions and 39 deletions

View File

@@ -1489,6 +1489,16 @@ Description
(See `depth` parameter of `app.bsky.feed.getPostThread <https://www.docs.bsky.app/docs/api/app-bsky-feed-get-post-thread>`__)
extractor.bluesky.quoted
------------------------
Type
``bool``
Default
``false``
Description
Fetch media from quoted posts.
extractor.bluesky.reposts
-------------------------
Type

View File

@@ -42,62 +42,76 @@ class BlueskyExtractor(Extractor):
self._user = self._user_did = None
self.instance = self.root.partition("://")[2]
self.videos = self.config("videos", True)
self.quoted = self.config("quoted", False)
def items(self):
for post in self.posts():
if "post" in post:
post = post["post"]
pid = post["uri"].rpartition("/")[2]
if self._user_did and post["author"]["did"] != self._user_did:
self.log.debug("Skipping %s (repost)", pid)
self.log.debug("Skipping %s (repost)", self._pid(post))
continue
embed = post.get("embed")
post.update(post.pop("record"))
post.update(post["record"])
del post["record"]
while True:
self._prepare(post)
files = self._extract_files(post)
if self._metadata_facets:
if "facets" in post:
post["hashtags"] = tags = []
post["mentions"] = dids = []
post["uris"] = uris = []
for facet in post["facets"]:
features = facet["features"][0]
if "tag" in features:
tags.append(features["tag"])
elif "did" in features:
dids.append(features["did"])
elif "uri" in features:
uris.append(features["uri"])
else:
post["hashtags"] = post["mentions"] = post["uris"] = ()
yield Message.Directory, post
if files:
base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
"?did={}&cid=".format(post["author"]["did"]))
for post["num"], file in enumerate(files, 1):
post.update(file)
yield Message.Url, base + file["filename"], post
if self._metadata_user:
post["user"] = self._user or post["author"]
if not self.quoted or not embed or "record" not in embed:
break
files = self._extract_files(post)
post["instance"] = self.instance
post["post_id"] = pid
post["count"] = len(files)
post["date"] = text.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
yield Message.Directory, post
if not files:
continue
base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
"?did={}&cid=".format(post["author"]["did"]))
for post["num"], file in enumerate(files, 1):
post.update(file)
yield Message.Url, base + file["filename"], post
quote = embed["record"]
if "record" in quote:
quote = quote["record"]
quote["quote_id"] = self._pid(post)
quote["quote_by"] = post["author"]
embed = quote.get("embed")
quote.update(quote.pop("value"))
post = quote
def posts(self):
    """Return the posts that ``items()`` iterates over.

    This implementation supplies none: it returns an empty tuple.
    NOTE(review): presumably concrete extractors override this - confirm.
    """
    no_posts = ()
    return no_posts
def _pid(self, post):
return post["uri"].rpartition("/")[2]
def _prepare(self, post):
    """Add derived metadata fields to ``post`` in place.

    Fields written:
      * "hashtags", "mentions", "uris" - only when ``self._metadata_facets``
        is truthy; collected from the first feature of every entry in
        ``post["facets"]``, or empty tuples when the post has no "facets" key.
      * "user" - only when ``self._metadata_user`` is truthy; ``self._user``
        if set, otherwise the post's own "author".
      * "instance", "post_id", "date" - always.
    """
    if self._metadata_facets:
        if "facets" not in post:
            post["hashtags"] = post["mentions"] = post["uris"] = ()
        else:
            hashtags, mentions, links = [], [], []
            # Attach the lists before filling them, so they are present on
            # the post even if a malformed facet raises part-way through.
            post["hashtags"] = hashtags
            post["mentions"] = mentions
            post["uris"] = links

            # Checked in this order; only the first matching key is used.
            buckets = (
                ("tag", hashtags),
                ("did", mentions),
                ("uri", links),
            )
            for facet in post["facets"]:
                # Only the first feature of each facet is inspected.
                feature = facet["features"][0]
                for key, bucket in buckets:
                    if key in feature:
                        bucket.append(feature[key])
                        break

    if self._metadata_user:
        post["user"] = self._user or post["author"]

    post["instance"] = self.instance
    post["post_id"] = self._pid(post)

    # Keep only the first 19 characters ("YYYY-MM-DDTHH:MM:SS") so that any
    # trailing fraction or zone suffix does not reach the parser.
    timestamp = post["createdAt"][:19]
    post["date"] = text.parse_datetime(timestamp, "%Y-%m-%dT%H:%M:%S")
def _extract_files(self, post):
if "embed" not in post:
post["count"] = 0
return ()
files = []
@@ -111,6 +125,7 @@ class BlueskyExtractor(Extractor):
if "video" in media and self.videos:
files.append(self._extract_media(media, "video"))
post["count"] = len(files)
return files
def _extract_media(self, media, key):

View File

@@ -227,4 +227,46 @@ __tests__ = (
"extension" : "mp4",
},
{
"#url" : "https://bsky.app/profile/mikf.bsky.social/post/3kmfodjotln2f",
"#comment" : "quote (#6183)",
"#class" : bluesky.BlueskyPostExtractor,
"#options" : {"quoted": True},
"#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:eyhmjdxsnthqhvvszdejaocz&cid=bafkreib6eb7tfozksquveaj3z5msyx3hkniubrulxdys3eftthvmuzrtme",
"author": {
"associated" : dict,
"avatar" : "https://cdn.bsky.app/img/avatar/plain/did:plc:eyhmjdxsnthqhvvszdejaocz/bafkreigjrftlw7tabtpie32saydttpnoi7276v252vnycr6zt6euef7vdi@jpeg",
"createdAt" : "2024-01-11T00:27:37.404Z",
"did" : "did:plc:eyhmjdxsnthqhvvszdejaocz",
"displayName": "フナ",
"handle" : "ykfuna.bsky.social",
"labels" : list,
},
"quote_by": {
"avatar" : "https://cdn.bsky.app/img/avatar/plain/did:plc:cslxjqkeexku6elp5xowxkq7/bafkreic5jqkn5ohqhgsm6zzi7vnapuz54trojv3io4tfkrcyaprl4b2ztm@jpeg",
"createdAt" : "2024-02-05T00:03:54.087Z",
"did" : "did:plc:cslxjqkeexku6elp5xowxkq7",
"displayName": "mikf",
"handle" : "mikf.bsky.social",
"labels" : list,
},
"quote_id": "3kmfodjotln2f",
"post_id" : "3km4qy5y3jc2z",
},
{
"#url" : "https://bsky.app/profile/mikf.bsky.social/post/3kmfp2qktil25",
"#comment" : "quote with media (#6183)",
"#class" : bluesky.BlueskyPostExtractor,
"#options" : {"quoted": True},
"#urls" : (
"https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreiegcyremdrecmnpisci3a3nduc7lm3zdcl76z5o5rd4nstyolrxki",
"https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:eyhmjdxsnthqhvvszdejaocz&cid=bafkreicojrnwiw5eqo3ko2q6duduyjaoyiqvdc25kuikcedlijtbgvlt5e",
),
"text" : {"quote with media", ""},
},
)