From cd47425ccc102d95935fca25ed0332b0bb9a800b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 2 Nov 2024 17:38:55 +0100 Subject: [PATCH] [bluesky] fix downloads from non-bsky PDSs (#6406) --- gallery_dl/extractor/bluesky.py | 23 ++++++++++++++++++++-- test/results/bluesky.py | 35 +++++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index f4cec713..1910c5eb 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -60,8 +60,10 @@ class BlueskyExtractor(Extractor): yield Message.Directory, post if files: - base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob" - "?did={}&cid=".format(post["author"]["did"])) + did = post["author"]["did"] + base = ( + "{}/xrpc/com.atproto.sync.getBlob?did={}&cid=".format( + self.api.get_service_endpoint(did), did)) for post["num"], file in enumerate(files, 1): post.update(file) yield Message.Url, base + file["filename"], post @@ -428,6 +430,23 @@ class BlueskyAPI(): params = {"handle": handle} return self._call(endpoint, params)["did"] + @memcache(keyarg=1) + def get_service_endpoint(self, did): + if did.startswith('did:web:'): + url = "https://{}/.well-known/did.json".format( + did.rpartition(":")[2]) + else: + url = "https://plc.directory/" + did + + try: + data = self.extractor.request(url).json() + for service in data["service"]: + if service["type"] == "AtprotoPersonalDataServer": + return service["serviceEndpoint"] + except Exception: + pass + return "https://bsky.social" + def search_posts(self, query, sort=None): endpoint = "app.bsky.feed.searchPosts" params = { diff --git a/test/results/bluesky.py b/test/results/bluesky.py index 99f66f8d..f2ed9b18 100644 --- a/test/results/bluesky.py +++ b/test/results/bluesky.py @@ -46,14 +46,14 @@ __tests__ = ( "#url" : "https://bsky.app/profile/bsky.app/avatar", "#category": ("", "bluesky", "avatar"), "#class" : bluesky.BlueskyAvatarExtractor, - "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:z72i7hdynmk6r22z27h6tvur&cid=bafkreihagr2cmvl2jt4mgx3sppwe2it3fwolkrbtjrhcnwjk4jdijhsoze", + "#urls" : "https://puffball.us-east.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:z72i7hdynmk6r22z27h6tvur&cid=bafkreihagr2cmvl2jt4mgx3sppwe2it3fwolkrbtjrhcnwjk4jdijhsoze", }, { "#url" : "https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur/banner", "#category": ("", "bluesky", "background"), "#class" : bluesky.BlueskyBackgroundExtractor, - "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:z72i7hdynmk6r22z27h6tvur&cid=bafkreichzyovokfzmymz36p5jibbjrhsur6n7hjnzxrpbt5jaydp2szvna", + "#urls" : "https://puffball.us-east.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:z72i7hdynmk6r22z27h6tvur&cid=bafkreichzyovokfzmymz36p5jibbjrhsur6n7hjnzxrpbt5jaydp2szvna", }, { @@ -141,7 +141,7 @@ __tests__ = ( "#category": ("", "bluesky", "post"), "#class" : bluesky.BlueskyPostExtractor, "#options" : {"metadata": True}, - "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:z72i7hdynmk6r22z27h6tvur&cid=bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri", + "#urls" : "https://puffball.us-east.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:z72i7hdynmk6r22z27h6tvur&cid=bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri", "#sha1_content": "ffcf25e7c511173a12de5276b85903309fcd8d14", "author": { @@ -183,7 +183,7 @@ __tests__ = ( "followsCount" : int, "handle" : "bsky.app", "instance" : "bsky.app", - "indexedAt" : "2024-08-30T21:49:26.737Z", + "indexedAt" : str, "labels" : [], "postsCount" : int, }, @@ -194,7 +194,7 @@ __tests__ = ( "#category": ("", "bluesky", "post"), "#class" : bluesky.BlueskyPostExtractor, "#options" : {"metadata": "facets"}, - "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreib7ydpe3xxo4cq7nn32w7eqhcanfaanz6caepd2z4kzplxtx2ctgi", + "#urls" : "https://conocybe.us-west.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreib7ydpe3xxo4cq7nn32w7eqhcanfaanz6caepd2z4kzplxtx2ctgi", "#sha1_content": "9cf5748f6d00aae83fbb3cc2c6eb3caa832b90f4", "author": { @@ -238,7 +238,7 @@ __tests__ = ( "#comment" : "different embed CID path", "#category": ("", "bluesky", "post"), "#class" : bluesky.BlueskyPostExtractor, - "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:owc2r2dsewj3hk73rtd746zh&cid=bafkreieuhplc7fpbvi3suvacaf2dqxzvuu4hgl5o6eifqb76tf3uopldmi", + "#urls" : "https://amanita.us-east.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:owc2r2dsewj3hk73rtd746zh&cid=bafkreieuhplc7fpbvi3suvacaf2dqxzvuu4hgl5o6eifqb76tf3uopldmi", }, { @@ -246,7 +246,7 @@ __tests__ = ( "#comment" : "video (#6183)", "#category": ("", "bluesky", "post"), "#class" : bluesky.BlueskyPostExtractor, - "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreihq2nsfocrnlpx4nykb4szouqszxwmy3ucnk4k46nx5t6hjnxlti4", + "#urls" : "https://conocybe.us-west.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreihq2nsfocrnlpx4nykb4szouqszxwmy3ucnk4k46nx5t6hjnxlti4", "description": "kirby and reimu dance", "text" : "video", @@ -261,7 +261,7 @@ __tests__ = ( "#comment" : "quote (#6183)", "#class" : bluesky.BlueskyPostExtractor, "#options" : {"quoted": True}, - "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:eyhmjdxsnthqhvvszdejaocz&cid=bafkreib6eb7tfozksquveaj3z5msyx3hkniubrulxdys3eftthvmuzrtme", + "#urls" : "https://lionsmane.us-east.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:eyhmjdxsnthqhvvszdejaocz&cid=bafkreib6eb7tfozksquveaj3z5msyx3hkniubrulxdys3eftthvmuzrtme", "author": { "associated" : dict, @@ -290,8 +290,8 @@ __tests__ = ( "#class" : bluesky.BlueskyPostExtractor, "#options" : {"quoted": True}, "#urls" : ( - "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreiegcyremdrecmnpisci3a3nduc7lm3zdcl76z5o5rd4nstyolrxki", - "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:eyhmjdxsnthqhvvszdejaocz&cid=bafkreicojrnwiw5eqo3ko2q6duduyjaoyiqvdc25kuikcedlijtbgvlt5e", + "https://conocybe.us-west.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreiegcyremdrecmnpisci3a3nduc7lm3zdcl76z5o5rd4nstyolrxki", + "https://lionsmane.us-east.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:eyhmjdxsnthqhvvszdejaocz&cid=bafkreicojrnwiw5eqo3ko2q6duduyjaoyiqvdc25kuikcedlijtbgvlt5e", ), @@ -342,4 +342,19 @@ __tests__ = ( }, }, +{ + "#url" : "https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f", + "#comment" : "non-bsky PDS (#6406)", + "#class" : bluesky.BlueskyPostExtractor, + "#urls" : "https://pds.bun.how/xrpc/com.atproto.sync.getBlob?did=did:plc:7x6rtuenkuvxq3zsvffp2ide&cid=bafkreielhgekjheckgjusx7x5hxkbrqryfdmzdwwp2zoxchovgnpzkxzae", + "#sha1_content": "1777956de0dc8cf0815c5c7eb574a24ce54a1d42", + + "author": { + "createdAt": "2024-10-17T13:55:48.833Z", + "did" : "did:plc:7x6rtuenkuvxq3zsvffp2ide", + "handle" : "alt.bun.how", + "instance" : "bun.how", + }, +}, + )