diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index e099c7ed..a3a455a9 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -74,8 +74,8 @@ class RedditExtractor(Extractor):
                         yield Message.Url, url, submission
 
                     elif "gallery_data" in media:
-                        for submission["num"], url in enumerate(
-                                self._extract_gallery(media), 1):
+                        for url in self._extract_gallery(media):
+                            submission["num"] += 1
                             text.nameext_from_url(url, submission)
                             yield Message.Url, url, submission
 
@@ -99,7 +99,10 @@ class RedditExtractor(Extractor):
                             urls.append((url, submission))
                     for comment in comments:
                         html = comment["body_html"] or ""
-                        if ' href="' in html:
+                        href = (' href="' in html)
+                        media = ("media_metadata" in comment)
+
+                        if media or href:
                             comment["date"] = text.parse_timestamp(
                                 comment["created_utc"])
                             if submission:
@@ -107,6 +110,14 @@ class RedditExtractor(Extractor):
                                 data["comment"] = comment
                             else:
                                 data = comment
+
+                        if media:
+                            for embed in self._extract_embed(comment):
+                                submission["num"] += 1
+                                text.nameext_from_url(embed, submission)
+                                yield Message.Url, embed, submission
+
+                        if href:
                             for url in text.extract_iter(
                                     html, ' href="', '"'):
                                 urls.append((url, data))
@@ -118,6 +129,7 @@ class RedditExtractor(Extractor):
                     if url.startswith((
                         "https://www.reddit.com/message/compose",
                         "https://reddit.com/message/compose",
+                        "https://preview.redd.it/",
                     )):
                         continue
 
@@ -172,6 +184,35 @@ class RedditExtractor(Extractor):
                     submission["id"], item["media_id"])
                 self.log.debug(src)
 
+    def _extract_embed(self, submission):
+        """Yield download URLs for the entries in 'media_metadata'
+
+        Entries whose status is not "valid" or that lack an "s" object
+        are skipped with a warning. For each remaining entry, the query
+        string is dropped and the first "/preview." is replaced by "/i.".
+        """
+        meta = submission["media_metadata"]
+        if not meta:
+            return
+
+        for mid, data in meta.items():
+            # use .get() so an entry without "status" is skipped with the
+            # warning below instead of raising KeyError
+            if data.get("status") != "valid" or "s" not in data:
+                self.log.warning(
+                    "embed %s: skipping item %s (status: %s)",
+                    submission["id"], mid, data.get("status"))
+                continue
+            src = data["s"]
+            url = src.get("u") or src.get("gif") or src.get("mp4")
+            if url:
+                yield url.partition("?")[0].replace("/preview.", "/i.", 1)
+            else:
+                self.log.error(
+                    "embed %s: unable to fetch download URL for item %s",
+                    submission["id"], mid)
+                self.log.debug(src)
+
     def _extract_video_ytdl(self, submission):
         return "https://www.reddit.com" + submission["permalink"]
 
diff --git a/test/results/reddit.py b/test/results/reddit.py
index bd0f9fd7..55623337 100644
--- a/test/results/reddit.py
+++ b/test/results/reddit.py
@@ -168,6 +168,18 @@ __tests__ = (
     "#count"   : 0,
 },
 
+{
+    "#url"     : "https://www.reddit.com/r/RobloxArt/comments/15ko0qu/",
+    "#comment" : "comment embeds (#5366)",
+    "#category": ("", "reddit", "submission"),
+    "#class"   : reddit.RedditSubmissionExtractor,
+    "#options" : {"comments": 10},
+    "#urls"    : (
+        "https://i.redd.it/ppt5yciyipgb1.jpg",
+        "https://i.redd.it/u0ojzd69kpgb1.png",
+    ),
+},
+
 {
     "#url"     : "https://www.reddit.com/user/TheSpiritTree/comments/srilyf/",
     "#comment" : "user page submission (#2301)",