From 6294e2c540011f241a4516e2ac8faf88ea7c587b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 19 May 2020 21:25:07 +0200 Subject: [PATCH] add 'text.ensure_http_scheme()' --- gallery_dl/extractor/deviantart.py | 3 ++- gallery_dl/extractor/newgrounds.py | 7 ++----- gallery_dl/extractor/patreon.py | 3 +-- gallery_dl/text.py | 7 +++++++ test/test_text.py | 27 +++++++++++++++++++++++++++ 5 files changed, 39 insertions(+), 8 deletions(-) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index d73a8c7a..c5d410fc 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -126,8 +126,9 @@ class DeviantartExtractor(Extractor): if self.extra: for match in DeviantartStashExtractor.pattern.finditer( deviation.get("description", "")): + url = text.ensure_http_scheme(match.group(0)) deviation["_extractor"] = DeviantartStashExtractor - yield Message.Queue, match.group(0), deviation + yield Message.Queue, url, deviation def deviations(self): """Return an iterable containing all relevant Deviation-objects""" diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 17fe935e..84794ad9 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -224,10 +224,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor): self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format( self.user, match.group(3)) else: - url = match.group(0) - if not url.startswith("http"): - url = "https://" + url - self.post_url = url + self.post_url = text.ensure_http_scheme(match.group(0)) def posts(self): return (self.post_url,) @@ -414,6 +411,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): @staticmethod def _extract_favorites(page): return [ - "https://" + user.rpartition('"')[2].lstrip("/:") + text.ensure_http_scheme(user.rpartition('"')[2]) for user in text.extract_iter(page, 'class="item-user', '">