From c4713404c821dcd83902f693a673b77120f1e6b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 2 Aug 2017 21:06:49 +0200 Subject: [PATCH] [directlink] improve URL pattern --- gallery_dl/extractor/directlink.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index 9ef4a4ec..1bcf0fb9 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -16,22 +16,28 @@ class DirectlinkExtractor(Extractor): """Extractor for direct links to images and other media files""" category = "directlink" filename_fmt = "{domain}/{path}" - pattern = [r"https?://([^/]+)/([^?&#]+\." - r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"] - test = [(("https://photos.smugmug.com/The-World/Hawaii/" - "i-SWz2K6n/2/X3/IMG_0311-X3.jpg"), { - "url": "32ee1045881e17ef3f13a9958595afa42421ec6c", - "keyword": "1abd2f2c115cdf2cf2671d2611349b4213c3ab3e", - })] + pattern = [r"https?://(?P<domain>[^/]+)/(?P<path>[^?&#]+\." 
+ r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))" + r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$"] + test = [ + (("https://photos.smugmug.com/The-World/Hawaii/" + "i-SWz2K6n/2/X3/IMG_0311-X3.jpg"), { + "url": "32ee1045881e17ef3f13a9958595afa42421ec6c", + "keyword": "2427b68c14006489df1776bb1bcd3bc24be25e10", + }), + ("https://example.org/path/file.webm?que=1&ry=2#fragment", { + "url": "fd4aec8a32842343394e6078a06c3e6b647bf671", + "keyword": "ed008f35fc18dddb2f448a18d160c949bb3b054c", + }), + ] def __init__(self, match): Extractor.__init__(self) - self.domain, self.path = match.groups() + self.data = match.groupdict() self.url = match.string def items(self): - data = {"domain": self.domain, "path": self.path} - text.nameext_from_url(self.url, data) + text.nameext_from_url(self.url, self.data) yield Message.Version, 1 - yield Message.Directory, data - yield Message.Url, self.url, data