1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-23 11:12:40 +01:00

[generic] fix regex for non-src image URLs

This commit is contained in:
thatfuckingbird 2023-01-21 22:32:42 +01:00
parent 137de090dd
commit 8cfeed78b1

View File

@@ -150,7 +150,7 @@ class GenericExtractor(Extractor):
https://en.wikipedia.org/wiki/List_of_file_formats https://en.wikipedia.org/wiki/List_of_file_formats
Compared to the "pattern" class variable, here we must exclude also Compared to the "pattern" class variable, here we must exclude also
other special characters (space, ", ', >), since we are looking for other special characters (space, ", ', <, >), since we are looking for
urls in html tags. urls in html tags.
""" """
@@ -158,7 +158,7 @@ class GenericExtractor(Extractor):
(?:[^?&#"'>\s]+) # anything until dot+extension (?:[^?&#"'>\s]+) # anything until dot+extension
\.(?:jpe?g|jpe|png|gif \.(?:jpe?g|jpe|png|gif
|web[mp]|mp4|mkv|og[gmv]|opus) # dot + image/video extensions |web[mp]|mp4|mkv|og[gmv]|opus) # dot + image/video extensions
(?:[^"'>\s]*)? # optional query and fragment (?:[^"'<>\s]*)? # optional query and fragment
""" """
imageurls_src = re.findall(imageurl_pattern_src, page) imageurls_src = re.findall(imageurl_pattern_src, page)