mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-23 11:12:40 +01:00
[generic] fix regex for non-src image URLs
This commit is contained in:
parent
137de090dd
commit
8cfeed78b1
@ -150,7 +150,7 @@ class GenericExtractor(Extractor):
|
|||||||
https://en.wikipedia.org/wiki/List_of_file_formats
|
https://en.wikipedia.org/wiki/List_of_file_formats
|
||||||
|
|
||||||
Compared to the "pattern" class variable, here we must exclude also
|
Compared to the "pattern" class variable, here we must exclude also
|
||||||
other special characters (space, ", ', >), since we are looking for
|
other special characters (space, ", ', <, >), since we are looking for
|
||||||
urls in html tags.
|
urls in html tags.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -158,7 +158,7 @@ class GenericExtractor(Extractor):
|
|||||||
(?:[^?&#"'>\s]+) # anything until dot+extension
|
(?:[^?&#"'>\s]+) # anything until dot+extension
|
||||||
\.(?:jpe?g|jpe|png|gif
|
\.(?:jpe?g|jpe|png|gif
|
||||||
|web[mp]|mp4|mkv|og[gmv]|opus) # dot + image/video extensions
|
|web[mp]|mp4|mkv|og[gmv]|opus) # dot + image/video extensions
|
||||||
(?:[^"'>\s]*)? # optional query and fragment
|
(?:[^"'<>\s]*)? # optional query and fragment
|
||||||
"""
|
"""
|
||||||
|
|
||||||
imageurls_src = re.findall(imageurl_pattern_src, page)
|
imageurls_src = re.findall(imageurl_pattern_src, page)
|
||||||
|
Loading…
Reference in New Issue
Block a user