mirror of
https://github.com/mikf/gallery-dl.git
synced 2025-01-31 19:51:34 +01:00
[generic] add support for IDNs
(internationalized domain names)
This commit is contained in:
parent
7610d9cf82
commit
34a7fab0e2
@ -44,6 +44,10 @@ class DirectlinkExtractor(Extractor):
|
||||
("https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw"
|
||||
".JUzkGb4V6dj9DXjLclrOoqR64uDxHFUO5KDriRdKpGwg.88mCtd4iT1NHlpVKSCaUpP"
|
||||
"mZPiDgT8hmQdQ5K_gYyu0g.JPEG/2.JPG"),
|
||||
# internationalized domain name
|
||||
("https://räksmörgås.josefsson.org/raksmorgas.jpg", {
|
||||
"content": "f7e00768ab009c969e70d775047cdd302ca51762",
|
||||
}),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
|
@ -26,12 +26,34 @@ class GenericExtractor(Extractor):
|
||||
# Based on: https://tools.ietf.org/html/rfc3986#appendix-B
|
||||
pattern += r"""
|
||||
(?P<scheme>https?://)? # optional http(s) scheme
|
||||
(?P<domain>[-\w\.]+) # required domain
|
||||
(?P<domain>[^/?#]+) # required domain
|
||||
(?P<path>/[^?#]*)? # optional path
|
||||
(?:\?(?P<query>[^#]*))? # optional query
|
||||
(?:\#(?P<fragment>.*))? # optional fragment
|
||||
"""
|
||||
|
||||
test = (
|
||||
("generic:https://www.nongnu.org/lzip/", {
|
||||
"count": 1,
|
||||
"content": "40be5c77773d3e91db6e1c5df720ee30afb62368",
|
||||
"keyword": {
|
||||
"description": "Lossless data compressor",
|
||||
"imageurl": "https://www.nongnu.org/lzip/lzip.png",
|
||||
"keywords": "lzip, clzip, plzip, lzlib, LZMA, bzip2, "
|
||||
"gzip, data compression, GNU, free software",
|
||||
"pageurl": "https://www.nongnu.org/lzip/",
|
||||
},
|
||||
}),
|
||||
# internationalized domain name
|
||||
("generic:https://räksmörgås.josefsson.org/", {
|
||||
"count": 2,
|
||||
"pattern": "^https://räksmörgås.josefsson.org/",
|
||||
}),
|
||||
("generic:https://en.wikipedia.org/Main_Page"),
|
||||
("generic:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
|
||||
("generic:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
"""Init."""
|
||||
Extractor.__init__(self, match)
|
||||
@ -56,7 +78,7 @@ class GenericExtractor(Extractor):
|
||||
self.root = self.scheme + match.group('domain')
|
||||
|
||||
def items(self):
|
||||
"""Get page, extract metadata & images, yield them in suitable messages.
|
||||
"""Get page, extract metadata & images, yield them in suitable messages
|
||||
|
||||
Adapted from common.GalleryExtractor.items()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user