mirror of
https://github.com/mikf/gallery-dl.git
synced 2025-02-01 03:51:42 +01:00
[generic] add support for IDNs
(internationalized domain name)
This commit is contained in:
parent
7610d9cf82
commit
34a7fab0e2
@ -44,6 +44,10 @@ class DirectlinkExtractor(Extractor):
|
|||||||
("https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw"
|
("https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw"
|
||||||
".JUzkGb4V6dj9DXjLclrOoqR64uDxHFUO5KDriRdKpGwg.88mCtd4iT1NHlpVKSCaUpP"
|
".JUzkGb4V6dj9DXjLclrOoqR64uDxHFUO5KDriRdKpGwg.88mCtd4iT1NHlpVKSCaUpP"
|
||||||
"mZPiDgT8hmQdQ5K_gYyu0g.JPEG/2.JPG"),
|
"mZPiDgT8hmQdQ5K_gYyu0g.JPEG/2.JPG"),
|
||||||
|
# internationalized domain name
|
||||||
|
("https://räksmörgås.josefsson.org/raksmorgas.jpg", {
|
||||||
|
"content": "f7e00768ab009c969e70d775047cdd302ca51762",
|
||||||
|
}),
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
|
@ -26,12 +26,34 @@ class GenericExtractor(Extractor):
|
|||||||
# Based on: https://tools.ietf.org/html/rfc3986#appendix-B
|
# Based on: https://tools.ietf.org/html/rfc3986#appendix-B
|
||||||
pattern += r"""
|
pattern += r"""
|
||||||
(?P<scheme>https?://)? # optional http(s) scheme
|
(?P<scheme>https?://)? # optional http(s) scheme
|
||||||
(?P<domain>[-\w\.]+) # required domain
|
(?P<domain>[^/?#]+) # required domain
|
||||||
(?P<path>/[^?#]*)? # optional path
|
(?P<path>/[^?#]*)? # optional path
|
||||||
(?:\?(?P<query>[^#]*))? # optional query
|
(?:\?(?P<query>[^#]*))? # optional query
|
||||||
(?:\#(?P<fragment>.*))? # optional fragment
|
(?:\#(?P<fragment>.*))? # optional fragment
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
test = (
|
||||||
|
("generic:https://www.nongnu.org/lzip/", {
|
||||||
|
"count": 1,
|
||||||
|
"content": "40be5c77773d3e91db6e1c5df720ee30afb62368",
|
||||||
|
"keyword": {
|
||||||
|
"description": "Lossless data compressor",
|
||||||
|
"imageurl": "https://www.nongnu.org/lzip/lzip.png",
|
||||||
|
"keywords": "lzip, clzip, plzip, lzlib, LZMA, bzip2, "
|
||||||
|
"gzip, data compression, GNU, free software",
|
||||||
|
"pageurl": "https://www.nongnu.org/lzip/",
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
# internationalized domain name
|
||||||
|
("generic:https://räksmörgås.josefsson.org/", {
|
||||||
|
"count": 2,
|
||||||
|
"pattern": "^https://räksmörgås.josefsson.org/",
|
||||||
|
}),
|
||||||
|
("generic:https://en.wikipedia.org/Main_Page"),
|
||||||
|
("generic:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
|
||||||
|
("generic:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
"""Init."""
|
"""Init."""
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
@ -56,7 +78,7 @@ class GenericExtractor(Extractor):
|
|||||||
self.root = self.scheme + match.group('domain')
|
self.root = self.scheme + match.group('domain')
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
"""Get page, extract metadata & images, yield them in suitable messages.
|
"""Get page, extract metadata & images, yield them in suitable messages
|
||||||
|
|
||||||
Adapted from common.GalleryExtractor.items()
|
Adapted from common.GalleryExtractor.items()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user