1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

[paheal] restore 'extension' metadata (#4976)

This commit is contained in:
Mike Fährmann 2023-12-26 16:09:26 +01:00
parent a50c1472b1
commit f9544194c0
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 27 additions and 9 deletions

View File

@ -32,7 +32,7 @@ class PahealExtractor(Extractor):
post["tags"] = text.unquote(post["tags"])
post.update(data)
yield Message.Directory, post
yield Message.Url, url, text.nameext_from_url(url, post)
yield Message.Url, url, post
def get_metadata(self):
"""Return general metadata"""
@ -59,11 +59,13 @@ class PahealExtractor(Extractor):
extr(">Source&nbsp;Link<", "</td>"), "href='", "'")),
}
dimensions, size, ext = extr("Info</th><td>", ">").split(" // ")
post["width"], _, height = dimensions.partition("x")
dimensions, size, ext = extr("Info</th><td>", "<").split(" // ")
post["size"] = text.parse_bytes(size[:-1])
post["width"], _, height = dimensions.partition("x")
post["height"], _, duration = height.partition(", ")
post["duration"] = text.parse_float(duration[:-1])
post["filename"] = "{} - {}".format(post_id, post["tags"])
post["extension"] = ext
return post
@ -112,6 +114,7 @@ class PahealTagExtractor(PahealExtractor):
tags, data, date = data.split("\n")
dimensions, size, ext = data.split(" // ")
tags = text.unescape(tags)
width, _, height = dimensions.partition("x")
height, _, duration = height.partition(", ")
@ -119,9 +122,11 @@ class PahealTagExtractor(PahealExtractor):
"id": pid, "md5": md5, "file_url": url,
"width": width, "height": height,
"duration": text.parse_float(duration[:-1]),
"tags": text.unescape(tags),
"tags": tags,
"size": text.parse_bytes(size[:-1]),
"date": text.parse_datetime(date, "%B %d, %Y; %H:%M"),
"filename" : "{} - {}".format(pid, tags),
"extension": ext,
}
def _extract_data_ex(self, post):

View File

@ -12,8 +12,21 @@ __tests__ = (
"#url" : "https://rule34.paheal.net/post/list/Ayane_Suzuki/1",
"#category": ("shimmie2", "paheal", "tag"),
"#class" : paheal.PahealTagExtractor,
"#pattern" : r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20",
"#count" : ">= 15",
"#pattern" : r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20|https://r34i\.paheal-cdn\.net/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}$",
"#count" : range(70, 200),
"date" : "type:datetime",
"extension": r"re:jpg|png",
"filename" : r"re:\d+ - \w+",
"duration" : float,
"height" : int,
"id" : int,
"md5" : r"re:[0-9a-f]{32}",
"search_tags": "Ayane_Suzuki",
"size" : int,
"tags" : str,
"width" : int,
},
{
@ -42,12 +55,12 @@ __tests__ = (
"#url" : "https://rule34.paheal.net/post/view/481609",
"#category": ("shimmie2", "paheal", "post"),
"#class" : paheal.PahealPostExtractor,
"#pattern" : r"https://tulip\.paheal\.net/_images/bbdc1c33410c2cdce7556c7990be26b7/481609%20-.+\.jpg",
"#urls" : "https://r34i.paheal-cdn.net/bb/dc/bbdc1c33410c2cdce7556c7990be26b7",
"#sha1_content": "7b924bcf150b352ac75c9d281d061e174c851a11",
"date" : "dt:2010-06-17 15:40:23",
"extension": "jpg",
"file_url" : r"re:https://tulip.paheal.net/_images/bbdc1c33410c",
"file_url" : "https://r34i.paheal-cdn.net/bb/dc/bbdc1c33410c2cdce7556c7990be26b7",
"filename" : "481609 - Ayumu_Kasuga Azumanga_Daioh inanimate Vuvuzela",
"height" : 660,
"id" : 481609,
@ -79,7 +92,7 @@ __tests__ = (
"#comment" : "video",
"#category": ("shimmie2", "paheal", "post"),
"#class" : paheal.PahealPostExtractor,
"#pattern" : r"https://[\w]+\.paheal\.net/_images/7629fc0ff77e32637dde5bf4f992b2cb/3864982%20-%20animated%20Metal_Gear%20Metal_Gear_Solid_V%20Quiet%20Vg_erotica%20webm\.webm",
"#urls" : "https://r34i.paheal-cdn.net/76/29/7629fc0ff77e32637dde5bf4f992b2cb",
"date" : "dt:2020-09-06 01:59:03",
"duration" : 30.0,