From f9544194c08862d5cfb6127f76fe188cb16f024a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 26 Dec 2023 16:09:26 +0100 Subject: [PATCH] [paheal] restore 'extension' metadata (#4976) --- gallery_dl/extractor/paheal.py | 13 +++++++++---- test/results/paheal.py | 23 ++++++++++++++++++----- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py index 0389ead9..89c0d2f7 100644 --- a/gallery_dl/extractor/paheal.py +++ b/gallery_dl/extractor/paheal.py @@ -32,7 +32,7 @@ class PahealExtractor(Extractor): post["tags"] = text.unquote(post["tags"]) post.update(data) yield Message.Directory, post - yield Message.Url, url, text.nameext_from_url(url, post) + yield Message.Url, url, post def get_metadata(self): """Return general metadata""" @@ -59,11 +59,13 @@ class PahealExtractor(Extractor): extr(">Source Link<", ""), "href='", "'")), } - dimensions, size, ext = extr("Info", ">").split(" // ") - post["width"], _, height = dimensions.partition("x") + dimensions, size, ext = extr("Info", "<").split(" // ") post["size"] = text.parse_bytes(size[:-1]) + post["width"], _, height = dimensions.partition("x") post["height"], _, duration = height.partition(", ") post["duration"] = text.parse_float(duration[:-1]) + post["filename"] = "{} - {}".format(post_id, post["tags"]) + post["extension"] = ext return post @@ -112,6 +114,7 @@ class PahealTagExtractor(PahealExtractor): tags, data, date = data.split("\n") dimensions, size, ext = data.split(" // ") + tags = text.unescape(tags) width, _, height = dimensions.partition("x") height, _, duration = height.partition(", ") @@ -119,9 +122,11 @@ class PahealTagExtractor(PahealExtractor): "id": pid, "md5": md5, "file_url": url, "width": width, "height": height, "duration": text.parse_float(duration[:-1]), - "tags": text.unescape(tags), + "tags": tags, "size": text.parse_bytes(size[:-1]), "date": text.parse_datetime(date, "%B %d, %Y; %H:%M"), + "filename" : "{} - {}".format(pid, tags), + "extension": ext, } def _extract_data_ex(self, post): diff --git a/test/results/paheal.py b/test/results/paheal.py index 833f3f84..1772593b 100644 --- a/test/results/paheal.py +++ b/test/results/paheal.py @@ -12,8 +12,21 @@ __tests__ = ( "#url" : "https://rule34.paheal.net/post/list/Ayane_Suzuki/1", "#category": ("shimmie2", "paheal", "tag"), "#class" : paheal.PahealTagExtractor, - "#pattern" : r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20", - "#count" : ">= 15", + "#pattern" : "https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20|https://r34i\.paheal-cdn\.net/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}$", + "#count" : range(70, 200), + + "date" : "type:datetime", + "extension": r"re:jpg|png", + "filename" : r"re:\d+ - \w+", + "duration" : float, + "height" : int, + "id" : int, + "md5" : r"re:[0-9a-f]{32}", + "search_tags": "Ayane_Suzuki", + "size" : int, + "tags" : str, + "width" : int, + }, { @@ -42,12 +55,12 @@ __tests__ = ( "#url" : "https://rule34.paheal.net/post/view/481609", "#category": ("shimmie2", "paheal", "post"), "#class" : paheal.PahealPostExtractor, - "#pattern" : r"https://tulip\.paheal\.net/_images/bbdc1c33410c2cdce7556c7990be26b7/481609%20-.+\.jpg", + "#urls" : "https://r34i.paheal-cdn.net/bb/dc/bbdc1c33410c2cdce7556c7990be26b7", "#sha1_content": "7b924bcf150b352ac75c9d281d061e174c851a11", "date" : "dt:2010-06-17 15:40:23", "extension": "jpg", - "file_url" : r"re:https://tulip.paheal.net/_images/bbdc1c33410c", + "file_url" : "https://r34i.paheal-cdn.net/bb/dc/bbdc1c33410c2cdce7556c7990be26b7", "filename" : "481609 - Ayumu_Kasuga Azumanga_Daioh inanimate Vuvuzela", "height" : 660, "id" : 481609, @@ -79,7 +92,7 @@ __tests__ = ( "#comment" : "video", "#category": ("shimmie2", "paheal", "post"), "#class" : paheal.PahealPostExtractor, - "#pattern" : r"https://[\w]+\.paheal\.net/_images/7629fc0ff77e32637dde5bf4f992b2cb/3864982%20-%20animated%20Metal_Gear%20Metal_Gear_Solid_V%20Quiet%20Vg_erotica%20webm\.webm", + "#urls" : "https://r34i.paheal-cdn.net/76/29/7629fc0ff77e32637dde5bf4f992b2cb", "date" : "dt:2020-09-06 01:59:03", "duration" : 30.0,