1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

[gelbooru_v02] unescape categorized tags

This commit is contained in:
Mike Fährmann 2024-10-10 17:30:55 +02:00
parent f9e76ba6eb
commit 2818973981
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
5 changed files with 7 additions and 5 deletions

View File

@@ -97,6 +97,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
@staticmethod
def _prepare(post):
post["tags"] = post["tags"].strip()
post["date"] = text.parse_datetime(
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
@@ -114,7 +115,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
pattern = re.compile(
r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)", re.S)
for tag_type, tag_name in pattern.findall(tag_container):
-tags[tag_type].append(text.unquote(tag_name))
+tags[tag_type].append(text.unescape(text.unquote(tag_name)))
for key, value in tags.items():
post["tags_" + key] = " ".join(value)
@@ -178,7 +179,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
pattern = re.compile(
r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
for tag_type, tag_name in pattern.findall(tag_container):
-tags[tag_type].append(text.unquote(tag_name))
+tags[tag_type].append(text.unescape(text.unquote(tag_name)))
for key, value in tags.items():
post["tags_" + key] = " ".join(value)

View File

@@ -67,7 +67,7 @@ __tests__ = (
"score" : "",
"source" : "https://www.instagram.com/p/CwAO1UyJBnw",
"status" : "active",
-"tags" : " 1girl asian bikini black_hair breasts cleavage female female_only floral_print instagram japanese kurita_emi large_breasts looking_at_viewer navel sauna short_hair side-tie_bikini sitting solo ",
+"tags" : "1girl asian bikini black_hair breasts cleavage female female_only floral_print instagram japanese kurita_emi large_breasts looking_at_viewer navel sauna short_hair side-tie_bikini sitting solo",
"tags_copyright": "instagram",
"tags_general" : "1girl asian bikini black_hair breasts cleavage female female_only floral_print japanese large_breasts looking_at_viewer navel sauna short_hair side-tie_bikini sitting solo",
"tags_model" : "kurita_emi",

View File

@@ -17,6 +17,7 @@ __tests__ = (
"#sha1_content": [
"5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
"622e80be3f496672c44aab5c47fbc6941c61bc79",
"1e0dced55bcb5eefe5cc32f69c7a8df35547b459",
],
},

View File

@@ -55,7 +55,7 @@ __tests__ = (
"tags_artist" : "kawanakajima",
"tags_character": "heath_ledger ronald_mcdonald the_joker",
"tags_copyright": "dc_comics mcdonald's the_dark_knight",
"tags_general" : str,
"tags_metadata" : "parody tagme",
},
)

View File

@@ -12,7 +12,7 @@ __tests__ = (
"#url" : "https://xbooru.com/index.php?page=post&s=list&tags=konoyan",
"#category": ("gelbooru_v02", "xbooru", "tag"),
"#class" : gelbooru_v02.GelbooruV02TagExtractor,
-"#count" : 25,
+"#count" : range(28, 40),
},
{