1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-24 19:52:32 +01:00

[newgrounds] fix metadata extraction (#6463)

- fix 'comment' metadata
- fix 'following' extractor pattern
- set our own 'type' values ('art', 'audio', 'movie', 'flash'/'game' as fallback), since the 'og:type' meta tag is no longer available
- update test results
This commit is contained in:
Mike Fährmann 2024-11-17 21:40:29 +01:00
parent 50acf2ac84
commit b069783578
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 51 additions and 27 deletions

View File

@ -193,7 +193,8 @@ class NewgroundsExtractor(Extractor):
data["_comment"] = extr( data["_comment"] = extr(
'id="author_comments"', '</div>').partition(">")[2] 'id="author_comments"', '</div>').partition(">")[2]
data["comment"] = text.unescape(text.remove_html( data["comment"] = text.unescape(text.remove_html(
data["_comment"], "", "")) data["_comment"]
.replace("<p><br></p>", "\n\n").replace("<br>", "\n"), "", ""))
data["favorites"] = text.parse_int(extr( data["favorites"] = text.parse_int(extr(
'id="faves_load">', '<').replace(",", "")) 'id="faves_load">', '<').replace(",", ""))
data["score"] = text.parse_float(extr('id="score_number">', '<')) data["score"] = text.parse_float(extr('id="score_number">', '<'))
@ -214,7 +215,7 @@ class NewgroundsExtractor(Extractor):
data = { data = {
"title" : text.unescape(extr('"og:title" content="', '"')), "title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')), "description": text.unescape(extr(':description" content="', '"')),
"type" : extr('og:type" content="', '"'), "type" : "art",
"_type" : "i", "_type" : "i",
"date" : text.parse_datetime(extr( "date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')), 'itemprop="datePublished" content="', '"')),
@ -231,7 +232,7 @@ class NewgroundsExtractor(Extractor):
if image_data: if image_data:
data["_multi"] = self._extract_images_multi(image_data) data["_multi"] = self._extract_images_multi(image_data)
else: else:
art_images = extr('<div class="art-images', '\n</div>') art_images = extr('<div class="art-images', '\n\t\t</div>')
if art_images: if art_images:
data["_multi"] = self._extract_images_art(art_images, data) data["_multi"] = self._extract_images_art(art_images, data)
@ -263,7 +264,7 @@ class NewgroundsExtractor(Extractor):
return { return {
"title" : text.unescape(extr('"og:title" content="', '"')), "title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')), "description": text.unescape(extr(':description" content="', '"')),
"type" : extr('og:type" content="', '"'), "type" : "audio",
"_type" : "a", "_type" : "a",
"date" : text.parse_datetime(extr( "date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')), 'itemprop="datePublished" content="', '"')),
@ -283,8 +284,13 @@ class NewgroundsExtractor(Extractor):
if src: if src:
src = src.replace("\\/", "/") src = src.replace("\\/", "/")
formats = () formats = ()
type = extr(',"description":"', '"')
date = text.parse_datetime(extr( date = text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')) 'itemprop="datePublished" content="', '"'))
if type:
type = type.rpartition(" ")[2].lower()
else:
type = "flash" if text.ext_from_url(url) == "swf" else "game"
else: else:
url = self.root + "/portal/video/" + index url = self.root + "/portal/video/" + index
headers = { headers = {
@ -295,6 +301,7 @@ class NewgroundsExtractor(Extractor):
formats = self._video_formats(sources) formats = self._video_formats(sources)
src = next(formats, "") src = next(formats, "")
date = text.parse_timestamp(src.rpartition("?")[2]) date = text.parse_timestamp(src.rpartition("?")[2])
type = "movie"
return { return {
"title" : text.unescape(title), "title" : text.unescape(title),
@ -513,7 +520,9 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
"""Extractor for a newgrounds user's favorited users""" """Extractor for a newgrounds user's favorited users"""
subcategory = "following" subcategory = "following"
pattern = USER_PATTERN + r"/favorites/(following)" pattern = (USER_PATTERN + r"/favorites/(following)"
r"(?:(?:/page/|/?\?page=)(\d+))?")
example = "https://USER.newgrounds.com/favorites/following" example = "https://USER.newgrounds.com/favorites/following"
def items(self): def items(self):

View File

@ -16,9 +16,9 @@ __tests__ = (
"#sha1_content": "8f395e08333eb2457ba8d8b715238f8910221365", "#sha1_content": "8f395e08333eb2457ba8d8b715238f8910221365",
"artist" : ["tomfulp"], "artist" : ["tomfulp"],
"comment" : "Consider this the bottom threshold for scouted artists.In fact consider it BELOW the bottom threshold.", "comment" : "Consider this the bottom threshold for scouted artists.\n\nIn fact consider it BELOW the bottom threshold.",
"date" : "dt:2009-06-04 14:44:05", "date" : "dt:2009-06-04 14:44:05",
"description": "Consider this the bottom threshold for scouted artists. In fact consider it BELOW the bottom threshold. ", "description": "",
"favorites" : int, "favorites" : int,
"filename" : "1993615_4474_tomfulp_ryu-is-hawt.44f81090378ae9c257a5e46a8e17cc4d", "filename" : "1993615_4474_tomfulp_ryu-is-hawt.44f81090378ae9c257a5e46a8e17cc4d",
"height" : 476, "height" : 476,
@ -30,7 +30,7 @@ __tests__ = (
"streetfighter", "streetfighter",
], ],
"title" : "Ryu is Hawt", "title" : "Ryu is Hawt",
"type" : "article", "type" : "art",
"user" : "tomfulp", "user" : "tomfulp",
"width" : 447, "width" : 447,
}, },
@ -58,12 +58,13 @@ __tests__ = (
"#comment" : "extra files in 'art-image-row' elements - WebP to GIF (#4642)", "#comment" : "extra files in 'art-image-row' elements - WebP to GIF (#4642)",
"#category": ("", "newgrounds", "image"), "#category": ("", "newgrounds", "image"),
"#class" : newgrounds.NewgroundsImageExtractor, "#class" : newgrounds.NewgroundsImageExtractor,
"#auth" : True,
"#urls" : ( "#urls" : (
"https://art.ngfiles.com/images/5091000/5091275_45067_zedrinbot_untitled-5091275.0a9d27ed2bc265a7e89478ed6ad6f86f.gif?f1696187399", "https://art.ngfiles.com/images/5091000/5091275_45067_zedrinbot_untitled-5091275.0a9d27ed2bc265a7e89478ed6ad6f86f.gif?f1696187399",
"https://art.ngfiles.com/images/5091000/5091275_45071_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187437", "https://art.ngfiles.com/images/5091000/5091275_45071_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187436",
"https://art.ngfiles.com/images/5091000/5091275_45070_zedrinbot_untitled-5091275.0d7334746374465bd448908b88d1f810.gif?f1696187435", "https://art.ngfiles.com/images/5091000/5091275_45070_zedrinbot_untitled-5091275.0d7334746374465bd448908b88d1f810.gif?f1696187434",
"https://art.ngfiles.com/images/5091000/5091275_45072_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187438", "https://art.ngfiles.com/images/5091000/5091275_45072_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187437",
"https://art.ngfiles.com/images/5091000/5091275_45073_zedrinbot_untitled-5091275.20aa05c1cd22fd058e8c68ce58f5a302.gif?f1696187439", "https://art.ngfiles.com/images/5091000/5091275_45073_zedrinbot_untitled-5091275.20aa05c1cd22fd058e8c68ce58f5a302.gif?f1696187437",
), ),
}, },
@ -90,7 +91,7 @@ __tests__ = (
"#class" : newgrounds.NewgroundsImageExtractor, "#class" : newgrounds.NewgroundsImageExtractor,
"#urls" : ( "#urls" : (
"https://art.ngfiles.com/images/5127000/5127150_93307_bacun_kill-la-kill-10th-anniversary.61adfe309bec342f9db55fd44397235b.png?f1697310027", "https://art.ngfiles.com/images/5127000/5127150_93307_bacun_kill-la-kill-10th-anniversary.61adfe309bec342f9db55fd44397235b.png?f1697310027",
"https://art.ngfiles.com/images/5127000/5127150_94250_bacun_kill-la-kill-10th-anniversary.64fdf525fa38c1ab34defac4b354bc7a.png?f1697332109", "https://art.ngfiles.com/images/5127000/5127150_94250_bacun_kill-la-kill-10th-anniversary.64fdf525fa38c1ab34defac4b354bc7a.webp?f1697332147",
), ),
}, },
@ -119,6 +120,7 @@ __tests__ = (
{ {
"#url" : "https://www.newgrounds.com/portal/view/595355", "#url" : "https://www.newgrounds.com/portal/view/595355",
"#comment" : "video",
"#category": ("", "newgrounds", "media"), "#category": ("", "newgrounds", "media"),
"#class" : newgrounds.NewgroundsMediaExtractor, "#class" : newgrounds.NewgroundsMediaExtractor,
"#urls" : "https://uploads.ungrounded.net/alternate/564000/564957_alternate_31.mp4?1359712249", "#urls" : "https://uploads.ungrounded.net/alternate/564000/564957_alternate_31.mp4?1359712249",
@ -163,6 +165,7 @@ __tests__ = (
{ {
"#url" : "https://www.newgrounds.com/audio/listen/609768", "#url" : "https://www.newgrounds.com/audio/listen/609768",
"#comment" : "audio",
"#category": ("", "newgrounds", "media"), "#category": ("", "newgrounds", "media"),
"#class" : newgrounds.NewgroundsMediaExtractor, "#class" : newgrounds.NewgroundsMediaExtractor,
"#sha1_url": "f4c5490ae559a3b05e46821bb7ee834f93a43c95", "#sha1_url": "f4c5490ae559a3b05e46821bb7ee834f93a43c95",
@ -171,9 +174,27 @@ __tests__ = (
"zj", "zj",
"tomfulp", "tomfulp",
], ],
"comment" : r"""re:RECORDED 12-09-2014 "comment" : """\
RECORDED 12-09-2014
From The ZJ "Late """, From The ZJ "Late Nite" Report at the University of Cincinnati!
ZJ gets to interview Tom Fulp, the founder of Newgrounds.com and the programmer behind classic games like Alien Hominid and Castle Crashers. Lots of cool stuff is talked about on here like game design, finding a way to market yourself on the modern web, and what Tom would do in the zombie apocalypse. It's a barrel of fun, so shut up and listen to it!
See more ZJ Report:
Twitter: @ZJReport
Facebook: Facebook.com/ZJReport
NOTE:
If this version of this interview offends your ears, there's a different one on Soundcloud. That original file was lost somehow, so I tried recreating it as best as I can, but I understand that there are still some differences...
https://soundcloud.com/the-zj-late-nite-report/the-zj-late-nite-report-extra-tom-fulp-interview
Also wanna give a big shout-out to by by Zachary (Zachary.newgrounds.com) for providing the intro and outro music on this thing.\
""",
"date" : "dt:2015-02-23 19:31:59", "date" : "dt:2015-02-23 19:31:59",
"description": "From The ZJ Report Show!", "description": "From The ZJ Report Show!",
"favorites" : int, "favorites" : int,
@ -187,7 +208,7 @@ From The ZJ "Late """,
"zj", "zj",
], ],
"title" : "ZJ Interviews Tom Fulp!", "title" : "ZJ Interviews Tom Fulp!",
"type" : "music.song", "type" : "audio",
"user" : "zj", "user" : "zj",
}, },
@ -203,7 +224,7 @@ From The ZJ "Late """,
{ {
"#url" : "https://www.newgrounds.com/portal/view/758545", "#url" : "https://www.newgrounds.com/portal/view/758545",
"#comment" : "format selection (#1729)", "#comment" : "video format selection (#1729)",
"#category": ("", "newgrounds", "media"), "#category": ("", "newgrounds", "media"),
"#class" : newgrounds.NewgroundsMediaExtractor, "#class" : newgrounds.NewgroundsMediaExtractor,
"#options" : {"format": "720p"}, "#options" : {"format": "720p"},
@ -235,14 +256,6 @@ From The ZJ "Late """,
"animalspeakandrews", "animalspeakandrews",
"bill", "bill",
"chipollo", "chipollo",
"dylz49",
"gappyshamp",
"pinktophat",
"rad",
"shapeshiftingblob",
"tomfulp",
"voicesbycorey",
"psychogoldfish",
], ],
"comment" : r"re:The children are expendable. Take out the ", "comment" : r"re:The children are expendable. Take out the ",
"date" : "dt:2022-01-10 23:00:57", "date" : "dt:2022-01-10 23:00:57",
@ -268,7 +281,7 @@ From The ZJ "Late """,
{ {
"#url" : "https://tomfulp.newgrounds.com/art", "#url" : "https://tomfulp.newgrounds.com/art",
"#class" : newgrounds.NewgroundsArtExtractor, "#class" : newgrounds.NewgroundsArtExtractor,
"#pattern" : newgrounds.NewgroundsImageExtractor.pattern, "#pattern" : r"https://(art.ngfiles.com/images/\d+|uploads.ungrounded.net/tmp/img/)",
"#count" : ">= 3", "#count" : ">= 3",
}, },
@ -310,9 +323,11 @@ From The ZJ "Late """,
{ {
"#url" : "https://tomfulp.newgrounds.com/games", "#url" : "https://tomfulp.newgrounds.com/games",
"#class" : newgrounds.NewgroundsGamesExtractor, "#class" : newgrounds.NewgroundsGamesExtractor,
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/(\d+/\d+_.+|tmp/.+)", "#pattern" : r"https://(uploads.ungrounded.net(/alternate)?/(\d+/\d+_.+|tmp/.+)|img.ngfiles.com/)",
"#range" : "1-10", "#range" : "1-10",
"#count" : 10, "#count" : 10,
"type": {"archive", "game"},
}, },
{ {