mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-24 19:52:32 +01:00
[newgrounds] fix metadata extraction (#6463)
- fix 'comment' metadata
- fix 'following' extractor pattern
- use own 'type' values, since 'og:type' is no longer available
- update test results
This commit is contained in:
parent
50acf2ac84
commit
b069783578
@ -193,7 +193,8 @@ class NewgroundsExtractor(Extractor):
|
|||||||
data["_comment"] = extr(
|
data["_comment"] = extr(
|
||||||
'id="author_comments"', '</div>').partition(">")[2]
|
'id="author_comments"', '</div>').partition(">")[2]
|
||||||
data["comment"] = text.unescape(text.remove_html(
|
data["comment"] = text.unescape(text.remove_html(
|
||||||
data["_comment"], "", ""))
|
data["_comment"]
|
||||||
|
.replace("<p><br></p>", "\n\n").replace("<br>", "\n"), "", ""))
|
||||||
data["favorites"] = text.parse_int(extr(
|
data["favorites"] = text.parse_int(extr(
|
||||||
'id="faves_load">', '<').replace(",", ""))
|
'id="faves_load">', '<').replace(",", ""))
|
||||||
data["score"] = text.parse_float(extr('id="score_number">', '<'))
|
data["score"] = text.parse_float(extr('id="score_number">', '<'))
|
||||||
@ -214,7 +215,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
data = {
|
data = {
|
||||||
"title" : text.unescape(extr('"og:title" content="', '"')),
|
"title" : text.unescape(extr('"og:title" content="', '"')),
|
||||||
"description": text.unescape(extr(':description" content="', '"')),
|
"description": text.unescape(extr(':description" content="', '"')),
|
||||||
"type" : extr('og:type" content="', '"'),
|
"type" : "art",
|
||||||
"_type" : "i",
|
"_type" : "i",
|
||||||
"date" : text.parse_datetime(extr(
|
"date" : text.parse_datetime(extr(
|
||||||
'itemprop="datePublished" content="', '"')),
|
'itemprop="datePublished" content="', '"')),
|
||||||
@ -231,7 +232,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
if image_data:
|
if image_data:
|
||||||
data["_multi"] = self._extract_images_multi(image_data)
|
data["_multi"] = self._extract_images_multi(image_data)
|
||||||
else:
|
else:
|
||||||
art_images = extr('<div class="art-images', '\n</div>')
|
art_images = extr('<div class="art-images', '\n\t\t</div>')
|
||||||
if art_images:
|
if art_images:
|
||||||
data["_multi"] = self._extract_images_art(art_images, data)
|
data["_multi"] = self._extract_images_art(art_images, data)
|
||||||
|
|
||||||
@ -263,7 +264,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
return {
|
return {
|
||||||
"title" : text.unescape(extr('"og:title" content="', '"')),
|
"title" : text.unescape(extr('"og:title" content="', '"')),
|
||||||
"description": text.unescape(extr(':description" content="', '"')),
|
"description": text.unescape(extr(':description" content="', '"')),
|
||||||
"type" : extr('og:type" content="', '"'),
|
"type" : "audio",
|
||||||
"_type" : "a",
|
"_type" : "a",
|
||||||
"date" : text.parse_datetime(extr(
|
"date" : text.parse_datetime(extr(
|
||||||
'itemprop="datePublished" content="', '"')),
|
'itemprop="datePublished" content="', '"')),
|
||||||
@ -283,8 +284,13 @@ class NewgroundsExtractor(Extractor):
|
|||||||
if src:
|
if src:
|
||||||
src = src.replace("\\/", "/")
|
src = src.replace("\\/", "/")
|
||||||
formats = ()
|
formats = ()
|
||||||
|
type = extr(',"description":"', '"')
|
||||||
date = text.parse_datetime(extr(
|
date = text.parse_datetime(extr(
|
||||||
'itemprop="datePublished" content="', '"'))
|
'itemprop="datePublished" content="', '"'))
|
||||||
|
if type:
|
||||||
|
type = type.rpartition(" ")[2].lower()
|
||||||
|
else:
|
||||||
|
type = "flash" if text.ext_from_url(url) == "swf" else "game"
|
||||||
else:
|
else:
|
||||||
url = self.root + "/portal/video/" + index
|
url = self.root + "/portal/video/" + index
|
||||||
headers = {
|
headers = {
|
||||||
@ -295,6 +301,7 @@ class NewgroundsExtractor(Extractor):
|
|||||||
formats = self._video_formats(sources)
|
formats = self._video_formats(sources)
|
||||||
src = next(formats, "")
|
src = next(formats, "")
|
||||||
date = text.parse_timestamp(src.rpartition("?")[2])
|
date = text.parse_timestamp(src.rpartition("?")[2])
|
||||||
|
type = "movie"
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"title" : text.unescape(title),
|
"title" : text.unescape(title),
|
||||||
@ -513,7 +520,9 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
|
|||||||
class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
|
class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
|
||||||
"""Extractor for a newgrounds user's favorited users"""
|
"""Extractor for a newgrounds user's favorited users"""
|
||||||
subcategory = "following"
|
subcategory = "following"
|
||||||
pattern = USER_PATTERN + r"/favorites/(following)"
|
pattern = (USER_PATTERN + r"/favorites/(following)"
|
||||||
|
r"(?:(?:/page/|/?\?page=)(\d+))?")
|
||||||
|
|
||||||
example = "https://USER.newgrounds.com/favorites/following"
|
example = "https://USER.newgrounds.com/favorites/following"
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
@ -16,9 +16,9 @@ __tests__ = (
|
|||||||
"#sha1_content": "8f395e08333eb2457ba8d8b715238f8910221365",
|
"#sha1_content": "8f395e08333eb2457ba8d8b715238f8910221365",
|
||||||
|
|
||||||
"artist" : ["tomfulp"],
|
"artist" : ["tomfulp"],
|
||||||
"comment" : "Consider this the bottom threshold for scouted artists.In fact consider it BELOW the bottom threshold.",
|
"comment" : "Consider this the bottom threshold for scouted artists.\n\nIn fact consider it BELOW the bottom threshold.",
|
||||||
"date" : "dt:2009-06-04 14:44:05",
|
"date" : "dt:2009-06-04 14:44:05",
|
||||||
"description": "Consider this the bottom threshold for scouted artists. In fact consider it BELOW the bottom threshold. ",
|
"description": "",
|
||||||
"favorites" : int,
|
"favorites" : int,
|
||||||
"filename" : "1993615_4474_tomfulp_ryu-is-hawt.44f81090378ae9c257a5e46a8e17cc4d",
|
"filename" : "1993615_4474_tomfulp_ryu-is-hawt.44f81090378ae9c257a5e46a8e17cc4d",
|
||||||
"height" : 476,
|
"height" : 476,
|
||||||
@ -30,7 +30,7 @@ __tests__ = (
|
|||||||
"streetfighter",
|
"streetfighter",
|
||||||
],
|
],
|
||||||
"title" : "Ryu is Hawt",
|
"title" : "Ryu is Hawt",
|
||||||
"type" : "article",
|
"type" : "art",
|
||||||
"user" : "tomfulp",
|
"user" : "tomfulp",
|
||||||
"width" : 447,
|
"width" : 447,
|
||||||
},
|
},
|
||||||
@ -58,12 +58,13 @@ __tests__ = (
|
|||||||
"#comment" : "extra files in 'art-image-row' elements - WebP to GIF (#4642)",
|
"#comment" : "extra files in 'art-image-row' elements - WebP to GIF (#4642)",
|
||||||
"#category": ("", "newgrounds", "image"),
|
"#category": ("", "newgrounds", "image"),
|
||||||
"#class" : newgrounds.NewgroundsImageExtractor,
|
"#class" : newgrounds.NewgroundsImageExtractor,
|
||||||
|
"#auth" : True,
|
||||||
"#urls" : (
|
"#urls" : (
|
||||||
"https://art.ngfiles.com/images/5091000/5091275_45067_zedrinbot_untitled-5091275.0a9d27ed2bc265a7e89478ed6ad6f86f.gif?f1696187399",
|
"https://art.ngfiles.com/images/5091000/5091275_45067_zedrinbot_untitled-5091275.0a9d27ed2bc265a7e89478ed6ad6f86f.gif?f1696187399",
|
||||||
"https://art.ngfiles.com/images/5091000/5091275_45071_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187437",
|
"https://art.ngfiles.com/images/5091000/5091275_45071_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187436",
|
||||||
"https://art.ngfiles.com/images/5091000/5091275_45070_zedrinbot_untitled-5091275.0d7334746374465bd448908b88d1f810.gif?f1696187435",
|
"https://art.ngfiles.com/images/5091000/5091275_45070_zedrinbot_untitled-5091275.0d7334746374465bd448908b88d1f810.gif?f1696187434",
|
||||||
"https://art.ngfiles.com/images/5091000/5091275_45072_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187438",
|
"https://art.ngfiles.com/images/5091000/5091275_45072_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187437",
|
||||||
"https://art.ngfiles.com/images/5091000/5091275_45073_zedrinbot_untitled-5091275.20aa05c1cd22fd058e8c68ce58f5a302.gif?f1696187439",
|
"https://art.ngfiles.com/images/5091000/5091275_45073_zedrinbot_untitled-5091275.20aa05c1cd22fd058e8c68ce58f5a302.gif?f1696187437",
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|
||||||
@ -90,7 +91,7 @@ __tests__ = (
|
|||||||
"#class" : newgrounds.NewgroundsImageExtractor,
|
"#class" : newgrounds.NewgroundsImageExtractor,
|
||||||
"#urls" : (
|
"#urls" : (
|
||||||
"https://art.ngfiles.com/images/5127000/5127150_93307_bacun_kill-la-kill-10th-anniversary.61adfe309bec342f9db55fd44397235b.png?f1697310027",
|
"https://art.ngfiles.com/images/5127000/5127150_93307_bacun_kill-la-kill-10th-anniversary.61adfe309bec342f9db55fd44397235b.png?f1697310027",
|
||||||
"https://art.ngfiles.com/images/5127000/5127150_94250_bacun_kill-la-kill-10th-anniversary.64fdf525fa38c1ab34defac4b354bc7a.png?f1697332109",
|
"https://art.ngfiles.com/images/5127000/5127150_94250_bacun_kill-la-kill-10th-anniversary.64fdf525fa38c1ab34defac4b354bc7a.webp?f1697332147",
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|
||||||
@ -119,6 +120,7 @@ __tests__ = (
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://www.newgrounds.com/portal/view/595355",
|
"#url" : "https://www.newgrounds.com/portal/view/595355",
|
||||||
|
"#comment" : "video",
|
||||||
"#category": ("", "newgrounds", "media"),
|
"#category": ("", "newgrounds", "media"),
|
||||||
"#class" : newgrounds.NewgroundsMediaExtractor,
|
"#class" : newgrounds.NewgroundsMediaExtractor,
|
||||||
"#urls" : "https://uploads.ungrounded.net/alternate/564000/564957_alternate_31.mp4?1359712249",
|
"#urls" : "https://uploads.ungrounded.net/alternate/564000/564957_alternate_31.mp4?1359712249",
|
||||||
@ -163,6 +165,7 @@ __tests__ = (
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://www.newgrounds.com/audio/listen/609768",
|
"#url" : "https://www.newgrounds.com/audio/listen/609768",
|
||||||
|
"#comment" : "audio",
|
||||||
"#category": ("", "newgrounds", "media"),
|
"#category": ("", "newgrounds", "media"),
|
||||||
"#class" : newgrounds.NewgroundsMediaExtractor,
|
"#class" : newgrounds.NewgroundsMediaExtractor,
|
||||||
"#sha1_url": "f4c5490ae559a3b05e46821bb7ee834f93a43c95",
|
"#sha1_url": "f4c5490ae559a3b05e46821bb7ee834f93a43c95",
|
||||||
@ -171,9 +174,27 @@ __tests__ = (
|
|||||||
"zj",
|
"zj",
|
||||||
"tomfulp",
|
"tomfulp",
|
||||||
],
|
],
|
||||||
"comment" : r"""re:RECORDED 12-09-2014
|
"comment" : """\
|
||||||
|
RECORDED 12-09-2014
|
||||||
|
|
||||||
From The ZJ "Late """,
|
From The ZJ "Late Nite" Report at the University of Cincinnati!
|
||||||
|
|
||||||
|
ZJ gets to interview Tom Fulp, the founder of Newgrounds.com and the programmer behind classic games like Alien Hominid and Castle Crashers. Lots of cool stuff is talked about on here like game design, finding a way to market yourself on the modern web, and what Tom would do in the zombie apocalypse. It's a barrel of fun, so shut up and listen to it!
|
||||||
|
|
||||||
|
See more ZJ Report:
|
||||||
|
|
||||||
|
Twitter: @ZJReport
|
||||||
|
|
||||||
|
Facebook: Facebook.com/ZJReport
|
||||||
|
|
||||||
|
NOTE:
|
||||||
|
|
||||||
|
If this version of this interview offends your ears, there's a different one on Soundcloud. That original file was lost somehow, so I tried recreating it as best as I can, but I understand that there are still some differences...
|
||||||
|
|
||||||
|
https://soundcloud.com/the-zj-late-nite-report/the-zj-late-nite-report-extra-tom-fulp-interview
|
||||||
|
|
||||||
|
Also wanna give a big shout-out to by by Zachary (Zachary.newgrounds.com) for providing the intro and outro music on this thing.\
|
||||||
|
""",
|
||||||
"date" : "dt:2015-02-23 19:31:59",
|
"date" : "dt:2015-02-23 19:31:59",
|
||||||
"description": "From The ZJ Report Show!",
|
"description": "From The ZJ Report Show!",
|
||||||
"favorites" : int,
|
"favorites" : int,
|
||||||
@ -187,7 +208,7 @@ From The ZJ "Late """,
|
|||||||
"zj",
|
"zj",
|
||||||
],
|
],
|
||||||
"title" : "ZJ Interviews Tom Fulp!",
|
"title" : "ZJ Interviews Tom Fulp!",
|
||||||
"type" : "music.song",
|
"type" : "audio",
|
||||||
"user" : "zj",
|
"user" : "zj",
|
||||||
},
|
},
|
||||||
|
|
||||||
@ -203,7 +224,7 @@ From The ZJ "Late """,
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://www.newgrounds.com/portal/view/758545",
|
"#url" : "https://www.newgrounds.com/portal/view/758545",
|
||||||
"#comment" : "format selection (#1729)",
|
"#comment" : "video format selection (#1729)",
|
||||||
"#category": ("", "newgrounds", "media"),
|
"#category": ("", "newgrounds", "media"),
|
||||||
"#class" : newgrounds.NewgroundsMediaExtractor,
|
"#class" : newgrounds.NewgroundsMediaExtractor,
|
||||||
"#options" : {"format": "720p"},
|
"#options" : {"format": "720p"},
|
||||||
@ -235,14 +256,6 @@ From The ZJ "Late """,
|
|||||||
"animalspeakandrews",
|
"animalspeakandrews",
|
||||||
"bill",
|
"bill",
|
||||||
"chipollo",
|
"chipollo",
|
||||||
"dylz49",
|
|
||||||
"gappyshamp",
|
|
||||||
"pinktophat",
|
|
||||||
"rad",
|
|
||||||
"shapeshiftingblob",
|
|
||||||
"tomfulp",
|
|
||||||
"voicesbycorey",
|
|
||||||
"psychogoldfish",
|
|
||||||
],
|
],
|
||||||
"comment" : r"re:The children are expendable. Take out the ",
|
"comment" : r"re:The children are expendable. Take out the ",
|
||||||
"date" : "dt:2022-01-10 23:00:57",
|
"date" : "dt:2022-01-10 23:00:57",
|
||||||
@ -268,7 +281,7 @@ From The ZJ "Late """,
|
|||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/art",
|
"#url" : "https://tomfulp.newgrounds.com/art",
|
||||||
"#class" : newgrounds.NewgroundsArtExtractor,
|
"#class" : newgrounds.NewgroundsArtExtractor,
|
||||||
"#pattern" : newgrounds.NewgroundsImageExtractor.pattern,
|
"#pattern" : r"https://(art.ngfiles.com/images/\d+|uploads.ungrounded.net/tmp/img/)",
|
||||||
"#count" : ">= 3",
|
"#count" : ">= 3",
|
||||||
},
|
},
|
||||||
|
|
||||||
@ -310,9 +323,11 @@ From The ZJ "Late """,
|
|||||||
{
|
{
|
||||||
"#url" : "https://tomfulp.newgrounds.com/games",
|
"#url" : "https://tomfulp.newgrounds.com/games",
|
||||||
"#class" : newgrounds.NewgroundsGamesExtractor,
|
"#class" : newgrounds.NewgroundsGamesExtractor,
|
||||||
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/(\d+/\d+_.+|tmp/.+)",
|
"#pattern" : r"https://(uploads.ungrounded.net(/alternate)?/(\d+/\d+_.+|tmp/.+)|img.ngfiles.com/)",
|
||||||
"#range" : "1-10",
|
"#range" : "1-10",
|
||||||
"#count" : 10,
|
"#count" : 10,
|
||||||
|
|
||||||
|
"type": {"archive", "game"},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user