1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-23 03:02:50 +01:00

[newgrounds] extract image embeds (closes #1033)

This commit is contained in:
Mike Fährmann 2020-10-08 22:05:44 +02:00
parent 43b156fb40
commit c5e3971b18
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -19,8 +19,8 @@ class NewgroundsExtractor(Extractor):
"""Base class for newgrounds extractors""" """Base class for newgrounds extractors"""
category = "newgrounds" category = "newgrounds"
directory_fmt = ("{category}", "{artist[:10]:J, }") directory_fmt = ("{category}", "{artist[:10]:J, }")
filename_fmt = "{category}_{index}_{title}.{extension}" filename_fmt = "{category}_{_index}_{title}.{extension}"
archive_fmt = "{index}" archive_fmt = "{_index}"
root = "https://www.newgrounds.com" root = "https://www.newgrounds.com"
cookiedomain = ".newgrounds.com" cookiedomain = ".newgrounds.com"
cookienames = ("NG_GG_username", "vmk1du5I8m") cookienames = ("NG_GG_username", "vmk1du5I8m")
@ -44,6 +44,13 @@ class NewgroundsExtractor(Extractor):
if url: if url:
yield Message.Directory, post yield Message.Directory, post
yield Message.Url, url, text.nameext_from_url(url, post) yield Message.Url, url, text.nameext_from_url(url, post)
for num, url in enumerate(text.extract_iter(
post["_comment"], 'data-smartload-src="', '"'), 1):
post["num"] = num
post["_index"] = "{}_{:>02}".format(post["index"], num)
text.nameext_from_url(url, post)
yield Message.Url, url, post
else: else:
self.log.warning( self.log.warning(
"Unable to get download URL for '%s'", post_url) "Unable to get download URL for '%s'", post_url)
@ -97,8 +104,9 @@ class NewgroundsExtractor(Extractor):
else: else:
data = self._extract_media_data(extr, post_url) data = self._extract_media_data(extr, post_url)
data["comment"] = text.unescape(text.remove_html(extr( data["_comment"] = extr('id="author_comments"', '</div>')
'id="author_comments"', '</div>').partition(">")[2], "", "")) data["comment"] = text.unescape(text.remove_html(
data["_comment"].partition(">")[2], "", ""))
data["favorites"] = text.parse_int(extr( data["favorites"] = text.parse_int(extr(
'id="faves_load">', '<').replace(",", "")) 'id="faves_load">', '<').replace(",", ""))
data["score"] = text.parse_float(extr('id="score_number">', '<')) data["score"] = text.parse_float(extr('id="score_number">', '<'))
@ -125,19 +133,22 @@ class NewgroundsExtractor(Extractor):
"width" : text.parse_int(full('width="', '"')), "width" : text.parse_int(full('width="', '"')),
"height" : text.parse_int(full('height="', '"')), "height" : text.parse_int(full('height="', '"')),
} }
data["index"] = text.parse_int( index = data["url"].rpartition("/")[2].partition("_")[0]
data["url"].rpartition("/")[2].partition("_")[0]) data["index"] = text.parse_int(index)
data["_index"] = index
return data return data
@staticmethod @staticmethod
def _extract_audio_data(extr, url): def _extract_audio_data(extr, url):
index = url.split("/")[5]
return { return {
"title" : text.unescape(extr('"og:title" content="', '"')), "title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')), "description": text.unescape(extr(':description" content="', '"')),
"date" : text.parse_datetime(extr( "date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')), 'itemprop="datePublished" content="', '"')),
"url" : extr('{"url":"', '"').replace("\\/", "/"), "url" : extr('{"url":"', '"').replace("\\/", "/"),
"index" : text.parse_int(url.split("/")[5]), "index" : text.parse_int(index),
"_index" : index,
"rating" : "", "rating" : "",
} }
@ -169,6 +180,7 @@ class NewgroundsExtractor(Extractor):
'itemprop="description" content="', '"')), 'itemprop="description" content="', '"')),
"rating" : extr('class="rated-', '"'), "rating" : extr('class="rated-', '"'),
"index" : text.parse_int(index), "index" : text.parse_int(index),
"_index" : index,
} }
def _pagination(self, kind): def _pagination(self, kind):
@ -232,6 +244,10 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
("https://art.ngfiles.com/images/0/94_tomfulp_ryu-is-hawt.gif", { ("https://art.ngfiles.com/images/0/94_tomfulp_ryu-is-hawt.gif", {
"url": "57f182bcbbf2612690c3a54f16ffa1da5105245e", "url": "57f182bcbbf2612690c3a54f16ffa1da5105245e",
}), }),
("https://www.newgrounds.com/art/view/sailoryon/yon-dream-buster", {
"url": "84eec95e663041a80630df72719f231e157e5f5d",
"count": 2,
})
) )
def __init__(self, match): def __init__(self, match):