# -*- coding: utf-8 -*- # Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://www.newgrounds.com/""" from .common import Extractor, Message from .. import text, util, exception from ..cache import cache import itertools import re BASE_PATTERN = r"(?:https?://)?(?:www\.)?newgrounds\.com" USER_PATTERN = r"(?:https?://)?([\w-]+)\.newgrounds\.com" class NewgroundsExtractor(Extractor): """Base class for newgrounds extractors""" category = "newgrounds" directory_fmt = ("{category}", "{artist[:10]:J, }") filename_fmt = "{category}_{_index}_{title}.{extension}" archive_fmt = "{_type}{_index}" root = "https://www.newgrounds.com" cookies_domain = ".newgrounds.com" cookies_names = ("NG_GG_username", "vmk1du5I8m") request_interval = (0.5, 1.5) def __init__(self, match): Extractor.__init__(self, match) self.user = match.group(1) self.user_root = "https://{}.newgrounds.com".format(self.user) def _init(self): self._extract_comment_urls = re.compile( r'(?:= 0: msg = text.extract(page, 'class="highlight">', '<', pos)[0] self.log.warning('"%s"', msg) return {} if response.status_code >= 400: return {} extr = text.extract_from(page) data = extract_data(extr, post_url) data["_comment"] = extr( 'id="author_comments"', '').partition(">")[2] data["comment"] = text.unescape(text.remove_html( data["_comment"], "", "")) data["favorites"] = text.parse_int(extr( 'id="faves_load">', '<').replace(",", "")) data["score"] = text.parse_float(extr('id="score_number">', '<')) data["tags"] = text.split_html(extr('
', '
')) data["artist"] = [ text.extr(user, '//', '.') for user in text.extract_iter(page, '
', '>') ] data["tags"].sort() data["user"] = self.user or data["artist"][0] data["post_url"] = post_url return data def _extract_image_data(self, extr, url): full = text.extract_from(util.json_loads(extr( '"full_image_text":', '});'))) data = { "title" : text.unescape(extr('"og:title" content="', '"')), "description": text.unescape(extr(':description" content="', '"')), "type" : extr('og:type" content="', '"'), "_type" : "i", "date" : text.parse_datetime(extr( 'itemprop="datePublished" content="', '"')), "rating" : extr('class="rated-', '"'), "url" : full('src="', '"'), "width" : text.parse_int(full('width="', '"')), "height" : text.parse_int(full('height="', '"')), } index = data["url"].rpartition("/")[2].partition("_")[0] data["index"] = text.parse_int(index) data["_index"] = index image_data = extr("let imageData =", "\n];") if image_data: data["_multi"] = self._extract_images_multi(image_data) else: art_images = extr('