# -*- coding: utf-8 -*-

# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://seiga.nicovideo.jp/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache


class SeigaExtractor(Extractor):
    """Base class for seiga extractors

    Subclasses supply get_images(); items() logs in and then emits one
    directory message followed by one URL message per image.
    """
    category = "seiga"
    archive_fmt = "{image_id}"
    cookies_domain = ".nicovideo.jp"
    # Cookie names handed to cookies_check() by login()
    cookies_names = ("user_session",)
    root = "https://seiga.nicovideo.jp"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # Forwarded to util.advance() in items()
        # (presumably the number of leading images to skip -- TODO confirm)
        self.start_image = 0

    def items(self):
        """Log in, then yield a Directory message and one Url message per image"""
        self.login()

        stream = iter(self.get_images())
        # The first element of get_images() is the shared metadata dict.
        # NOTE(review): if the iterable is empty, next() raises StopIteration,
        # which Python re-raises as RuntimeError inside a generator (PEP 479).
        metadata = next(stream)
        yield Message.Directory, metadata

        for entry in util.advance(stream, self.start_image):
            # The same dict object is updated in place and re-yielded
            metadata.update(entry)
            metadata["extension"] = None
            image_url = self.get_image_url(metadata["image_id"])
            yield Message.Url, image_url, metadata

    def get_images(self):
        """Return an iterable whose first element is the metadata dict,
        followed by one dict per image (each providing 'image_id')"""

    def get_image_url(self, image_id):
        """Resolve the download URL for the image with id 'image_id'

        Sends a HEAD request to '<root>/image/source/<image_id>' without
        following redirects and reads the 'location' response header.
        Raises StopExtraction if that location is the login page;
        otherwise returns it with the first '/o/' replaced by '/priv/'.
        """
        source_url = "{}/image/source/{}".format(self.root, image_id)
        response = self.request(
            source_url, method="HEAD", allow_redirects=False,
            notfound="image")
        target = response.headers["location"]

        if "nicovideo.jp/login" not in target:
            return target.replace("/o/", "/priv/", 1)

        raise exception.StopExtraction(
            "HTTP redirect to login page (%s)", target.partition("?")[0])

    def login(self):
        """Ensure session cookies are present, logging in if necessary

        Returns early when cookies_check() accepts 'cookies_names'.
        Otherwise logs in with the credentials from _get_auth_info() and
        passes the result of _login_impl() to cookies_update().
        Raises AuthorizationError when no username is configured.
        """
        if self.cookies_check(self.cookies_names):
            return

        username, password = self._get_auth_info()
        if not username:
            raise exception.AuthorizationError(
                "username & password or 'user_session' cookie required")

        return self.cookies_update(self._login_impl(username, password))

    @cache(maxage=365*86400, keyarg=1)
    def _login_impl(self, username, password):
        self.log.info("Logging in as %s", username)

        root = "https://account.nicovideo.jp"
        response = self.request(root + "/login?site=seiga")
        page = response.text
data = { "mail_tel": username, "password": password, } url = root + text.unescape(text.extr(page, '