# -*- coding: utf-8 -*-

# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://seiga.nicovideo.jp/"""

from .common import Extractor, Message
from .. import text, util, exception


class SeigaExtractor(Extractor):
    """Base class for seiga extractors"""
    category = "seiga"
    archive_fmt = "{image_id}"
    cookies_domain = ".nicovideo.jp"
    root = "https://seiga.nicovideo.jp"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # number of leading images to drop from the result stream;
        # subclasses (e.g. their skip()) may raise this
        self.start_image = 0

    def items(self):
        """Yield a Directory message followed by one Url message per image.

        Raises StopExtraction when the required login cookie is absent.
        """
        # seiga requires a logged-in session; without it the image
        # endpoints redirect to the login page
        if not self.cookies_check(("user_session",)):
            raise exception.StopExtraction("'user_session' cookie required")

        images = iter(self.get_images())
        # first item produced by get_images() is the shared metadata dict
        data = next(images)

        yield Message.Directory, data
        for image in util.advance(images, self.start_image):
            data.update(image)
            # extension is unknown until download time
            data["extension"] = None
            yield Message.Url, self.get_image_url(data["image_id"]), data

    def get_images(self):
        """Return iterable containing metadata and images"""

    def get_image_url(self, image_id):
        """Get url for an image with id 'image_id'"""
        url = "{}/image/source/{}".format(self.root, image_id)
        # HEAD request only: the real image URL is in the redirect target
        response = self.request(
            url, method="HEAD", allow_redirects=False, notfound="image")
        location = response.headers["location"]
        if "nicovideo.jp/login" in location:
            raise exception.StopExtraction(
                "HTTP redirect to login page (%s)",
                location.partition("?")[0])
        # presumably rewrites the thumbnail path to the full-size
        # ("priv") variant — TODO confirm against live responses
        return location.replace("/o/", "/priv/", 1)


class SeigaUserExtractor(SeigaExtractor):
    """Extractor for images of a user from seiga.nicovideo.jp"""
    subcategory = "user"
    directory_fmt = ("{category}", "{user[id]}")
    filename_fmt = "{category}_{user[id]}_{image_id}.{extension}"
    # FIX: the optional 'sort' group previously read '([^]+)', an
    # unterminated character class that makes re.compile() raise
    # re.error; restored to '([^&#]+)' (match up to '&' or '#')
    pattern = (r"(?:https?://)?(?:www\.|(?:sp\.)?seiga\.)?nicovideo\.jp/"
               r"user/illust/(\d+)(?:\?(?:[^&]+&)*sort=([^&#]+))?")
    example = "https://seiga.nicovideo.jp/user/illust/12345"

    def __init__(self, match):
        SeigaExtractor.__init__(self, match)
# NOTE(review): this span continues SeigaUserExtractor.__init__() started on
# the previous (mangled) source line; indentation restored by hand.
        # capture groups from `pattern`: user id and optional sort order
        self.user_id, self.order = match.groups()
        self.start_page = 1

    def skip(self, num):
        """Skip `num` images: whole 40-image pages via start_page,
        the remainder via start_image (consumed in items())."""
        pages, images = divmod(num, 40)
        self.start_page += pages
        self.start_image += images
        return num

    def get_metadata(self, page):
        """Collect metadata from 'page'"""
        # NOTE(review): the delimiter strings below are empty and a
        # ("msg", ...) field (read further down) is absent — the original
        # HTML-tag delimiters appear to have been stripped when this source
        # was extracted. Restore them from upstream before running this.
        data = text.extract_all(page, (
            ("name" , '', ''),
            (None   , 'すべて', ''),
            ("count", '', ''),
        ))[0]
        if not data["name"] and "ユーザー情報が取得出来ませんでした" in page:
            raise exception.NotFoundError("user")
        return {
            "user": {
                "id": text.parse_int(self.user_id),
                "name": data["name"],
                # data["msg"] is never extracted above (garbled tuple list);
                # as written this raises KeyError — see NOTE above
                "message": (data["msg"] or "").strip(),
            },
            "count": text.parse_int(data["count"]),
        }

    def get_images(self):
        """Yield the user metadata dict, then paginate over all illusts."""
        url = "{}/user/illust/{}".format(self.root, self.user_id)
        params = {"sort": self.order, "page": self.start_page,
                  "target": "illust_all"}
        while True:
            cnt = 0
            page = self.request(url, params=params).text
            # emit metadata only once, for the first fetched page
            if params["page"] == self.start_page:
                yield self.get_metadata(page)
            # NOTE(review): the source is truncated here — the
            # extract_iter() delimiters and the loop body are missing
            # from this chunk (extraction gap).
            for info in text.extract_iter(
                    page, '
', '
'), ))[0] data["user"] = text.extract_all(page, ( ("id" , '', '<'), ))[0] data["description"] = text.remove_html(data["description"]) data["image_id"] = text.parse_int(self.image_id) data["date"] = text.parse_datetime( data["date"] + ":00+0900", "%Y年%m月%d日 %H:%M:%S%z") return (data, data)