# -*- coding: utf-8 -*-

# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://idol.sankakucomplex.com/"""

from .sankaku import SankakuExtractor
from .common import Message
from ..cache import cache
from .. import text, util, exception
import collections
import re


class IdolcomplexExtractor(SankakuExtractor):
    """Base class for idolcomplex extractors"""
    category = "idolcomplex"
    # Names of the cookies that login() checks for and that
    # _login_impl() copies out of the authentication response.
    cookienames = ("login", "pass_hash")
    cookiedomain = "idol.sankakucomplex.com"
    # Base URL every request URL in this class is built from.
    root = "https://" + cookiedomain
    # NOTE(review): presumably the delay between requests in seconds;
    # the attribute is consumed outside this class - confirm.
    request_interval = 5.0

    def __init__(self, match):
        """Initialize the parent extractor and the paging/login state"""
        SankakuExtractor.__init__(self, match)
        # Assumed logged in until login() finds no username.
        self.logged_in = True
        self.start_page = 1
        # Number of leading post ids items() passes to util.advance();
        # increased by skip().
        self.start_post = 0
        # Value of the "tags" config option (False when unset).
        self.extags = self.config("tags", False)

    def items(self):
        """Log in, then yield a Directory and a Url message per post"""
        self.login()
        # metadata() is not defined in this class body as seen here;
        # its result is merged into every post below.
        data = self.metadata()

        # presumably util.advance() skips the first 'start_post' ids
        # of the iterable - verify against util.advance
        for post_id in util.advance(self.post_ids(), self.start_post):
            post = self._parse_post(post_id)
            url = post["file_url"]
            # Shared metadata is applied after parsing, so it replaces
            # same-named keys of the parsed post.
            post.update(data)
            # presumably stores filename and extension derived from
            # 'url' in 'post' - verify against text.nameext_from_url
            text.nameext_from_url(url, post)
            yield Message.Directory, post
            yield Message.Url, url, post

    def skip(self, num):
        """Add 'num' to the post offset used by items() and return 'num'"""
        self.start_post += num
        return num

    def post_ids(self):
        """Return an iterable containing all relevant post ids"""
        # The body consists of the docstring only, so this returns None;
        # presumably subclasses override it - confirm.

    def login(self):
        """Ensure login cookies are present or mark the session logged out

        Does nothing further when _check_cookies() succeeds for
        'cookienames'. Otherwise logs in with the configured
        credentials, or sets 'logged_in' to False when no username
        is available.
        """
        if self._check_cookies(self.cookienames):
            return
        username, password = self._get_auth_info()
        if username:
            cookies = self._login_impl(username, password)
            self._update_cookies(cookies)
        else:
            self.logged_in = False

    # NOTE(review): presumably caches the returned cookies for 90 days
    # (90*24*3600 seconds) keyed on the argument at index 1, i.e. the
    # username - confirm against the 'cache' decorator.
    @cache(maxage=90*24*3600, keyarg=1)
    def _login_impl(self, username, password):
        """Authenticate with username and password; return login cookies

        Sends the login form to '/user/authenticate' and returns a dict
        mapping each name in 'cookienames' to its value from the first
        response in the redirect history.

        Raises exception.AuthenticationError when the response has no
        redirect history or does not end at '/user/home'.
        """
        self.log.info("Logging in as %s", username)

        url = self.root + "/user/authenticate"
        data = {
            "url"           : "",
            "user[name]"    : username,
            "user[password]": password,
            "commit"        : "Login",
        }
        response = self.request(url, method="POST", data=data)

        # Success is recognized by a redirect chain ending at /user/home.
        if not response.history or response.url != self.root + "/user/home":
            raise exception.AuthenticationError()
        # The cookies are read from the first response of the redirect
        # chain, not from the final page.
        cookies = response.history[0].cookies
        return {c: cookies[c]
                for c in self.cookienames}

    def _parse_post(self, post_id):
        """Extract metadata of a single post"""
        url = self.root + "/post/show/" + post_id
        page = self.request(url, retries=10).text
        extr = text.extract

        tags , pos = extr(page, "