# -*- coding: utf-8 -*-

# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.hentai-foundry.com/"""

from .common import Extractor, Message
from .. import text, util

BASE_PATTERN = r"(https?://)?(?:www\.)?hentai-foundry\.com"


class HentaifoundryExtractor(Extractor):
    """Common functionality shared by the hentaifoundry extractors"""
    category = "hentaifoundry"
    directory_fmt = ("{category}", "{user}")
    filename_fmt = "{category}_{index}_{title}.{extension}"
    archive_fmt = "{index}"
    cookies_domain = "www.hentai-foundry.com"
    root = "https://www.hentai-foundry.com"
    per_page = 25

    def __init__(self, match):
        """Set up root URL, user name and paging state from 'match'

        Group 1 of 'match' is the optional URL scheme captured by
        BASE_PATTERN; "https://" is used when it is absent.
        Group 2 is stored as the user name -- presumably captured by the
        pattern of the concrete subclass (not visible here; confirm).
        """
        scheme = match.group(1) or "https://"
        self.root = scheme + "www.hentai-foundry.com"
        self.user = match.group(2)
        Extractor.__init__(self, match)

        # paging state; 'skip()' moves the two start values forward
        self.page_url = ""
        self.start_post = 0
        self.start_page = 1

    def items(self):
        """Yield a Directory and a Url message for each post"""
        self._init_site_filters()
        shared = self.metadata()

        # drop the first 'start_post' entries before parsing anything
        remaining = util.advance(self.posts(), self.start_post)
        for path in remaining:
            post = self._parse_post(path)
            post.update(shared)
            yield Message.Directory, post
            yield Message.Url, post["src"], post

    def skip(self, num):
        """Advance the start position by 'num' posts and return 'num'"""
        # whole pages go to 'start_page', the remainder to 'start_post'
        self.start_page += num // self.per_page
        self.start_post += num % self.per_page
        return num

    def metadata(self):
        """Return the metadata merged into every post by 'items()'"""
        return {"user": self.user}

    def posts(self):
        """Return the paginated results for 'self.page_url'"""
        return self._pagination(self.page_url)

    # NOTE(review): the source is damaged from this point on.  Text inside
    # the string literals appears to be missing, which truncates
    # '_pagination' and runs it into what looks like a different method.
    # The surviving text of this line is kept verbatim below; restore the
    # definitions from a clean copy of the file.
    # def _pagination(self, url, begin='thumbTitle">', '<')), "artist" : text.unescape(extr('/profile">', '<')), "_body" : extr( '

Description

', '') .replace("\r\n", "\n"), "", "")), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", ""), "title='", "'")], "date" : text.parse_datetime(extr("datetime='", "'")), "views" : text.parse_int(extr(">Views", "<")), "score" : text.parse_int(extr(">Vote Score", "<")), "media" : text.unescape(extr(">Media", "<").strip()), "tags" : text.split_html(extr( ">Tags ", "")), } body = data["_body"] if "