# -*- coding: utf-8 -*-
# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://www.hentai-foundry.com/"""
from .common import Extractor, Message
from .. import text, util
# Regex for the site's URL prefix. Capture group 1 is the optional scheme
# ("http://" or "https://"); the optional "www." part is matched but not
# captured. HentaifoundryExtractor.__init__ reads group 1 to pick the scheme.
BASE_PATTERN = r"(https?://)?(?:www\.)?hentai-foundry\.com"
class HentaifoundryExtractor(Extractor):
"""Base class for hentaifoundry extractors"""
# Site identifier; also referenced as "{category}" in the templates below.
category = "hentaifoundry"
# Format templates built from metadata field names such as "user", "index",
# "title" and "extension". NOTE(review): presumably consumed by the base
# Extractor machinery for output paths and archive IDs -- confirm there.
directory_fmt = ("{category}", "{user}")
filename_fmt = "{category}_{index}_{title}.{extension}"
archive_fmt = "{index}"
# NOTE(review): presumably the domain used for cookie handling -- confirm
# against the base class.
cookies_domain = "www.hentai-foundry.com"
# Class-level default; __init__ replaces it per instance using the scheme
# captured from the matched URL.
root = "https://www.hentai-foundry.com"
# Page size used by skip() to split a skip count into whole pages and
# leftover posts.
per_page = 25
def __init__(self, match):
    """Set up extractor state from a URL regex match.

    Group 1 of 'match' is the optional URL scheme (defaulting to
    "https://" when absent); group 2 is stored as the user name
    (its exact meaning depends on the pattern that produced 'match').
    """
    scheme = match.group(1) or "https://"
    self.root = scheme + "www.hentai-foundry.com"
    self.user = match.group(2)

    # Base-class initialisation runs after root/user are set, exactly
    # as before.
    Extractor.__init__(self, match)

    # Pagination state: skip() may advance the two counters before
    # extraction starts.
    self.start_page = 1
    self.start_post = 0
    self.page_url = ""
def items(self):
    """Yield a Directory and a Url message for every post.

    Each post URL from posts() is parsed into a metadata dict, merged
    with the shared metadata() values, and emitted twice: once as
    (Message.Directory, post) and once as
    (Message.Url, post["src"], post).
    """
    self._init_site_filters()
    shared = self.metadata()

    # start_post is handed to util.advance together with the post
    # iterator (presumably to skip that many leading posts).
    post_urls = util.advance(self.posts(), self.start_post)
    for url in post_urls:
        post = self._parse_post(url)
        post.update(shared)
        yield Message.Directory, post
        yield Message.Url, post["src"], post
def skip(self, num):
    """Record that 'num' posts should be skipped and return 'num'.

    The count is split by 'per_page': whole pages are added to
    'start_page', the remainder to 'start_post'.
    """
    page_size = self.per_page
    self.start_page += num // page_size
    self.start_post += num % page_size
    return num
def metadata(self):
    """Return the metadata dict shared by all posts: the user name."""
    shared = {"user": self.user}
    return shared
def posts(self):
    """Return the paginated results for 'page_url'."""
    paginate = self._pagination
    return paginate(self.page_url)
def _pagination(self, url, begin='thumbTitle">', '<')),
"artist" : text.unescape(extr('/profile">', '<')),
"_body" : extr(
'