From d31a3b5da38bd63ede507896eb738a87d5374dc5 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Mon, 23 Sep 2024 00:35:19 -0600 Subject: [PATCH] [everia.club] Add support - Unescape title and URL - Add tags and categories metadata Lookup tag id with API instead of downloading tag page - Add category extractor - Add tests - Rename EveriaExtractor to EveriaPostExtractor - Fix EveriaPostExtractor example - Lookup tags/categories by post id - Add date extractor - Remove leftover pages parameter - Add error handling for invalid dates. - Add filename numbering Parse date - Rename extract() to images() - Remove html import - Fix search/date URLs with page number - Fix tag/category search - Fix post extractor - Fix tag, category extractors - Fix search extractor - Only load first page once - Fix date extractor - Fix tests - Clean up search extractor --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/everia.py | 106 +++++++++++++++++++++++++++++++ test/results/everia.py | 37 +++++++++++ 4 files changed, 150 insertions(+) create mode 100644 gallery_dl/extractor/everia.py create mode 100644 test/results/everia.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index ba5aed8f..e5eac728 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -229,6 +229,12 @@ Consider all listed sites to potentially be NSFW. Albums, Search Results, User Profiles + + Everia + https://everia.club + Categories, Dates, Posts, Search Results, Tag Searches + + ExHentai https://exhentai.org/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 067ec013..b707ea25 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -47,6 +47,7 @@ modules = [ "dynastyscans", "e621", "erome", + "everia", "exhentai", "fanbox", "fanleaks", diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py new file mode 100644 index 00000000..c46d5605 --- /dev/null +++ b/gallery_dl/extractor/everia.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019-2023 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://everia.club""" + +from .common import Extractor, Message +from .. import text +import re + +BASE_PATTERN = r"(?:https?://)?everia\.club" + + +class EveriaPostExtractor(Extractor): + category = "everia" + subcategory = "post" + root = "https://everia.club" + pattern = BASE_PATTERN + r"/(\d{4}/\d{2}/\d{2}/[^/]+)/?" + example = "https://everia.club/0000/00/00/TITLE" + directory_fmt = ("{category}", "{title}") + + def __init__(self, match): + super().__init__(match) + self.url = match.group(0) + + def items(self): + page = self.request(self.url).text + content = text.extr(page, 'itemprop="text">', "") + urls = re.findall(r'img.*?src=\"(.+?)\"', content) + + data = { + "title": text.unescape( + text.extr(page, 'itemprop="headline">', "") + ), + "url": self.url, + "tags": list(text.extract_iter(page, 'rel="tag">', "")), + "post_category": text.extr( + page, "post-in-category-", " " + ).capitalize(), + "count": len(urls), + } + + yield Message.Directory, data + for data["num"], url in enumerate(urls, 1): + text.nameext_from_url(text.unquote(url), data) + yield Message.Url, url, data + + +class EveriaTagExtractor(EveriaPostExtractor): + subcategory = "tag" + pattern = BASE_PATTERN + r"/(tag/[^/]+)/?" + example = "https://everia.club/tag/TAG" + + def __init__(self, match): + super().__init__(match) + self.id = match.group(1) + + def _posts(self, page): + posts = re.findall(r'thumbnail\">\s*