From c7c3fef0bc25687b7da5a050b3327c89b5070d21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 8 Mar 2021 22:40:51 +0100 Subject: [PATCH] [exhentai] support '/tag/' URLs (closes #1363) --- gallery_dl/extractor/exhentai.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 552ea4df..cbdf2da7 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -424,9 +424,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): class ExhentaiSearchExtractor(ExhentaiExtractor): """Extractor for exhentai search results""" subcategory = "search" - pattern = BASE_PATTERN + r"/?\?(.*)$" + pattern = BASE_PATTERN + r"/(?:\?([^#]*)|tag/([^/?#]+))" test = ( ("https://e-hentai.org/?f_search=touhou"), + ("https://exhentai.org/?f_cats=767&f_search=touhou"), + ("https://exhentai.org/tag/parody:touhou+project"), (("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0" "&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0" "&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), { @@ -438,10 +440,20 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): def __init__(self, match): ExhentaiExtractor.__init__(self, match) - self.params = text.parse_query(match.group(2)) - self.params["page"] = text.parse_int(self.params.get("page")) self.search_url = self.root + _, query, tag = match.groups() + if tag: + if "+" in tag: + ns, _, tag = tag.rpartition(":") + tag = '{}:"{}$"'.format(ns, tag.replace("+", " ")) + else: + tag += "$" + self.params = {"f_search": tag, "page": 0} + else: + self.params = text.parse_query(query) + self.params["page"] = text.parse_int(self.params.get("page")) + def items(self): self.login() data = {"_extractor": ExhentaiGalleryExtractor} @@ -465,7 +477,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor): """Extractor for favorited exhentai galleries""" subcategory = "favorite" - pattern = BASE_PATTERN + r"/favorites\.php(?:\?(.*))?" + pattern = BASE_PATTERN + r"/favorites\.php(?:\?([^#]*)())?" test = ( ("https://e-hentai.org/favorites.php", { "count": 1,