From 50098762e326063e8f6e4b44ab1572878d73e918 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Thu, 14 Oct 2021 16:23:47 +0200
Subject: [PATCH] [nhentai] add 'tag' extractor (closes #1950)

---
 docs/supportedsites.md          |  2 +-
 gallery_dl/extractor/nhentai.py | 43 +++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 27ffade3..56802401 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -508,7 +508,7 @@ Consider all sites to be NSFW unless otherwise known.
 <tr>
     <td>nhentai</td>
     <td>https://nhentai.net/</td>
-    <td>Favorites, Galleries, Search Results</td>
+    <td>Favorites, Galleries, Search Results, Tag Searches</td>
     <td></td>
 </tr>
 <tr>
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index 20b716b2..5b5da6a8 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -99,6 +99,49 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
         ]
 
 
+class NhentaiTagExtractor(NhentaiBase, Extractor):
+    """Extractor for nhentai tag searches"""
+    subcategory = "tag"
+    pattern = (r"(?:https?://)?nhentai\.net("
+               r"/(?:artist|category|character|group|language|parody|tag)"
+               r"/[^/?#]+(?:/popular[^/?#]*)?/?)(?:\?([^#]+))?")
+    test = (
+        ("https://nhentai.net/tag/sole-female/", {
+            "pattern": NhentaiGalleryExtractor.pattern,
+            "count": 30,
+            "range": "1-30",
+        }),
+        "https://nhentai.net/artist/itou-life/",
+        "https://nhentai.net/group/itou-life/",
+        "https://nhentai.net/parody/touhou-project/",
+        "https://nhentai.net/character/patchouli-knowledge/popular",
+        "https://nhentai.net/category/doujinshi/popular-today",
+        "https://nhentai.net/language/english/popular-week",
+    )
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.path, self.query = match.groups()
+
+    def items(self):
+        """Queue every gallery from _pagination() for the gallery extractor"""
+        data = {"_extractor": NhentaiGalleryExtractor}
+        for gid in self._pagination():
+            yield Message.Queue, "{}/g/{}/".format(self.root, gid), data
+
+    def _pagination(self):
+        """Yield gallery IDs page by page until 'class="next"' is absent"""
+        url = self.root + self.path
+        params = text.parse_query(self.query)
+        params["page"] = text.parse_int(params.get("page"), 1)
+        has_next = True
+        while has_next:
+            html = self.request(url, params=params).text
+            yield from text.extract_iter(html, 'href="/g/', '/')
+            has_next = 'class="next"' in html
+            params["page"] += 1
+
+
 class NhentaiSearchExtractor(NhentaiBase, Extractor):
     """Extractor for nhentai search results"""
     subcategory = "search"