[booru] unquote (percent-decode) tags to fix multiple tags in URLs

2024-11-25 04:02:32 +01:00 · 2014-12-22 16:45:58 +01:00 · 2014-12-22 16:45:58 +01:00 · 2a7dbd8868
commit 2a7dbd8868
parent f2c68a5753
2 changed files with 4 additions and 4 deletions
--- a/4
+++ b/4
@ -21,5 +21,5 @@ regex0 = (?:https?://)?(?:www\.)?imgbox\.com(/.*)
 regex0 = (?:https?://)?(?:www\.)?imagebam\.com/(gallery|image)/([^/]+).*
 [e621]
-regex0 = (?:https?://)?(?:www\.)?e621\.net/post/index/\d+/(\w+)
+regex0 = (?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)
-regex1 = e(?:621)?[.:-_](\w.+)
+regex1 = e(?:621)?[.:-_](\w.+)
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@ -1,12 +1,13 @@
 from .common import AsyncExtractor
 from ..util import filename_from_url
 import xml.etree.ElementTree as ET
 import urllib.parse
 class BooruExtractor(AsyncExtractor):
    def __init__(self, match, config):
        AsyncExtractor.__init__(self, config)
-        self.tags      = match.group(1)
+        self.tags      = urllib.parse.unquote(match.group(1))
        self.category  = "booru"
        self.params    = {"tags": self.tags}
        self.page      = "page"
@ -18,7 +19,6 @@ class BooruExtractor(AsyncExtractor):
            root = ET.fromstring(
                self.request(self.api_url, verify=True, params=self.params).text
            )
            # root = tree.getroot()
            if len(root) == 0:
                return
            for item in root: