
remove 'extractor.blacklist' context manager

Mike Fährmann 2020-09-11 13:11:46 +02:00
parent c78aa17506
commit 3918b69677
7 changed files with 12 additions and 78 deletions

View File

@@ -1081,16 +1081,6 @@ Description Controls how to handle redirects to CAPTCHA pages.
 =========== =====
 
 
-extractor.recursive.blacklist
------------------------------
-=========== =====
-Type        ``list`` of ``strings``
-Default     ``["directlink", "oauth", "recursive", "test"]``
-Description A list of extractor categories which should be ignored when using
-            the ``recursive`` extractor.
-=========== =====
-
-
 extractor.reddit.comments
 -------------------------
 =========== =====

View File

@@ -119,10 +119,6 @@
         {
             "captcha": "stop"
         },
-        "recursive":
-        {
-            "blacklist": ["directlink", "oauth", "recursive", "test"]
-        },
         "reddit":
         {
             "comments": 0,

View File

@@ -140,7 +140,7 @@ def find(url):
     """Find a suitable extractor for the given URL"""
     for cls in _list_classes():
         match = cls.pattern.match(url)
-        if match and cls not in _blacklist:
+        if match:
             return cls(match)
     return None
@@ -169,26 +169,10 @@ def extractors():
     )
 
 
-class blacklist():
-    """Context Manager to blacklist extractor modules"""
-    def __init__(self, categories, extractors=None):
-        self.extractors = extractors or []
-        for cls in _list_classes():
-            if cls.category in categories:
-                self.extractors.append(cls)
-
-    def __enter__(self):
-        _blacklist.update(self.extractors)
-
-    def __exit__(self, etype, value, traceback):
-        _blacklist.clear()
-
-
 # --------------------------------------------------------------------
 # internals
 
 _cache = []
-_blacklist = set()
 _module_iter = iter(modules)
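For context, a minimal usage sketch of find() after this change; the URL and the DirectlinkExtractor class name are taken from the test file at the end of this commit, and the exact output is an assumption:

```python
from gallery_dl import extractor

# find() now simply returns an instance of the first registered extractor
# class whose pattern matches the URL, or None -- no blacklist set is consulted
ex = extractor.find("https://example.org/file.jpg")
print(type(ex).__name__ if ex else None)  # expected: "DirectlinkExtractor"
```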

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019 Mike Fährmann
# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,7 @@
 """Extractors for https://www.plurk.com/"""
 
 from .common import Extractor, Message
-from .. import text, extractor, exception
+from .. import text, exception
 import datetime
 import time
 import json
@@ -23,12 +23,9 @@ class PlurkExtractor(Extractor):
     def items(self):
         urls = self._urls_ex if self.config("comments", False) else self._urls
 
         yield Message.Version, 1
-        with extractor.blacklist(("plurk",)):
-            for plurk in self.plurks():
-                for url in urls(plurk):
-                    yield Message.Queue, url, plurk
+        for plurk in self.plurks():
+            for url in urls(plurk):
+                yield Message.Queue, url, plurk
 
     def plurks(self):
         """Return an iterable with all relevant 'plurk' objects"""

View File

@@ -9,7 +9,6 @@
 """Recursive extractor"""
 
 from .common import Extractor, Message
-from .. import extractor, util
 import requests
 import re
@@ -23,17 +22,12 @@
     })
 
     def items(self):
-        blist = self.config(
-            "blacklist", {"directlink"} | util.SPECIAL_EXTRACTORS)
-
         self.session.mount("file://", FileAdapter())
         page = self.request(self.url.partition(":")[2]).text
         del self.session.adapters["file://"]
 
         yield Message.Version, 1
-        with extractor.blacklist(blist):
-            for match in re.finditer(r"https?://[^\s\"']+", page):
-                yield Message.Queue, match.group(0), {}
+        for match in re.finditer(r"https?://[^\s\"']+", page):
+            yield Message.Queue, match.group(0), {}
 
 
 class FileAdapter(requests.adapters.BaseAdapter):
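A quick sketch of the URL-queueing behaviour this hunk keeps: the regex is taken verbatim from the diff, while the page string is a made-up example.

```python
import re

# the recursive extractor queues every http(s) URL found in the fetched page;
# the character class stops at whitespace and at both quote characters
page = '<a href="https://example.org/a.jpg">link</a> plain http://example.org/b'
urls = [m.group(0) for m in re.finditer(r"https?://[^\s\"']+", page)]
print(urls)  # ['https://example.org/a.jpg', 'http://example.org/b']
```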

View File

@@ -9,7 +9,7 @@
 """Extract images from https://www.tumblr.com/"""
 
 from .common import Extractor, Message
-from .. import text, oauth, extractor, exception
+from .. import text, oauth, exception
 from datetime import datetime, timedelta
 import re
@@ -128,12 +128,9 @@ class TumblrExtractor(Extractor):
             if self.external:  # external links
                 post["extension"] = None
-                with extractor.blacklist(("tumblr",)):
-                    for key in ("permalink_url", "url"):
-                        url = post.get(key)
-                        if url:
-                            yield Message.Queue, url, post
-                            break
+                url = post.get("permalink_url") or post.get("url")
+                if url:
+                    yield Message.Queue, url, post
 
     def posts(self):
         """Return an iterable containing all relevant posts"""

View File

@@ -75,30 +75,6 @@ class TestExtractorModule(unittest.TestCase):
         self.assertEqual(classes[0], FakeExtractor)
         self.assertIsInstance(extractor.find(uri), FakeExtractor)
 
-    def test_blacklist(self):
-        link_uri = "https://example.org/file.jpg"
-        test_uri = "test:"
-        fake_uri = "fake:"
-
-        self.assertIsInstance(extractor.find(link_uri), DirectlinkExtractor)
-        self.assertIsInstance(extractor.find(test_uri), Extractor)
-        self.assertIsNone(extractor.find(fake_uri))
-
-        with extractor.blacklist(["directlink"]):
-            self.assertIsNone(extractor.find(link_uri))
-            self.assertIsInstance(extractor.find(test_uri), Extractor)
-            self.assertIsNone(extractor.find(fake_uri))
-
-        with extractor.blacklist([], [DirectlinkExtractor, FakeExtractor]):
-            self.assertIsNone(extractor.find(link_uri))
-            self.assertIsInstance(extractor.find(test_uri), Extractor)
-            self.assertIsNone(extractor.find(fake_uri))
-
-        with extractor.blacklist(["test"], [DirectlinkExtractor]):
-            self.assertIsNone(extractor.find(link_uri))
-            self.assertIsNone(extractor.find(test_uri))
-            self.assertIsNone(extractor.find(fake_uri))
-
     def test_from_url(self):
         for uri in self.VALID_URIS:
             cls = extractor.find(uri).__class__