Mirror of https://github.com/mikf/gallery-dl.git (synced 2025-01-31 11:41:35 +01:00)
remove 'extractor.blacklist' context manager
parent c78aa17506
commit 3918b69677
docs/configuration.rst

@@ -1081,16 +1081,6 @@ Description Controls how to handle redirects to CAPTCHA pages.
 =========== =====


-extractor.recursive.blacklist
------------------------------
-=========== =====
-Type        ``list`` of ``strings``
-Default     ``["directlink", "oauth", "recursive", "test"]``
-Description A list of extractor categories which should be ignored when using
-            the ``recursive`` extractor.
-=========== =====
-
-
 extractor.reddit.comments
 -------------------------
 =========== =====
docs/gallery-dl.conf

@@ -119,10 +119,6 @@
         {
             "captcha": "stop"
         },
-        "recursive":
-        {
-            "blacklist": ["directlink", "oauth", "recursive", "test"]
-        },
         "reddit":
         {
             "comments": 0,
gallery_dl/extractor/__init__.py

@@ -140,7 +140,7 @@ def find(url):
     """Find a suitable extractor for the given URL"""
     for cls in _list_classes():
         match = cls.pattern.match(url)
-        if match and cls not in _blacklist:
+        if match:
             return cls(match)
     return None

@@ -169,26 +169,10 @@ def extractors():
     )


-class blacklist():
-    """Context Manager to blacklist extractor modules"""
-    def __init__(self, categories, extractors=None):
-        self.extractors = extractors or []
-        for cls in _list_classes():
-            if cls.category in categories:
-                self.extractors.append(cls)
-
-    def __enter__(self):
-        _blacklist.update(self.extractors)
-
-    def __exit__(self, etype, value, traceback):
-        _blacklist.clear()
-
-
 # --------------------------------------------------------------------
 # internals

 _cache = []
-_blacklist = set()
 _module_iter = iter(modules)
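
Read together, the two hunks above show the mechanism this commit drops: find() no longer consults the module-level _blacklist set, and the context manager that temporarily filled it is deleted. The following is a minimal, self-contained sketch of that pattern, based only on the code visible in this diff; ExampleExtractor, _classes and the assert-based usage at the end are simplified stand-ins for illustration, not the real gallery_dl internals.

import re


class ExampleExtractor:
    """Stand-in for a gallery_dl extractor class (hypothetical)."""
    category = "example"
    pattern = re.compile(r"example:")

    def __init__(self, match):
        self.match = match


_classes = [ExampleExtractor]   # stand-in for _list_classes()
_blacklist = set()              # module-level set, as in the removed code


def find(url):
    """Return an extractor instance for 'url', skipping blacklisted classes"""
    for cls in _classes:
        match = cls.pattern.match(url)
        if match and cls not in _blacklist:
            return cls(match)
    return None


class blacklist():
    """Context manager that temporarily blacklists extractor classes"""

    def __init__(self, categories, extractors=None):
        self.extractors = extractors or []
        for cls in _classes:
            if cls.category in categories:
                self.extractors.append(cls)

    def __enter__(self):
        _blacklist.update(self.extractors)

    def __exit__(self, etype, value, traceback):
        _blacklist.clear()


# usage: the class is blacklisted only while the 'with' block is active
assert isinstance(find("example:1"), ExampleExtractor)
with blacklist(("example",)):
    assert find("example:1") is None
assert isinstance(find("example:1"), ExampleExtractor)

The call sites changed below (plurk.py, recursive.py, tumblr.py) wrapped their Message.Queue loops in this context manager, apparently to keep the queued URLs from being matched again by the same or configured categories; after this commit they yield the URLs directly.
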
gallery_dl/extractor/plurk.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,7 @@
 """Extractors for https://www.plurk.com/"""

 from .common import Extractor, Message
-from .. import text, extractor, exception
+from .. import text, exception
 import datetime
 import time
 import json
@@ -23,12 +23,9 @@ class PlurkExtractor(Extractor):

     def items(self):
         urls = self._urls_ex if self.config("comments", False) else self._urls

         yield Message.Version, 1
-        with extractor.blacklist(("plurk",)):
-            for plurk in self.plurks():
-                for url in urls(plurk):
-                    yield Message.Queue, url, plurk
+        for plurk in self.plurks():
+            for url in urls(plurk):
+                yield Message.Queue, url, plurk

     def plurks(self):
         """Return an iterable with all relevant 'plurk' objects"""
gallery_dl/extractor/recursive.py

@@ -9,7 +9,6 @@
 """Recursive extractor"""

 from .common import Extractor, Message
-from .. import extractor, util
 import requests
 import re

@@ -23,17 +22,12 @@ class RecursiveExtractor(Extractor):
     })

     def items(self):
-        blist = self.config(
-            "blacklist", {"directlink"} | util.SPECIAL_EXTRACTORS)
-
         self.session.mount("file://", FileAdapter())
         page = self.request(self.url.partition(":")[2]).text
         del self.session.adapters["file://"]

         yield Message.Version, 1
-        with extractor.blacklist(blist):
-            for match in re.finditer(r"https?://[^\s\"']+", page):
-                yield Message.Queue, match.group(0), {}
+        for match in re.finditer(r"https?://[^\s\"']+", page):
+            yield Message.Queue, match.group(0), {}


 class FileAdapter(requests.adapters.BaseAdapter):
gallery_dl/extractor/tumblr.py

@@ -9,7 +9,7 @@
 """Extract images from https://www.tumblr.com/"""

 from .common import Extractor, Message
-from .. import text, oauth, extractor, exception
+from .. import text, oauth, exception
 from datetime import datetime, timedelta
 import re

@@ -128,12 +128,9 @@ class TumblrExtractor(Extractor):

             if self.external:  # external links
                 post["extension"] = None
-                with extractor.blacklist(("tumblr",)):
-                    for key in ("permalink_url", "url"):
-                        url = post.get(key)
-                        if url:
-                            yield Message.Queue, url, post
-                            break
+                url = post.get("permalink_url") or post.get("url")
+                if url:
+                    yield Message.Queue, url, post

     def posts(self):
         """Return an iterable containing all relevant posts"""
test/test_extractor.py

@@ -75,30 +75,6 @@ class TestExtractorModule(unittest.TestCase):
         self.assertEqual(classes[0], FakeExtractor)
         self.assertIsInstance(extractor.find(uri), FakeExtractor)

-    def test_blacklist(self):
-        link_uri = "https://example.org/file.jpg"
-        test_uri = "test:"
-        fake_uri = "fake:"
-
-        self.assertIsInstance(extractor.find(link_uri), DirectlinkExtractor)
-        self.assertIsInstance(extractor.find(test_uri), Extractor)
-        self.assertIsNone(extractor.find(fake_uri))
-
-        with extractor.blacklist(["directlink"]):
-            self.assertIsNone(extractor.find(link_uri))
-            self.assertIsInstance(extractor.find(test_uri), Extractor)
-            self.assertIsNone(extractor.find(fake_uri))
-
-        with extractor.blacklist([], [DirectlinkExtractor, FakeExtractor]):
-            self.assertIsNone(extractor.find(link_uri))
-            self.assertIsInstance(extractor.find(test_uri), Extractor)
-            self.assertIsNone(extractor.find(fake_uri))
-
-        with extractor.blacklist(["test"], [DirectlinkExtractor]):
-            self.assertIsNone(extractor.find(link_uri))
-            self.assertIsNone(extractor.find(test_uri))
-            self.assertIsNone(extractor.find(fake_uri))
-
     def test_from_url(self):
         for uri in self.VALID_URIS:
             cls = extractor.find(uri).__class__