1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

update extractor-unittest capabilities

- "count" can now be a string defining a comparison in the form of
  '<operator> <value>', for example: '> 12' or '!= 1'. If its value
  is not a string, it is assumed to be a concrete integer as before.

- "keyword" can now be a dictionary defining tests for individual keys.
  These tests can either be a type, a concrete value or a regex
  starting with "re:". Dictionaries can be stacked inside each other.
  Optional keys can be indicated with a "?" before their names.

  For example:
      "keyword": {
          "image_id": int,
          "gallery_id": 123,
          "name": "re:pattern",
          "user": {
              "id": 321,
          },
          "?optional": None,
      }
This commit is contained in:
Mike Fährmann 2017-12-30 19:05:37 +01:00
parent 88bb0798fd
commit 1a70857a12
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 81 additions and 41 deletions

View File

@ -289,7 +289,8 @@ class TestJob(DownloadJob):
def __init__(self, url, parent=None, content=False):
DownloadJob.__init__(self, url, parent)
self.content = content
self.urllist = []
self.list_url = []
self.list_keyword = []
self.hash_url = hashlib.sha1()
self.hash_keyword = hashlib.sha1()
self.hash_content = hashlib.sha1()
@ -306,7 +307,7 @@ class TestJob(DownloadJob):
self.update_content(url)
def handle_directory(self, keywords):
self.update_keyword(keywords)
self.update_keyword(keywords, False)
def handle_queue(self, url, keywords):
self.update_url(url)
@ -314,14 +315,15 @@ class TestJob(DownloadJob):
def update_url(self, url):
"""Update the URL hash"""
self.urllist.append(url)
self.list_url.append(url)
self.hash_url.update(url.encode())
def update_keyword(self, kwdict):
def update_keyword(self, kwdict, to_list=True):
"""Update the keyword hash"""
if to_list:
self.list_keyword.append(kwdict.copy())
self.hash_keyword.update(
json.dumps(kwdict, sort_keys=True).encode()
)
json.dumps(kwdict, sort_keys=True).encode())
def update_content(self, url):
"""Update the content hash"""

View File

@ -12,6 +12,16 @@ import unittest
from gallery_dl import extractor, job, config, exception
# Extractor categories left out of the generated tests when no specific
# categories are selected on the command line (see generate_tests()).
SKIP = {
# don't work on travis-ci
"exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie",
"archivedmoe", "archiveofsins", "thebarchive",
# temporary issues
# (no entries at the moment)
}
class TestExtractors(unittest.TestCase):
def setUp(self):
@ -51,65 +61,93 @@ class TestExtractors(unittest.TestCase):
try:
if 500 <= exc.args[0].response.status_code < 600:
self.skipTest(exc)
except AttributeError as e:
except AttributeError:
pass
raise
if "url" in result:
self.assertEqual(result["url"], tjob.hash_url.hexdigest())
if "keyword" in result:
self.assertEqual(result["keyword"], tjob.hash_keyword.hexdigest())
if "content" in result:
self.assertEqual(result["content"], tjob.hash_content.hexdigest())
if "keyword" in result:
keyword = result["keyword"]
if isinstance(keyword, dict):
for kwdict in tjob.list_keyword:
self._test_kwdict(kwdict, keyword)
else: # assume SHA1 hash
self.assertEqual(keyword, tjob.hash_keyword.hexdigest())
if "count" in result:
self.assertEqual(len(tjob.urllist), int(result["count"]))
count = result["count"]
if isinstance(count, str):
self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
expr = "{} {}".format(len(tjob.list_url), count)
self.assertTrue(eval(expr), msg=expr)
else: # assume integer
self.assertEqual(len(tjob.list_url), count)
if "pattern" in result:
for url in tjob.urllist:
for url in tjob.list_url:
self.assertRegex(url, result["pattern"])
def _test_kwdict(self, kwdict, tests):
for key, test in tests.items():
if key.startswith("?"):
key = key[1:]
if key not in kwdict:
continue
self.assertIn(key, kwdict)
value = kwdict[key]
# dynamically generate tests
def _generate_test(extr, tcase):
def test(self):
url, result = tcase
print("\n", url, sep="")
self._run_test(extr, url, result)
return test
# Dispatch on the kind of expectation given for this key:
#   dict       -> recurse into the nested keyword dictionary
#   type       -> the extracted value must be an instance of that type
#   "re:<pat>" -> the extracted value must match the regex <pat>
#   otherwise  -> the extracted value must equal the expectation
if isinstance(test, dict):
    self._test_kwdict(value, test)
elif isinstance(test, type):
    self.assertIsInstance(value, test)
elif isinstance(test, str) and test.startswith("re:"):
    # The "re:" marker is part of the expectation, not of the extracted
    # value; checking the value here would send every regex expectation
    # to assertEqual and raise AttributeError for non-string values.
    self.assertRegex(value, test[3:])
else:
    self.assertEqual(value, test)
skip = [
# don't work on travis-ci
"exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie",
"archivedmoe", "archiveofsins", "thebarchive",
# temporary issues
"mangapark",
]
# enable selective testing for direct calls
if __name__ == '__main__' and len(sys.argv) > 1:
if sys.argv[1].lower() == "all":
extractors = extractor.extractors()
def generate_tests():
"""Dynamically generate extractor unittests"""
def _generate_test(extr, tcase):
def test(self):
url, result = tcase
print("\n", url, sep="")
self._run_test(extr, url, result)
return test
# enable selective testing for direct calls
if __name__ == '__main__' and len(sys.argv) > 1:
if sys.argv[1].lower() == "all":
extractors = extractor.extractors()
else:
extractors = [
extr for extr in extractor.extractors()
if extr.category in sys.argv or
hasattr(extr, "basecategory") and extr.basecategory in sys.argv
]
del sys.argv[1:]
else:
extractors = [
extr for extr in extractor.extractors()
if extr.category in sys.argv or
hasattr(extr, "basecategory") and extr.basecategory in sys.argv
if extr.category not in SKIP
]
del sys.argv[1:]
else:
extractors = [
extr for extr in extractor.extractors()
if extr.category not in skip
]
for extr in extractors:
if hasattr(extr, "test") and extr.test:
for extr in extractors:
if not hasattr(extr, "test") or not extr.test:
continue
name = "test_" + extr.__name__ + "_"
for num, tcase in enumerate(extr.test, 1):
test = _generate_test(extr, tcase)
test.__name__ = name + str(num)
setattr(TestExtractors, test.__name__, test)
del test
generate_tests()
if __name__ == '__main__':
unittest.main(warnings='ignore')