From 1a70857a1203996adea06bcc275dbcdfe14238ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 30 Dec 2017 19:05:37 +0100 Subject: [PATCH] update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before their names. For example: "keyword": { "image_id": int, "gallery_id": 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } --- gallery_dl/job.py | 14 +++--- test/test_extractors.py | 108 +++++++++++++++++++++++++++------------- 2 files changed, 81 insertions(+), 41 deletions(-) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 8e592609..31848ec0 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -289,7 +289,8 @@ class TestJob(DownloadJob): def __init__(self, url, parent=None, content=False): DownloadJob.__init__(self, url, parent) self.content = content - self.urllist = [] + self.list_url = [] + self.list_keyword = [] self.hash_url = hashlib.sha1() self.hash_keyword = hashlib.sha1() self.hash_content = hashlib.sha1() @@ -306,7 +307,7 @@ class TestJob(DownloadJob): self.update_content(url) def handle_directory(self, keywords): - self.update_keyword(keywords) + self.update_keyword(keywords, False) def handle_queue(self, url, keywords): self.update_url(url) @@ -314,14 +315,15 @@ class TestJob(DownloadJob): def update_url(self, url): """Update the URL hash""" - self.urllist.append(url) + self.list_url.append(url) self.hash_url.update(url.encode()) - def update_keyword(self, kwdict): + def update_keyword(self, kwdict, to_list=True): """Update the keyword hash""" + if to_list: +
self.list_keyword.append(kwdict.copy()) self.hash_keyword.update( - json.dumps(kwdict, sort_keys=True).encode() - ) + json.dumps(kwdict, sort_keys=True).encode()) def update_content(self, url): """Update the content hash""" diff --git a/test/test_extractors.py b/test/test_extractors.py index 9f4de281..9a2110f6 100644 --- a/test/test_extractors.py +++ b/test/test_extractors.py @@ -12,6 +12,16 @@ import unittest from gallery_dl import extractor, job, config, exception +SKIP = { + # don't work on travis-ci + "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", + "archivedmoe", "archiveofsins", "thebarchive", + + # temporary issues + +} + + class TestExtractors(unittest.TestCase): def setUp(self): @@ -51,65 +61,93 @@ class TestExtractors(unittest.TestCase): try: if 500 <= exc.args[0].response.status_code < 600: self.skipTest(exc) - except AttributeError as e: + except AttributeError: pass raise if "url" in result: self.assertEqual(result["url"], tjob.hash_url.hexdigest()) - if "keyword" in result: - self.assertEqual(result["keyword"], tjob.hash_keyword.hexdigest()) + if "content" in result: self.assertEqual(result["content"], tjob.hash_content.hexdigest()) + + if "keyword" in result: + keyword = result["keyword"] + if isinstance(keyword, dict): + for kwdict in tjob.list_keyword: + self._test_kwdict(kwdict, keyword) + else: # assume SHA1 hash + self.assertEqual(keyword, tjob.hash_keyword.hexdigest()) + if "count" in result: - self.assertEqual(len(tjob.urllist), int(result["count"])) + count = result["count"] + if isinstance(count, str): + self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$") + expr = "{} {}".format(len(tjob.list_url), count) + self.assertTrue(eval(expr), msg=expr) + else: # assume integer + self.assertEqual(len(tjob.list_url), count) + if "pattern" in result: - for url in tjob.urllist: + for url in tjob.list_url: self.assertRegex(url, result["pattern"]) + def _test_kwdict(self, kwdict, tests): + for key, test in tests.items(): + if 
key.startswith("?"): + key = key[1:] + if key not in kwdict: + continue + self.assertIn(key, kwdict) + value = kwdict[key] -# dynamically generate tests -def _generate_test(extr, tcase): - def test(self): - url, result = tcase - print("\n", url, sep="") - self._run_test(extr, url, result) - return test + if isinstance(test, dict): + self._test_kwdict(kwdict[key], test) + continue + elif isinstance(test, type): + self.assertIsInstance(value, test) + elif isinstance(test, str) and test.startswith("re:"): + self.assertRegex(value, test[3:]) + else: + self.assertEqual(value, test) -skip = [ - # don't work on travis-ci - "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", - "archivedmoe", "archiveofsins", "thebarchive", - # temporary issues - "mangapark", -] -# enable selective testing for direct calls -if __name__ == '__main__' and len(sys.argv) > 1: - if sys.argv[1].lower() == "all": - extractors = extractor.extractors() +def generate_tests(): + """Dynamically generate extractor unittests""" + def _generate_test(extr, tcase): + def test(self): + url, result = tcase + print("\n", url, sep="") + self._run_test(extr, url, result) + return test + + # enable selective testing for direct calls + if __name__ == '__main__' and len(sys.argv) > 1: + if sys.argv[1].lower() == "all": + extractors = extractor.extractors() + else: + extractors = [ + extr for extr in extractor.extractors() + if extr.category in sys.argv or + hasattr(extr, "basecategory") and extr.basecategory in sys.argv + ] + del sys.argv[1:] else: extractors = [ extr for extr in extractor.extractors() - if extr.category in sys.argv or - hasattr(extr, "basecategory") and extr.basecategory in sys.argv + if extr.category not in SKIP ] - del sys.argv[1:] -else: - extractors = [ - extr for extr in extractor.extractors() - if extr.category not in skip - ] - -for extr in extractors: - if hasattr(extr, "test") and extr.test: + for extr in extractors: + if not hasattr(extr, "test") or not extr.test: + continue
name = "test_" + extr.__name__ + "_" for num, tcase in enumerate(extr.test, 1): test = _generate_test(extr, tcase) test.__name__ = name + str(num) setattr(TestExtractors, test.__name__, test) - del test + +generate_tests() if __name__ == '__main__': unittest.main(warnings='ignore')