gallery-dl/test/test_extractors.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

import sys
import unittest
from gallery_dl import extractor, job, config, exception


# Extractor categories that are left out of the default test run
# (see generate_tests(); a direct call with arguments ignores this set).
SKIP = {
    # don't work on travis-ci
    "exhentai",
    "kissmanga",
    "mangafox",
    "dynastyscans",
    "nijie",
    "archivedmoe",
    "archiveofsins",
    "thebarchive",

    # temporary issues
    "chronos",
    "coreimg",
    "hosturimage",
    "yeet",
}


class TestExtractors(unittest.TestCase):
    """Run the test cases that extractor classes declare themselves.

    The actual 'test_*' methods are not written out in this class; they are
    attached to it dynamically by generate_tests().
    """

    def setUp(self):
        """Install the configuration values shared by all extractor tests."""
        name = "gallerydl"
        email = "gallerydl@openaliasbox.org"
        config.set(("cache", "file"), ":memory:")
        config.set(("extractor", "username"), name)
        config.set(("extractor", "password"), name)
        # nijie and seiga get the e-mail address as username instead
        config.set(("extractor", "nijie", "username"), email)
        config.set(("extractor", "seiga", "username"), email)
        config.set(("downloader", "part"), False)

    def tearDown(self):
        """Drop all config values, including per-test 'options'."""
        config.clear()

    def _run_test(self, extr, url, result):
        """Run a TestJob for 'url' and compare its output with 'result'.

        Args:
            extr: extractor class that is expected to handle 'url'
            url: URL passed to job.TestJob
            result: a falsy value (only the extractor match is checked)
                or a dict with any of these optional keys:
                  "options"   - iterable of ("dotted.key", value) pairs
                                written to the config before the job
                  "exception" - exception type that running must raise
                  "url"       - expected hex digest of tjob.hash_url
                  "content"   - expected hex digest of tjob.hash_content
                  "keyword"   - dict of per-key tests (see _test_kwdict)
                                or expected hex digest of tjob.hash_keyword
                  "count"     - expected number of URLs, either an integer
                                or a string like '> 12' or '!= 1'
                  "pattern"   - regex every resulting URL has to match
        """
        if result:
            if "options" in result:
                for key, value in result["options"]:
                    config.set(key.split("."), value)
            content = "content" in result
        else:
            content = False

        tjob = job.TestJob(url, content=content)
        self.assertEqual(extr, tjob.extractor.__class__)

        if not result:
            return
        if "exception" in result:
            self.assertRaises(result["exception"], tjob.run)
            return

        try:
            tjob.run()
        except exception.HttpError as exc:
            # a 5xx response is a server-side problem: skip instead of fail
            try:
                if 500 <= exc.args[0].response.status_code < 600:
                    self.skipTest(exc)
            except AttributeError:
                # no response/status code available on this error
                pass
            raise

        # test archive-id uniqueness
        self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))

        # test extraction results
        if "url" in result:
            self.assertEqual(result["url"], tjob.hash_url.hexdigest())

        if "content" in result:
            self.assertEqual(result["content"], tjob.hash_content.hexdigest())

        if "keyword" in result:
            keyword = result["keyword"]
            if isinstance(keyword, dict):
                for kwdict in tjob.list_keyword:
                    self._test_kwdict(kwdict, keyword)
            else:  # assume SHA1 hash
                self.assertEqual(keyword, tjob.hash_keyword.hexdigest())

        if "count" in result:
            count = result["count"]
            if isinstance(count, str):
                # NOTE: eval() is only reached after this regex has limited
                # 'count' to '<comparison operator> <digits>'; keep the two
                # lines together when changing either of them.
                self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
                expr = "{} {}".format(len(tjob.list_url), count)
                self.assertTrue(eval(expr), msg=expr)
            else:  # assume integer
                self.assertEqual(len(tjob.list_url), count)

        if "pattern" in result:
            for url in tjob.list_url:
                self.assertRegex(url, result["pattern"])

    def _test_kwdict(self, kwdict, tests):
        """Check the entries of 'kwdict' against the expectations in 'tests'.

        Each key of 'tests' names a key of 'kwdict'; a leading "?" marks
        the key as optional, i.e. it is only tested if it is present.
        The expectation can be
          - a dict:   the value is tested recursively against it
          - a type:   the value has to be an instance of it
          - a string starting with "re:": the value has to match the
                      regex following that prefix
          - anything else: the value has to be equal to it
        """
        for key, test in tests.items():
            if key.startswith("?"):
                key = key[1:]
                if key not in kwdict:
                    continue
            self.assertIn(key, kwdict)
            value = kwdict[key]

            if isinstance(test, dict):
                self._test_kwdict(value, test)
            elif isinstance(test, type):
                self.assertIsInstance(value, test)
            elif isinstance(test, str) and test.startswith("re:"):
                # the "re:" marker is part of the expectation, not the value
                self.assertRegex(value, test[3:])
            else:
                self.assertEqual(value, test)


def generate_tests():
    """Attach one test method per extractor test case to TestExtractors.

    Every entry of an extractor's 'test' attribute becomes a method named
    'test_<ExtractorClassName>_<n>', with <n> counting from 1.
    """
    def _generate_test(extr, tcase):
        # factory function, so each generated method keeps its own
        # extractor/test-case pair
        def test(self):
            url, result = tcase
            print("\n", url, sep="")
            self._run_test(extr, url, result)
        return test

    def _requested(extr):
        # True if the category or basecategory was named on the command line
        if extr.category in sys.argv:
            return True
        return hasattr(extr, "basecategory") and extr.basecategory in sys.argv

    if __name__ != '__main__' or len(sys.argv) <= 1:
        # imported, or called without arguments: everything not in SKIP
        extractors = [
            extr for extr in extractor.extractors()
            if extr.category not in SKIP
        ]
    elif sys.argv[1].lower() == "all":
        # direct call with "all": no filtering
        extractors = extractor.extractors()
        del sys.argv[1:]
    else:
        # direct call with extractor names: selective testing
        extractors = [
            extr for extr in extractor.extractors()
            if _requested(extr)
        ]
        del sys.argv[1:]

    for extr in extractors:
        tcases = getattr(extr, "test", None)
        if not tcases:
            continue
        prefix = "test_{}_".format(extr.__name__)
        for num, tcase in enumerate(tcases, 1):
            method = _generate_test(extr, tcase)
            method.__name__ = prefix + str(num)
            setattr(TestExtractors, method.__name__, method)


# Attach the generated test methods as soon as this module is loaded, so they
# exist on TestExtractors whether the file is imported or run directly.
generate_tests()
if __name__ == '__main__':
    # generate_tests() has already removed any extractor names from sys.argv
    unittest.main(warnings='ignore')
testing environment for extractor results 2015-12-12 15:58:07 +01:00			`#!/usr/bin/env python3`
			`# -*- coding: utf-8 -*-`

[nijie] fix dojin extraction - correctly extract artist_id - set extension to "jpg" if it was empty and let filetype checks do the rest 2018-02-09 21:51:35 +01:00			`# Copyright 2015-2018 Mike Fährmann`
testing environment for extractor results 2015-12-12 15:58:07 +01:00			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

several changes to make travis build work - fixed html.unescape not being available on Python3.3 - removed inconsistent test result - added username/password pairs for authenticating extractors 2017-01-10 13:41:00 +01:00			`import sys`
testing environment for extractor results 2015-12-12 15:58:07 +01:00			`import unittest`
skip tests on 5xx status codes 2017-11-12 20:51:12 +01:00			`from gallery_dl import extractor, job, config, exception`
make extractor unittest discoverable 2017-01-09 12:27:20 +01:00
testing environment for extractor results 2015-12-12 15:58:07 +01:00
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00			`SKIP = {`
			`# don't work on travis-ci`
			`"exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie",`
			`"archivedmoe", "archiveofsins", "thebarchive",`

			`# temporary issues`
update test results nothing broke, but things got updated or changed 2018-01-23 16:54:19 +01:00			`"chronos",`
			`"coreimg",`
[nijie] fix dojin extraction - correctly extract artist_id - set extension to "jpg" if it was empty and let filetype checks do the rest 2018-02-09 21:51:35 +01:00			`"hosturimage",`
implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`"yeet",`
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00			`}`


remove 'unstable' tests 2015-12-15 23:45:40 +01:00			`class TestExtractors(unittest.TestCase):`
testing environment for extractor results 2015-12-12 15:58:07 +01:00
dynamically create extractor testcases 2016-02-18 15:53:53 +01:00			`def setUp(self):`
several changes to make travis build work - fixed html.unescape not being available on Python3.3 - removed inconsistent test result - added username/password pairs for authenticating extractors 2017-01-10 13:41:00 +01:00			`name = "gallerydl"`
			`email = "gallerydl@openaliasbox.org"`
initialize cache-module before running tests 2016-03-08 18:01:35 +01:00			`config.set(("cache", "file"), ":memory:")`
fix some smaller stuff - remove support for old windows config paths - catch exception if cache-database can't be opened - fix username/password settings for unit tests - rename variable 'max_tries' to 'retries' 2017-03-27 13:22:02 +02:00			`config.set(("extractor", "username"), name)`
			`config.set(("extractor", "password"), name)`
several changes to make travis build work - fixed html.unescape not being available on Python3.3 - removed inconsistent test result - added username/password pairs for authenticating extractors 2017-01-10 13:41:00 +01:00			`config.set(("extractor", "nijie", "username"), email)`
			`config.set(("extractor", "seiga", "username"), email)`
re-enable download unit tests 2017-10-25 12:55:36 +02:00			`config.set(("downloader", "part"), False)`
testing environment for extractor results 2015-12-12 15:58:07 +01:00
code cleanup and fixes 2017-07-25 14:59:41 +02:00			`def tearDown(self):`
			`config.clear()`

make extractor unittest discoverable 2017-01-09 12:27:20 +01:00			`def _run_test(self, extr, url, result):`
[deviantart] always download original images Deviation-objects returned by the DeviantArt API don't always contain the URL and metadata of the original image ([1]). Getting this information requires an additional API call [2], which is indicated by the 'is_downloadable' and 'download_filesize' metadata within a deviation-object. [1] https://myria-moon.deviantart.com/art/Aime-Moi-part-en-vadrouille-261986576 [2] https://www.deviantart.com/developers/http/v1/20160316/deviation_download/bed6982b88949bdb08b52cd6763fcafd 2017-10-07 13:07:34 +02:00			`if result:`
			`if "options" in result:`
			`for key, value in result["options"]:`
			`config.set(key.split("."), value)`
			`content = "content" in result`
			`else:`
			`content = False`

re-enable download unit tests 2017-10-25 12:55:36 +02:00			`tjob = job.TestJob(url, content=content)`
fix exception based tests 2017-02-26 02:06:56 +01:00			`self.assertEqual(extr, tjob.extractor.__class__)`
[deviantart] always download original images Deviation-objects returned by the DeviantArt API don't always contain the URL and metadata of the original image ([1]). Getting this information requires an additional API call [2], which is indicated by the 'is_downloadable' and 'download_filesize' metadata within a deviation-object. [1] https://myria-moon.deviantart.com/art/Aime-Moi-part-en-vadrouille-261986576 [2] https://www.deviantart.com/developers/http/v1/20160316/deviation_download/bed6982b88949bdb08b52cd6763fcafd 2017-10-07 13:07:34 +02:00
allow 'only_matching' tests 2017-06-13 23:10:42 +02:00			`if not result:`
			`return`
restore exception-testing to its old form 2017-02-27 23:05:08 +01:00			`if "exception" in result:`
			`self.assertRaises(result["exception"], tjob.run)`
			`return`
[deviantart] always download original images Deviation-objects returned by the DeviantArt API don't always contain the URL and metadata of the original image ([1]). Getting this information requires an additional API call [2], which is indicated by the 'is_downloadable' and 'download_filesize' metadata within a deviation-object. [1] https://myria-moon.deviantart.com/art/Aime-Moi-part-en-vadrouille-261986576 [2] https://www.deviantart.com/developers/http/v1/20160316/deviation_download/bed6982b88949bdb08b52cd6763fcafd 2017-10-07 13:07:34 +02:00
skip tests on 5xx status codes 2017-11-12 20:51:12 +01:00			`try:`
			`tjob.run()`
			`except exception.HttpError as exc:`
			`try:`
			`if 500 <= exc.args[0].response.status_code < 600:`
			`self.skipTest(exc)`
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00			`except AttributeError:`
skip tests on 5xx status codes 2017-11-12 20:51:12 +01:00			`pass`
			`raise`

test archive-id creation and uniqueness 2018-02-12 23:02:09 +01:00			`# test archive-id uniqueness`
			`self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))`

			`# test extraction results`
make test-parameters optional 2015-12-13 03:56:29 +01:00			`if "url" in result:`
implement support for additional unit test result types - "pattern" matches all resulting URLs against the given regex - "count" allows to specify the amount of returned URLs 2017-08-25 22:01:14 +02:00			`self.assertEqual(result["url"], tjob.hash_url.hexdigest())`
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00
re-enable download unit tests 2017-10-25 12:55:36 +02:00			`if "content" in result:`
			`self.assertEqual(result["content"], tjob.hash_content.hexdigest())`
testing environment for extractor results 2015-12-12 15:58:07 +01:00
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00			`if "keyword" in result:`
			`keyword = result["keyword"]`
			`if isinstance(keyword, dict):`
			`for kwdict in tjob.list_keyword:`
			`self._test_kwdict(kwdict, keyword)`
			`else: # assume SHA1 hash`
			`self.assertEqual(keyword, tjob.hash_keyword.hexdigest())`
dynamically create extractor testcases 2016-02-18 15:53:53 +01:00
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00			`if "count" in result:`
			`count = result["count"]`
			`if isinstance(count, str):`
			`self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")`
			`expr = "{} {}".format(len(tjob.list_url), count)`
			`self.assertTrue(eval(expr), msg=expr)`
			`else: # assume integer`
			`self.assertEqual(len(tjob.list_url), count)`
dynamically create extractor testcases 2016-02-18 15:53:53 +01:00
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00			`if "pattern" in result:`
			`for url in tjob.list_url:`
			`self.assertRegex(url, result["pattern"])`
code adjustments according to pep8 2017-01-30 19:40:15 +01:00
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00			`def _test_kwdict(self, kwdict, tests):`
			`for key, test in tests.items():`
			`if key.startswith("?"):`
			`key = key[1:]`
			`if key not in kwdict:`
			`continue`
			`self.assertIn(key, kwdict)`
			`value = kwdict[key]`

			`if isinstance(test, dict):`
			`self._test_kwdict(kwdict[key], test)`
			`continue`
			`elif isinstance(test, type):`
			`self.assertIsInstance(value, test)`
			`elif isinstance(test, str) and value.startswith("re:"):`
			`self.assertRegex(value, test[3:])`
			`else:`
			`self.assertEqual(value, test)`


			`def generate_tests():`
			`"""Dynamically generate extractor unittests"""`
			`def _generate_test(extr, tcase):`
			`def test(self):`
			`url, result = tcase`
			`print("\n", url, sep="")`
			`self._run_test(extr, url, result)`
			`return test`

			`# enable selective testing for direct calls`
			`if __name__ == '__main__' and len(sys.argv) > 1:`
			`if sys.argv[1].lower() == "all":`
			`extractors = extractor.extractors()`
			`else:`
			`extractors = [`
			`extr for extr in extractor.extractors()`
			`if extr.category in sys.argv or`
			`hasattr(extr, "basecategory") and extr.basecategory in sys.argv`
			`]`
			`del sys.argv[1:]`
enable extractor tests without filters $ python test_extractors.py all 2017-07-02 08:15:12 +02:00			`else:`
			`extractors = [`
			`extr for extr in extractor.extractors()`
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00			`if extr.category not in SKIP`
enable extractor tests without filters $ python test_extractors.py all 2017-07-02 08:15:12 +02:00			`]`
dynamically create extractor testcases 2016-02-18 15:53:53 +01:00
update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00			`for extr in extractors:`
			`if not hasattr(extr, "test") or not extr.test:`
			`continue`
make extractor unittest discoverable 2017-01-09 12:27:20 +01:00			`name = "test_" + extr.__name__ + "_"`
			`for num, tcase in enumerate(extr.test, 1):`
			`test = _generate_test(extr, tcase)`
			`test.__name__ = name + str(num)`
			`setattr(TestExtractors, test.__name__, test)`

update extractor-unittest capabilities - "count" can now be a string defining a comparison in the form of '<operator> <value>', for example: '> 12' or '!= 1'. If its value is not a string, it is assumed to be a concrete integer as before. - "keyword" can now be a dictionary defining tests for individual keys. These tests can either be a type, a concrete value or a regex starting with "re:". Dictionaries can be stacked inside each other. Optional keys can be indicated with a "?" before its name. For example: "keyword:" { "image_id": int, "gallery_id", 123, "name": "re:pattern", "user": { "id": 321, }, "?optional": None, } 2017-12-30 19:05:37 +01:00
			`generate_tests()`
testing environment for extractor results 2015-12-12 15:58:07 +01:00			`if __name__ == '__main__':`
			`unittest.main(warnings='ignore')`