2015-12-12 15:58:07 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2023-09-13 14:54:25 +02:00
|
|
|
# Copyright 2015-2023 Mike Fährmann
|
2015-12-12 15:58:07 +01:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2018-03-13 13:11:10 +01:00
|
|
|
import os
|
2017-01-10 13:41:00 +01:00
|
|
|
import sys
|
2020-05-02 01:15:50 +02:00
|
|
|
import unittest
|
|
|
|
|
2018-07-19 18:47:23 +02:00
|
|
|
import re
|
2019-02-17 18:15:40 +01:00
|
|
|
import json
|
|
|
|
import hashlib
|
2020-02-23 16:48:30 +01:00
|
|
|
import datetime
|
2023-09-13 14:54:25 +02:00
|
|
|
import collections
|
2020-05-02 01:15:50 +02:00
|
|
|
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
2021-09-28 23:35:29 +02:00
|
|
|
from gallery_dl import \
|
|
|
|
extractor, util, job, config, exception, formatter # noqa E402
|
2023-09-13 14:54:25 +02:00
|
|
|
from test import results # noqa E402
|
2017-01-09 12:27:20 +01:00
|
|
|
|
2015-12-12 15:58:07 +01:00
|
|
|
|
2018-03-13 13:11:10 +01:00
|
|
|
# Categories with temporary issues, etc.
BROKEN = {"photobucket"}
|
|
|
|
|
2024-01-12 03:21:44 +01:00
|
|
|
# Base configuration copied into the global config before every test
# (see setup_test_config()); load_test_config() merges user values into it.
CONFIG = {
    "cache": {"file": None},
    "downloader": {"adjust-extensions": False, "part": False},
}
|
|
|
|
|
2024-01-14 22:09:26 +01:00
|
|
|
# Categories whose results are treated as requiring authentication
# whenever a result does not define "#auth" itself.
AUTH = {
    "pixiv", "nijie", "horne", "reddit",
    "seiga", "fantia", "instagram", "twitter",
}
|
|
|
|
|
2024-02-12 23:39:45 +01:00
|
|
|
# Config keys that count as credentials: a test needing authentication is
# skipped unless one of them has a truthy value, and all of them are reset
# to None for results that set a falsy "#auth".
AUTH_CONFIG = (
    "username", "cookies", "api-key", "client-id", "refresh-token",
)
|
|
|
|
|
update extractor-unittest capabilities
- "count" can now be a string defining a comparison in the form of
'<operator> <value>', for example: '> 12' or '!= 1'. If its value
is not a string, it is assumed to be a concrete integer as before.
- "keyword" can now be a dictionary defining tests for individual keys.
These tests can either be a type, a concrete value or a regex
starting with "re:". Dictionaries can be stacked inside each other.
Optional keys can be indicated with a "?" before its name.
For example:
"keyword:" {
"image_id": int,
"gallery_id", 123,
"name": "re:pattern",
"user": {
"id": 321,
},
"?optional": None,
}
2017-12-30 19:05:37 +01:00
|
|
|
|
2018-03-19 17:57:32 +01:00
|
|
|
class TestExtractorResults(unittest.TestCase):
    """Check extractor output against recorded result definitions.

    The 'test_*' methods are not written out here; generate_tests()
    attaches one per result dictionary, each of which calls _run_test().
    Keys of a result starting with "#" control the test run itself,
    all other keys are expected metadata values (see _test_kwdict()).
    """

    # "#count" strings have the form '<operator> <value>', e.g. '> 12'
    _COUNT_PATTERN = re.compile(r"^ *(==|!=|<=|>=|<|>) *(\d+) *$")
    _COUNT_OPERATORS = {
        "==": lambda lhs, rhs: lhs == rhs,
        "!=": lambda lhs, rhs: lhs != rhs,
        "<=": lambda lhs, rhs: lhs <= rhs,
        ">=": lambda lhs, rhs: lhs >= rhs,
        "<": lambda lhs, rhs: lhs < rhs,
        ">": lambda lhs, rhs: lhs > rhs,
    }

    def setUp(self):
        """Install the base test configuration before each test."""
        setup_test_config()

    def tearDown(self):
        """Drop all config values set during the test."""
        config.clear()

    @classmethod
    def setUpClass(cls):
        """Start with an empty list of (url, reason) skip records."""
        cls._skipped = []

    @classmethod
    def tearDownClass(cls):
        """Print a summary of all tests skipped during this run."""
        if cls._skipped:
            print("\n\nSkipped tests:")
            for url, exc in cls._skipped:
                print('- {} ("{}")'.format(url, exc))

    def assertRange(self, value, range, msg=None):
        """Assert that 'value' lies within the given range() object.

        For a step greater than 1, 'value' must be an actual member of
        'range'. Otherwise only the bounds are compared, and both
        'range.start' and 'range.stop' themselves are accepted.
        """
        if range.step > 1:
            self.assertIn(value, range, msg=msg)
        else:
            self.assertLessEqual(value, range.stop, msg=msg)
            self.assertGreaterEqual(value, range.start, msg=msg)

    def _run_test(self, result):
        """Run the extractor for one result dict and verify its output.

        Control keys read from 'result': "#url", "#class", "#category",
        "#comment", "#options", "#auth", "#range", "#exception",
        "#archive", "#extractor", "#sha1_url", "#sha1_content",
        "#sha1_metadata", "#count", "#pattern" and "#urls".
        The test is skipped when required credentials are missing or
        when the run fails with a 5xx / read-timeout HttpError.
        """
        result.pop("#comment", None)
        # Results with at most three entries left carry no expectations
        # besides the extractor class
        # (presumably just "#url", "#category", "#class" - TODO confirm)
        only_matching = (len(result) <= 3)

        if only_matching:
            content = False
        else:
            if "#options" in result:
                for key, value in result["#options"].items():
                    # 'a.b.c' -> path ('a', 'b') and key 'c'
                    key = key.split(".")
                    config.set(key[:-1], key[-1], value)

            auth = result.get("#auth")
            if auth is None:
                auth = (result["#category"][1] in AUTH)
            elif not auth:
                # explicitly unauthenticated: blank out all credentials
                for key in AUTH_CONFIG:
                    config.set((), key, None)

            if auth:
                extr = result["#class"].from_url(result["#url"])
                if not any(extr.config(key) for key in AUTH_CONFIG):
                    msg = "no auth"
                    self._skipped.append((result["#url"], msg))
                    self.skipTest(msg)

            if "#range" in result:
                config.set((), "image-range", result["#range"])
                config.set((), "chapter-range", result["#range"])
            content = ("#sha1_content" in result)

        tjob = ResultJob(result["#url"], content=content)
        self.assertEqual(result["#class"], tjob.extractor.__class__, "#class")

        if only_matching:
            return

        if "#exception" in result:
            with self.assertRaises(result["#exception"], msg="#exception"):
                tjob.run()
            return

        try:
            tjob.run()
        except exception.StopExtraction:
            pass
        except exception.HttpError as exc:
            reason = str(exc)
            # server-side errors and timeouts are not the extractor's fault
            if re.match(r"'5\d\d ", reason) or \
                    re.search(r"\bRead timed out\b", reason):
                self._skipped.append((result["#url"], reason))
                self.skipTest(reason)
            raise

        if result.get("#archive", True):
            self.assertEqual(
                len(set(tjob.archive_list)),
                len(tjob.archive_list),
                msg="archive-id uniqueness")

        if tjob.queue:
            # test '_extractor' entries
            for url, kwdict in zip(tjob.url_list, tjob.kwdict_list):
                if "_extractor" in kwdict:
                    extr = kwdict["_extractor"].from_url(url)
                    if extr is None and not result.get("#extractor", True):
                        continue
                    self.assertIsInstance(extr, kwdict["_extractor"])
                    self.assertEqual(extr.url, url)
        else:
            # test 'extension' entries
            for kwdict in tjob.kwdict_list:
                self.assertIn("extension", kwdict, msg="#extension")

        # test extraction results
        if "#sha1_url" in result:
            self.assertEqual(
                result["#sha1_url"],
                tjob.url_hash.hexdigest(),
                msg="#sha1_url")

        if "#sha1_content" in result:
            expected = result["#sha1_content"]
            digest = tjob.content_hash.hexdigest()
            if isinstance(expected, str):
                self.assertEqual(expected, digest, msg="#sha1_content")
            else:  # iterable
                self.assertIn(digest, expected, msg="#sha1_content")

        if "#sha1_metadata" in result:
            self.assertEqual(
                result["#sha1_metadata"],
                tjob.kwdict_hash.hexdigest(),
                "#sha1_metadata")

        if "#count" in result:
            self._assert_count(len(tjob.url_list), result["#count"])

        if "#pattern" in result:
            self.assertGreater(len(tjob.url_list), 0, msg="#pattern")
            pattern = result["#pattern"]
            if isinstance(pattern, str):
                for url in tjob.url_list:
                    self.assertRegex(url, pattern, msg="#pattern")
            else:
                # zip() stops at the shorter of the two sequences
                for url, pat in zip(tjob.url_list, pattern):
                    self.assertRegex(url, pat, msg="#pattern")

        if "#urls" in result:
            expected = result["#urls"]
            if isinstance(expected, str):
                # fail with a proper assertion instead of an IndexError
                self.assertGreater(len(tjob.url_list), 0, msg="#urls")
                self.assertEqual(tjob.url_list[0], expected, msg="#urls")
            else:
                self.assertSequenceEqual(tjob.url_list, expected, msg="#urls")

        metadata = {k: v for k, v in result.items() if k[0] != "#"}
        if metadata:
            for kwdict in tjob.kwdict_list:
                self._test_kwdict(kwdict, metadata)

    def _assert_count(self, actual, count):
        """Compare the number of collected URLs with a "#count" value.

        'count' is either a string of the form '<operator> <value>'
        ('> 12', '!= 1', ...), a range() object checked by assertRange(),
        or anything else, which is compared for equality.
        """
        if isinstance(count, str):
            self.assertRegex(count, self._COUNT_PATTERN, msg="#count")
            op, value = self._COUNT_PATTERN.match(count).groups()
            expr = "{} {}".format(actual, count)
            # evaluated through an operator table rather than eval()
            self.assertTrue(
                self._COUNT_OPERATORS[op](actual, int(value)), msg=expr)
        elif isinstance(count, range):
            self.assertRange(actual, count, msg="#count")
        else:  # assume integer
            self.assertEqual(actual, count, msg="#count")

    def _test_kwdict(self, kwdict, tests):
        """Check the values in 'kwdict' against the entries of 'tests'.

        A key prefixed with "?" is optional and skipped when missing.
        The kind of check depends on the type of the test value:
        - dict: recursive check of the nested value
        - type: isinstance() check
        - range: assertRange() check
        - list: dict items are checked recursively against the value at
          the same index; a list without dict items is compared as a whole
        - str: "re:<pattern>" regex search, "dt:<text>" datetime whose
          str() equals <text>, "type:<name>" type name, "len:<n>" length
          of a list/tuple; any other string is compared for equality
        - anything else: equality
        """
        for key, test in tests.items():
            if key.startswith("?"):
                key = key[1:]
                if key not in kwdict:
                    continue
            self.assertIn(key, kwdict, msg=key)
            value = kwdict[key]

            if isinstance(test, dict):
                self._test_kwdict(value, test)
            elif isinstance(test, type):
                self.assertIsInstance(value, test, msg=key)
            elif isinstance(test, range):
                self.assertRange(value, test, msg=key)
            elif isinstance(test, list):
                subtest = False
                for idx, item in enumerate(test):
                    if isinstance(item, dict):
                        subtest = True
                        self._test_kwdict(value[idx], item)
                if not subtest:
                    self.assertEqual(test, value, msg=key)
            elif isinstance(test, str):
                if test.startswith("re:"):
                    self.assertRegex(value, test[3:], msg=key)
                elif test.startswith("dt:"):
                    self.assertIsInstance(value, datetime.datetime, msg=key)
                    self.assertEqual(test[3:], str(value), msg=key)
                elif test.startswith("type:"):
                    self.assertEqual(test[5:], type(value).__name__, msg=key)
                elif test.startswith("len:"):
                    self.assertIsInstance(value, (list, tuple), msg=key)
                    self.assertEqual(int(test[4:]), len(value), msg=key)
                else:
                    self.assertEqual(test, value, msg=key)
            else:
                self.assertEqual(test, value, msg=key)
|
update extractor-unittest capabilities
- "count" can now be a string defining a comparison in the form of
'<operator> <value>', for example: '> 12' or '!= 1'. If its value
is not a string, it is assumed to be a concrete integer as before.
- "keyword" can now be a dictionary defining tests for individual keys.
These tests can either be a type, a concrete value or a regex
starting with "re:". Dictionaries can be stacked inside each other.
Optional keys can be indicated with a "?" before its name.
For example:
"keyword:" {
"image_id": int,
"gallery_id", 123,
"name": "re:pattern",
"user": {
"id": 321,
},
"?optional": None,
}
2017-12-30 19:05:37 +01:00
|
|
|
|
|
|
|
|
2019-02-17 18:15:40 +01:00
|
|
|
class ResultJob(job.DownloadJob):
    """Generate test-results for extractor runs"""

    def __init__(self, url, parent=None, content=False):
        """Set up result collectors; hash file content only if 'content'."""
        super().__init__(url, parent)
        self.queue = False
        self.content = content

        # every collected item is kept in a list and fed into a SHA1 hash
        self.url_list, self.url_hash = [], hashlib.sha1()
        self.kwdict_list, self.kwdict_hash = [], hashlib.sha1()
        self.archive_list, self.archive_hash = [], hashlib.sha1()
        self.content_hash = hashlib.sha1()

        if not content:
            # shadow the method with a no-op: nothing gets downloaded
            self._update_content = lambda url, kwdict: None
        else:
            self.fileobj = TestPathfmt(self.content_hash)

        directory_fmt = "".join(self.extractor.directory_fmt)
        self.format_directory = TestFormatter(directory_fmt).format_map
        self.format_filename = TestFormatter(
            self.extractor.filename_fmt).format_map

    def run(self):
        """Initialize the job and dispatch every extractor message."""
        self._init()
        for message in self.extractor:
            self.dispatch(message)

    def handle_url(self, url, kwdict, fallback=None):
        """Record URL, metadata, archive ID and content of a file."""
        self._update_url(url)
        self._update_kwdict(kwdict)
        self._update_archive(kwdict)
        self._update_content(url, kwdict)
        self.format_filename(kwdict)

    def handle_directory(self, kwdict):
        """Hash directory metadata without adding it to 'kwdict_list'."""
        self._update_kwdict(kwdict, False)
        self.format_directory(kwdict)

    def handle_metadata(self, kwdict):
        """Ignore metadata messages."""

    def handle_queue(self, url, kwdict):
        """Record a queued URL and remember that the job queued URLs."""
        self.queue = True
        self._update_url(url)
        self._update_kwdict(kwdict)

    def _update_url(self, url):
        """Store 'url' and add it to the URL hash."""
        self.url_list.append(url)
        self.url_hash.update(url.encode())

    def _update_kwdict(self, kwdict, to_list=True):
        """Hash 'kwdict' and, if 'to_list' is set, store a copy of it."""
        if to_list:
            self.kwdict_list.append(kwdict.copy())
        filtered = util.filter_dict(kwdict)
        serialized = json.dumps(filtered, sort_keys=True, default=str)
        self.kwdict_hash.update(serialized.encode())

    def _update_archive(self, kwdict):
        """Build the archive ID for 'kwdict', then store and hash it."""
        archive_id = self.extractor.archive_fmt.format_map(kwdict)
        self.archive_list.append(archive_id)
        self.archive_hash.update(archive_id.encode())

    def _update_content(self, url, kwdict):
        """Download 'url' into the hashing file object.

        If that download does not succeed, the URLs in
        kwdict["_fallback"] are tried in order until one succeeds.
        """
        target = self.fileobj
        target.kwdict = kwdict

        # the downloader is selected by the URL's scheme
        scheme = url.partition(":")[0]
        if self.get_downloader(scheme).download(url, target):
            return

        fallbacks = kwdict.get("_fallback") or ()
        for num, alt_url in enumerate(fallbacks, 1):
            self.log.warning("Trying fallback URL #%d", num)
            downloader = self.get_downloader(alt_url.partition(":")[0])
            if downloader.download(alt_url, target):
                return
|
2019-02-17 18:15:40 +01:00
|
|
|
|
|
|
|
|
2019-06-25 18:11:13 +02:00
|
|
|
class TestPathfmt:
    """Minimal file-like stand-in that hashes written data.

    Nothing is stored on disk: write() only feeds the given hash
    object and keeps track of the number of bytes received.
    """

    def __init__(self, hashobj):
        self.hashobj = hashobj
        self.kwdict = {}
        self.extension = "jpg"
        self.path = ""
        self.size = 0

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # nothing to release; exceptions are not suppressed
        return None

    def open(self, mode):
        """Reset the byte counter and return this object as file."""
        self.size = 0
        return self

    def write(self, content):
        """Update SHA1 hash"""
        self.hashobj.update(content)
        self.size = self.size + len(content)

    def tell(self):
        """Return the number of bytes written since the last open()."""
        return self.size

    def part_size(self):
        """Always report 0 bytes of existing partial data."""
        return 0
|
|
|
|
|
|
|
|
|
2021-09-28 23:35:29 +02:00
|
|
|
class TestFormatter(formatter.StringFormatter):
    """StringFormatter with stricter replacement-field handling.

    Fields for the 'extension' key and fields whose format function's
    repr() contains '_parse_optional.' always produce an empty string.
    All other fields look up obj[key] directly.
    NOTE(review): presumably so that a missing metadata key raises
    instead of being replaced by a default - confirm against
    formatter.StringFormatter.
    """

    @staticmethod
    def _noop(_):
        return ""

    def _apply_simple(self, key, fmt):
        if key != "extension" and "_parse_optional." not in repr(fmt):
            def wrap(obj):
                return fmt(obj[key])
            return wrap
        return self._noop

    def _apply(self, key, funcs, fmt):
        if key != "extension" and "_parse_optional." not in repr(fmt):
            def wrap(obj):
                value = obj[key]
                # apply the conversion functions in the given order
                for func in funcs:
                    value = func(value)
                return fmt(value)
            return wrap
        return self._noop
|
|
|
|
|
|
|
|
|
2019-02-17 18:15:40 +01:00
|
|
|
def setup_test_config():
    """Load a fresh copy of CONFIG into the global configuration.

    A deep copy is used so that the nested dicts of CONFIG are not
    shared with the live configuration.
    NOTE(review): assumes config.set() with a non-empty path writes into
    existing nested dicts; in that case a plain update() would let
    "#options" of one test leak into CONFIG and all later tests.
    """
    import copy

    config._config.update(copy.deepcopy(CONFIG))
|
|
|
|
|
|
|
|
|
|
|
|
def load_test_config():
    """Merge '<root>/archive/config.json' into CONFIG.

    '<root>' is the parent of the directory containing this file.
    A missing file is silently ignored. Any other error while reading
    or parsing the file terminates the process through sys.exit()
    with a message naming the file and the exception.
    """
    # Resolved outside of 'try' so that 'path' is always bound when the
    # error message is built; abspath() makes the result independent of
    # the current working directory.
    path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "archive", "config.json")

    try:
        # explicit encoding: do not depend on the locale's default
        with open(path, encoding="utf-8") as fp:
            CONFIG.update(json.load(fp))
    except FileNotFoundError:
        pass
    except Exception as exc:
        sys.exit("Error when loading {}: {}: {}".format(
            path, exc.__class__.__name__, exc))
|
2019-02-17 18:15:40 +01:00
|
|
|
|
|
|
|
|
update extractor-unittest capabilities
- "count" can now be a string defining a comparison in the form of
'<operator> <value>', for example: '> 12' or '!= 1'. If its value
is not a string, it is assumed to be a concrete integer as before.
- "keyword" can now be a dictionary defining tests for individual keys.
These tests can either be a type, a concrete value or a regex
starting with "re:". Dictionaries can be stacked inside each other.
Optional keys can be indicated with a "?" before its name.
For example:
"keyword:" {
"image_id": int,
"gallery_id", 123,
"name": "re:pattern",
"user": {
"id": 321,
},
"?optional": None,
}
2017-12-30 19:05:37 +01:00
|
|
|
def generate_tests():
    """Dynamically generate extractor unittests"""

    def _generate_method(result):
        """Return a test method bound to a single result dictionary."""

        def test(self):
            print("\n" + result["#url"])
            try:
                self._run_test(result)
            except KeyboardInterrupt as exc:
                # let the user decide what Ctrl+C should mean here
                reply = input("\n[e]xit | [f]ail | [S]kip ? ")
                reply = reply.strip().lower()
                if reply in ("e", "exit"):
                    raise
                if reply in ("f", "fail"):
                    self.fail("manual test failure")
                # anything else: skip
                self._skipped.append((result["#url"], "manual skip"))
                self.skipTest(exc)

        return test

    # enable selective testing for direct calls
    selective = (__name__ == "__main__" and len(sys.argv) > 1)
    if not selective:
        tests = results.all()
    else:
        # '<category>[:<subcategory>]'; a leading '+' on the category
        # selects by the first "#category" entry instead
        category, _, subcategory = sys.argv[1].partition(":")
        # hide the selector from unittest's own argument handling
        del sys.argv[1:]

        if not category.startswith("+"):
            tests = results.category(category)
        else:
            basecategory = category[1:].lower()
            tests = [
                entry for entry in results.all()
                if entry["#category"][0].lower() == basecategory
            ]

        if subcategory:
            tests = [
                entry for entry in tests
                if entry["#category"][-1] == subcategory
            ]

    # add 'test_...' methods
    counters = collections.defaultdict(int)
    for result in tests:
        name = "{1}_{2}".format(*result["#category"])
        counters[name] += 1

        method = _generate_method(result)
        method.__doc__ = result["#url"]
        # numbered per name to keep method names unique
        method.__name__ = "test_{}_{}".format(name, counters[name])
        setattr(TestExtractorResults, method.__name__, method)
|
2017-01-09 12:27:20 +01:00
|
|
|
|
update extractor-unittest capabilities
- "count" can now be a string defining a comparison in the form of
'<operator> <value>', for example: '> 12' or '!= 1'. If its value
is not a string, it is assumed to be a concrete integer as before.
- "keyword" can now be a dictionary defining tests for individual keys.
These tests can either be a type, a concrete value or a regex
starting with "re:". Dictionaries can be stacked inside each other.
Optional keys can be indicated with a "?" before its name.
For example:
"keyword:" {
"image_id": int,
"gallery_id", 123,
"name": "re:pattern",
"user": {
"id": 321,
},
"?optional": None,
}
2017-12-30 19:05:37 +01:00
|
|
|
|
|
|
|
# Attach the 'test_*' methods to TestExtractorResults at import time,
# so they exist for direct runs as well as for external test runners.
generate_tests()

if __name__ == "__main__":
    # direct call: merge the optional user config file into CONFIG first
    load_test_config()
    unittest.main(warnings="ignore")
|