2018-03-24 17:24:34 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2023-01-08 15:35:05 +01:00
|
|
|
# Copyright 2018-2023 Mike Fährmann
|
2018-03-24 17:24:34 +01:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2020-05-02 01:15:50 +02:00
|
|
|
import os
|
2018-03-24 17:24:34 +01:00
|
|
|
import sys
|
2020-05-02 01:15:50 +02:00
|
|
|
import unittest
|
|
|
|
from unittest.mock import patch
|
|
|
|
|
2020-04-07 20:24:56 +02:00
|
|
|
import time
|
2018-08-15 20:39:13 +02:00
|
|
|
import string
|
2020-04-07 20:24:56 +02:00
|
|
|
from datetime import datetime, timedelta
|
|
|
|
|
2020-05-02 01:15:50 +02:00
|
|
|
# Put the parent of this file's directory at the front of sys.path so that the
# 'gallery_dl' imports below resolve against that directory first.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from gallery_dl import extractor # noqa E402
|
|
|
|
from gallery_dl.extractor import mastodon # noqa E402
|
|
|
|
from gallery_dl.extractor.common import Extractor, Message # noqa E402
|
|
|
|
from gallery_dl.extractor.directlink import DirectlinkExtractor # noqa E402
|
2018-03-24 17:24:34 +01:00
|
|
|
|
2020-10-25 15:39:52 +01:00
|
|
|
# Keep a reference to the module's '_list_classes' as it was at import time;
# TestExtractorModule.setUp() assigns it back before every test.
_list_classes = extractor._list_classes
|
|
|
|
|
2018-03-24 17:24:34 +01:00
|
|
|
|
|
|
|
class FakeExtractor(Extractor):
    # Minimal extractor stub for "fake:" URIs; the tests below register it
    # through extractor.add() and extractor.add_module().
    # (documented with comments on purpose so that __doc__ stays unset)
    category = "fake"
    subcategory = "test"
    pattern = "fake:"

    def items(self):
        # emit a version message followed by one URL message
        yield from (
            (Message.Version, 1),
            (Message.Url, "text:foobar", {}),
        )
|
|
|
|
|
|
|
|
|
2020-04-07 20:24:56 +02:00
|
|
|
class TestExtractorModule(unittest.TestCase):
    """Tests for extractor lookup, registration and naming conventions."""

    # URIs for which extractor.find() is expected to return an Extractor
    VALID_URIS = (
        "https://example.org/file.jpg",
        "tumblr:foobar",
        "oauth:flickr",
        "test:pixiv:",
        "recursive:https://example.org/document.html",
    )

    def setUp(self):
        # reset the extractor module's internal state before every test
        extractor._cache.clear()
        extractor._module_iter = extractor._modules_internal()
        extractor._list_classes = _list_classes

    def test_find(self):
        # every valid URI resolves to an Extractor instance
        for uri in self.VALID_URIS:
            self.assertIsInstance(extractor.find(uri), Extractor, uri)

        # strings without a matching pattern yield None
        for unmatched in ("", "/tmp/file.ext"):
            self.assertIsNone(extractor.find(unmatched))

        # non-string arguments raise TypeError
        for bad_value in (None, [], {}, 123, b"test:"):
            self.assertRaises(TypeError, extractor.find, bad_value)

    def test_add(self):
        # the fake URI is unknown until its class has been added
        fake_uri = "fake:foobar"
        self.assertIsNone(extractor.find(fake_uri))

        extractor.add(FakeExtractor)
        self.assertIsInstance(extractor.find(fake_uri), FakeExtractor)

    def test_add_module(self):
        # adding this test module must register exactly FakeExtractor
        fake_uri = "fake:foobar"
        self.assertIsNone(extractor.find(fake_uri))

        this_module = sys.modules[__name__]
        found = extractor.add_module(this_module)
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].pattern, FakeExtractor.pattern)
        self.assertEqual(found[0], FakeExtractor)
        self.assertIsInstance(extractor.find(fake_uri), FakeExtractor)

    def test_from_url(self):
        # from_url() on the class returned by find() builds that exact class
        for uri in self.VALID_URIS:
            cls = extractor.find(uri).__class__
            instance = cls.from_url(uri)
            self.assertIs(type(instance), cls)
            self.assertIsInstance(instance, Extractor)

        # strings that do not match the class pattern yield None
        for unmatched in ("", "/tmp/file.ext"):
            self.assertIsNone(FakeExtractor.from_url(unmatched))

        # non-string arguments raise TypeError
        for bad_value in (None, [], {}, 123, b"test:"):
            self.assertRaises(TypeError, FakeExtractor.from_url, bad_value)

    def test_unique_pattern_matches(self):
        # collect (URL, extractor class) pairs from all extractor testcases
        test_urls = [
            (testcase[0], extr)
            for extr in extractor.extractors()
            for testcase in extr._get_tests()
        ]

        # apply every cached extractor pattern to each testcase URL
        for url, extr1 in test_urls:
            matches = []

            for extr2 in extractor._cache:
                # DirectlinkExtractor's pattern only takes part
                # when its own testcases are being checked
                if extr1 == DirectlinkExtractor or \
                        extr2 != DirectlinkExtractor:
                    match = extr2.pattern.match(url)
                    if match:
                        matches.append((match, extr2))

            # exactly one pattern may match, and it must belong to
            # the extractor the URL was taken from
            if not matches:
                self.fail("'{}' isn't matched by any pattern".format(url))

            if len(matches) > 1:
                parts = [
                    "'{}' gets matched by more than one pattern:".format(url)]
                parts.extend(
                    "\n\n- {}:\n{}".format(cls.__name__, match.re.pattern)
                    for match, cls in matches
                )
                self.fail("".join(parts))

            self.assertIs(extr1, matches[0][1], url)

    def test_init(self):
        """Test for exceptions in Extractor.initialize()"""
        for cls in extractor.extractors():
            # only the first testcase URL of each extractor is used
            for test in cls._get_tests():
                cls.from_url(test[0]).initialize()
                break

    def test_docstrings(self):
        """Ensure docstring uniqueness"""
        for extr1 in extractor.extractors():
            for extr2 in extractor.extractors():
                if extr1 == extr2:
                    continue
                # pairs where either docstring is missing are not compared
                if not (extr1.__doc__ and extr2.__doc__):
                    continue
                self.assertNotEqual(
                    extr1.__doc__,
                    extr2.__doc__,
                    "{} <-> {}".format(extr1, extr2),
                )

    def test_names(self):
        """Ensure extractor classes are named CategorySubcategoryExtractor"""
        def capitalize(c):
            # "foo-bar" -> "FooBar"; anything else via str.capitalize()
            if "-" not in c:
                return c.capitalize()
            return string.capwords(c.replace("-", " ")).replace(" ", "")

        for extr in extractor.extractors():
            if extr.category in ("", "oauth", "ytdl"):
                continue

            expected = (capitalize(extr.category) +
                        capitalize(extr.subcategory) +
                        "Extractor")
            # expected names starting with a digit get a leading underscore
            if expected[0].isdigit():
                expected = "_" + expected
            self.assertEqual(expected, extr.__name__)
|
|
|
|
|
2018-03-24 17:24:34 +01:00
|
|
|
|
2020-04-07 20:24:56 +02:00
|
|
|
class TestExtractorWait(unittest.TestCase):
    """Tests for Extractor.wait() with time.sleep() and the logger mocked."""

    def test_wait_seconds(self):
        # wait(seconds=5) is expected to sleep for 6.0 seconds and to log
        # exactly one info message carrying the wake-up time
        extr = extractor.find("test:")
        seconds = 5
        until = time.time() + seconds

        with patch("time.sleep") as sleep, patch.object(extr, "log") as log:
            extr.wait(seconds=seconds)

        sleep.assert_called_once_with(6.0)

        calls = log.info.mock_calls
        self.assertEqual(len(calls), 1)
        self._assert_isotime(calls[0][1][1], until)

    def test_wait_until(self):
        # same expectations with the target given as an epoch timestamp
        extr = extractor.find("test:")
        until = time.time() + 5

        with patch("time.sleep") as sleep, patch.object(extr, "log") as log:
            extr.wait(until=until)

        calls = sleep.mock_calls
        self.assertEqual(len(calls), 1)
        self.assertAlmostEqual(calls[0][1][0], 6.0, places=1)

        calls = log.info.mock_calls
        self.assertEqual(len(calls), 1)
        self._assert_isotime(calls[0][1][1], until)

    def test_wait_until_datetime(self):
        # the target is passed as a naive UTC datetime, while the logged
        # time is compared against the equivalent local wall-clock time
        extr = extractor.find("test:")
        until = datetime.utcnow() + timedelta(seconds=5)
        until_local = datetime.now() + timedelta(seconds=5)

        with patch("time.sleep") as sleep, patch.object(extr, "log") as log:
            extr.wait(until=until)

        calls = sleep.mock_calls
        self.assertEqual(len(calls), 1)
        self.assertAlmostEqual(calls[0][1][0], 6.0, places=1)

        calls = log.info.mock_calls
        self.assertEqual(len(calls), 1)
        self._assert_isotime(calls[0][1][1], until_local)

    def _assert_isotime(self, output, until):
        # Assert that the 'HH:MM:SS' string 'output' is within one second
        # of the time-of-day of 'until' (datetime or epoch timestamp).
        if not isinstance(until, datetime):
            until = datetime.fromtimestamp(until)
        o = self._isotime_to_seconds(output)
        u = self._isotime_to_seconds(until.time().isoformat()[:8])

        # Both values are whole seconds on a 24h clock. The expected time is
        # sampled at a slightly different moment than the logged one, so the
        # two may land on either side of a second boundary, or of midnight.
        # Fold the signed difference into [-12h, +12h) and allow one second
        # of slack in both directions; anything larger is a real mismatch.
        half_day = 12 * 3600
        delta = (o - u + half_day) % (2 * half_day) - half_day
        self.assertLessEqual(abs(delta), 1)

    @staticmethod
    def _isotime_to_seconds(isotime):
        # convert an 'HH:MM:SS' string into seconds since midnight
        parts = isotime.split(":")
        return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
|
|
|
|
|
|
|
|
|
2020-04-07 20:26:12 +02:00
|
|
|
class TextExtractorOAuth(unittest.TestCase):
    """Tests that 'oauth:' extractors start the expected authorization flow."""

    @staticmethod
    def _drain(extr):
        # iterate the extractor to completion, discarding every message
        for _ in extr:
            pass

    def test_oauth1(self):
        # each OAuth1 category must call the OAuth1 flow exactly once
        for category in ("flickr", "smugmug", "tumblr"):
            extr = extractor.find("oauth:" + category)

            with patch.object(extr, "_oauth1_authorization_flow") as flow:
                self._drain(extr)
                self.assertEqual(len(flow.mock_calls), 1)

    def test_oauth2(self):
        # each OAuth2 category must call the code-grant flow exactly once
        for category in ("deviantart", "reddit"):
            extr = extractor.find("oauth:" + category)

            with patch.object(
                    extr, "_oauth2_authorization_code_grant") as grant:
                self._drain(extr)
                self.assertEqual(len(grant.mock_calls), 1)

    def test_oauth2_mastodon(self):
        # for pawoo.net no call to _register() is expected
        extr = extractor.find("oauth:mastodon:pawoo.net")

        with patch.object(extr, "_oauth2_authorization_code_grant") as grant:
            with patch.object(extr, "_register") as register:
                self._drain(extr)
                self.assertEqual(len(register.mock_calls), 0)
                self.assertEqual(len(grant.mock_calls), 1)

    def test_oauth2_mastodon_unknown(self):
        # for example.com exactly one call to _register() is expected;
        # the mock hands back placeholder client credentials
        extr = extractor.find("oauth:mastodon:example.com")

        with patch.object(extr, "_oauth2_authorization_code_grant") as grant:
            with patch.object(extr, "_register") as register:
                register.return_value = {
                    "client-id"    : "foo",
                    "client-secret": "bar",
                }

                self._drain(extr)

                self.assertEqual(len(register.mock_calls), 1)
                self.assertEqual(len(grant.mock_calls), 1)
|
|
|
|
|
|
|
|
|
2018-03-24 17:24:34 +01:00
|
|
|
# Run all tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|