2018-03-25 15:10:25 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2022-11-01 18:48:35 +01:00
|
|
|
# Copyright 2018-2022 Mike Fährmann
|
2018-03-25 15:10:25 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2020-05-02 01:15:50 +02:00
|
|
|
import os
|
2019-06-20 16:59:44 +02:00
|
|
|
import sys
|
2020-05-02 01:15:50 +02:00
|
|
|
import unittest
|
|
|
|
from unittest.mock import Mock, MagicMock, patch
|
|
|
|
|
|
|
|
import re
|
2020-05-18 01:35:53 +02:00
|
|
|
import logging
|
2018-03-25 15:10:25 +02:00
|
|
|
import os.path
|
2022-11-01 18:48:35 +01:00
|
|
|
import binascii
|
2018-03-25 15:10:25 +02:00
|
|
|
import tempfile
|
|
|
|
import threading
|
|
|
|
import http.server
|
|
|
|
|
2020-05-18 01:35:53 +02:00
|
|
|
|
2020-05-02 01:15:50 +02:00
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
2021-09-27 21:17:44 +02:00
|
|
|
from gallery_dl import downloader, extractor, output, config, path # noqa E402
|
2022-11-01 18:48:35 +01:00
|
|
|
from gallery_dl.downloader.http import MIME_TYPES, SIGNATURE_CHECKS # noqa E402
|
2018-03-25 15:10:25 +02:00
|
|
|
|
|
|
|
|
2019-06-20 16:59:44 +02:00
|
|
|
class MockDownloaderModule(Mock):
    """Mock stand-in for an importable downloader module."""
    # NOTE(review): downloader.find() presumably reads this attribute off
    # a freshly imported module to obtain its downloader class -- confirm
    # against gallery_dl/downloader/__init__.py
    __downloader__ = "mock"
|
|
|
|
|
|
|
|
|
2020-05-18 01:35:53 +02:00
|
|
|
class FakeJob():
    """Minimal stand-in for a job object.

    Provides only the attributes the downloader classes are exercised
    with here: an initialized extractor, a PathFormat, an output object,
    and a logger factory.
    """

    def __init__(self):
        # a generic extractor pointed at a dummy URL
        self.extractor = extractor.find("generic:https://example.org/")
        # initialize() must run before PathFormat can be built from it
        self.extractor.initialize()
        self.pathfmt = path.PathFormat(self.extractor)
        self.out = output.NullOutput()
        # logger factory; plain logging.getLogger stands in for the
        # real job's get_logger
        self.get_logger = logging.getLogger
|
|
|
|
|
|
|
|
|
2019-06-20 16:59:44 +02:00
|
|
|
class TestDownloaderModule(unittest.TestCase):
    """Tests for downloader.find() and its module cache."""

    @classmethod
    def setUpClass(cls):
        # stash any real youtube_dl module and install a stub, so the
        # ytdl downloader module imports even without youtube_dl installed
        cls._orig_ytdl = sys.modules.get("youtube_dl")
        sys.modules["youtube_dl"] = MagicMock()

    @classmethod
    def tearDownClass(cls):
        # restore the original youtube_dl module, or drop the stub
        original = cls._orig_ytdl
        if original:
            sys.modules["youtube_dl"] = original
        else:
            del sys.modules["youtube_dl"]

    def tearDown(self):
        # every test starts with an empty downloader-module cache
        downloader._cache.clear()

    def test_find(self):
        # valid schemes resolve to their downloader class;
        # "https" maps onto the "http" downloader
        for scheme, classname, class_scheme in (
            ("http" , "HttpDownloader"     , "http"),
            ("https", "HttpDownloader"     , "http"),
            ("text" , "TextDownloader"     , "text"),
            ("ytdl" , "YoutubeDLDownloader", "ytdl"),
        ):
            found = downloader.find(scheme)
            self.assertEqual(found.__name__, classname)
            self.assertEqual(found.scheme, class_scheme)

        # unknown or non-string schemes yield None
        for invalid in ("ftp", "foo", 1234, None):
            self.assertEqual(downloader.find(invalid), None)

    @patch("builtins.__import__")
    def test_cache(self, import_module):
        import_module.return_value = MockDownloaderModule()

        # the first lookup of each scheme triggers an import ...
        for scheme in ("http", "text", "ytdl"):
            downloader.find(scheme)
        self.assertEqual(import_module.call_count, 3)

        # ... while repeated lookups are served from the cache
        for scheme in ("http", "text", "ytdl"):
            downloader.find(scheme)
        self.assertEqual(import_module.call_count, 3)

    @patch("builtins.__import__")
    def test_cache_http(self, import_module):
        # "http" and "https" share one module: a single import suffices
        import_module.return_value = MockDownloaderModule()
        downloader.find("http")
        downloader.find("https")
        self.assertEqual(import_module.call_count, 1)

    @patch("builtins.__import__")
    def test_cache_https(self, import_module):
        # same as above, but with "https" looked up first
        import_module.return_value = MockDownloaderModule()
        downloader.find("https")
        downloader.find("http")
        self.assertEqual(import_module.call_count, 1)
|
|
|
|
|
|
|
|
|
2018-03-25 15:10:25 +02:00
|
|
|
class TestDownloaderBase(unittest.TestCase):
    """Shared fixture and helpers for the downloader test cases.

    Provides a temporary base directory, a FakeJob, and helpers to
    prepare a download destination and run a full download check.
    """

    @classmethod
    def setUpClass(cls):
        cls.dir = tempfile.TemporaryDirectory()
        # counter used to give every destination a unique filename
        cls.fnum = 0
        config.set((), "base-directory", cls.dir.name)
        cls.job = FakeJob()

    @classmethod
    def tearDownClass(cls):
        cls.dir.cleanup()
        config.clear()

    @classmethod
    def _prepare_destination(cls, content=None, part=True, extension=None):
        """Point the job's PathFormat at a fresh target file.

        content:   optional initial file content (str or bytes); when
                   given, it is written to the target up front --
                   presumably to simulate a partial download, cf.
                   test_http_offset / test_text_offset
        part:      NOTE(review): never read inside this method
        extension: initial "extension" metadata value (may be None)

        Returns the prepared PathFormat.
        """
        name = "file-{}".format(cls.fnum)
        cls.fnum += 1

        kwdict = {
            "category" : "test",
            "subcategory": "test",
            "filename" : name,
            "extension" : extension,
        }

        # directory, filename, and full path must be set in this order
        pathfmt = cls.job.pathfmt
        pathfmt.set_directory(kwdict)
        pathfmt.set_filename(kwdict)
        pathfmt.build_path()

        if content:
            # binary mode for bytes content, text mode for str
            mode = "w" + ("b" if isinstance(content, bytes) else "")
            with pathfmt.open(mode) as fp:
                fp.write(content)

        return pathfmt

    def _run_test(self, url, input, output,
                  extension, expected_extension=None):
        """Download 'url' and verify success, file content, and extension.

        input:  initial (partial) file content, or None
        output: expected final file content (str or bytes)
        extension / expected_extension: "extension" metadata before and
        after the download
        """
        pathfmt = self._prepare_destination(input, extension=extension)
        success = self.downloader.download(url, pathfmt)

        # test successful download
        self.assertTrue(success, "downloading '{}' failed".format(url))

        # test content
        mode = "r" + ("b" if isinstance(output, bytes) else "")
        with pathfmt.open(mode) as fp:
            content = fp.read()
        self.assertEqual(content, output)

        # test filename extension
        self.assertEqual(
            pathfmt.extension,
            expected_extension,
            content[0:16],  # leading bytes as failure message
        )
        self.assertEqual(
            os.path.splitext(pathfmt.realpath)[1][1:],
            expected_extension,
        )
|
|
|
|
|
|
|
|
|
|
|
|
class TestHTTPDownloader(TestDownloaderBase):
    """Tests for the HTTP downloader, backed by a local HTTP server."""

    @classmethod
    def setUpClass(cls):
        TestDownloaderBase.setUpClass()
        cls.downloader = downloader.find("http")(cls.job)

        host = "127.0.0.1"
        port = 0  # select random not-in-use port

        try:
            server = http.server.HTTPServer((host, port), HttpRequestHandler)
        except OSError as exc:
            # e.g. sandboxed environments without local networking
            raise unittest.SkipTest(
                "cannot spawn local HTTP server ({})".format(exc))

        # read back the port the OS actually assigned
        host, port = server.server_address
        cls.address = "http://{}:{}".format(host, port)
        # daemon thread: dies with the test process, no shutdown needed
        threading.Thread(target=server.serve_forever, daemon=True).start()

    def _run_test(self, ext, input, output,
                  extension, expected_extension=None):
        # build the request URL from the sample key and delegate
        TestDownloaderBase._run_test(
            self, self.address + "/" + ext, input, output,
            extension, expected_extension)

    def tearDown(self):
        # reset the filesize limits modified by individual tests
        self.downloader.minsize = self.downloader.maxsize = None

    def test_http_download(self):
        self._run_test("jpg", None, DATA["jpg"], "jpg", "jpg")
        self._run_test("png", None, DATA["png"], "png", "png")
        self._run_test("gif", None, DATA["gif"], "gif", "gif")

    def test_http_offset(self):
        # downloads resume from existing partial files
        self._run_test("jpg", DATA["jpg"][:123], DATA["jpg"], "jpg", "jpg")
        self._run_test("png", DATA["png"][:12] , DATA["png"], "png", "png")
        self._run_test("gif", DATA["gif"][:1] , DATA["gif"], "gif", "gif")

    def test_http_extension(self):
        # missing extension metadata gets filled in after downloading
        self._run_test("jpg", None, DATA["jpg"], None, "jpg")
        self._run_test("png", None, DATA["png"], None, "png")
        self._run_test("gif", None, DATA["gif"], None, "gif")

    def test_http_adjust_extension(self):
        # a wrong extension is corrected to match the actual content
        self._run_test("jpg", None, DATA["jpg"], "png", "jpg")
        self._run_test("png", None, DATA["png"], "gif", "png")
        self._run_test("gif", None, DATA["gif"], "jpg", "gif")

    def test_http_filesize_min(self):
        url = self.address + "/gif"
        pathfmt = self._prepare_destination(None, extension=None)
        self.downloader.minsize = 100
        with self.assertLogs(self.downloader.log, "WARNING"):
            success = self.downloader.download(url, pathfmt)
        # an undersized file is skipped, but not reported as a failure
        self.assertTrue(success)
        self.assertEqual(pathfmt.temppath, "")

    def test_http_filesize_max(self):
        url = self.address + "/jpg"
        pathfmt = self._prepare_destination(None, extension=None)
        self.downloader.maxsize = 100
        with self.assertLogs(self.downloader.log, "WARNING"):
            success = self.downloader.download(url, pathfmt)
        # an oversized file is skipped, but not reported as a failure
        self.assertTrue(success)
        self.assertEqual(pathfmt.temppath, "")
|
2020-09-01 22:05:17 +02:00
|
|
|
|
2018-03-25 15:10:25 +02:00
|
|
|
|
|
|
|
class TestTextDownloader(TestDownloaderBase):
    """Tests for the 'text:' pseudo-scheme downloader."""

    @classmethod
    def setUpClass(cls):
        TestDownloaderBase.setUpClass()
        cls.downloader = downloader.find("text")(cls.job)

    def test_text_download(self):
        """The payload after 'text:' is written verbatim to a .txt file."""
        self._run_test("text:foobar", None, "foobar", "txt", "txt")

    def test_text_offset(self):
        """An existing partial file still yields the complete payload."""
        self._run_test("text:foobar", "foo", "foobar", "txt", "txt")

    def test_text_empty(self):
        """An empty payload produces an empty file."""
        self._run_test("text:", None, "", "txt", "txt")
|
|
|
|
|
|
|
|
|
|
|
|
class HttpRequestHandler(http.server.BaseHTTPRequestHandler):
    """Serves the sample files in DATA, with (open-ended) Range support."""

    def do_GET(self):
        """Answer 'GET /<key>' with DATA[<key>]; 404 for unknown keys."""
        try:
            output = DATA[self.path[1:]]
        except KeyError:
            self.send_response(404)
            # bugfix: terminate the header section before writing the
            # body; without this, the body bytes were appended into the
            # unterminated header block, producing a malformed response
            self.end_headers()
            self.wfile.write(self.path.encode())
            return

        headers = {"Content-Length": len(output)}

        if "Range" in self.headers:
            status = 206  # Partial Content

            # only open-ended ranges ("bytes=N-") are expected here
            match = re.match(r"bytes=(\d+)-", self.headers["Range"])
            start = int(match.group(1))

            headers["Content-Range"] = "bytes {}-{}/{}".format(
                start, len(output)-1, len(output))
            output = output[start:]
        else:
            status = 200

        self.send_response(status)
        for key, value in headers.items():
            self.send_header(key, value)
        self.end_headers()
        self.wfile.write(output)
|
|
|
|
|
|
|
|
|
2022-11-16 22:59:18 +01:00
|
|
|
# Sample file signatures by extension.
# bugfix: this must be a TUPLE, not a set literal -- iteration order has
# to be deterministic, because the DATA mapping built below keeps the
# *first* signature per extension and names entries "S00", "S01", ...
# by position.
SAMPLES = (
    ("jpg" , binascii.a2b_base64(
        "/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB"
        "AQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/2wBDAQEB"
        "AQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB"
        "AQEBAQEBAQEBAQEBAQH/wAARCAABAAEDAREAAhEBAxEB/8QAFAABAAAAAAAAAAAA"
        "AAAAAAAACv/EABQQAQAAAAAAAAAAAAAAAAAAAAD/xAAUAQEAAAAAAAAAAAAAAAAA"
        "AAAA/8QAFBEBAAAAAAAAAAAAAAAAAAAAAP/aAAwDAQACEQMRAD8AfwD/2Q==")),
    ("png" , binascii.a2b_base64(
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAAAAAA6fptVAAAACklEQVQIHWP4DwAB"
        "AQEANl9ngAAAAABJRU5ErkJggg==")),
    ("gif" , binascii.a2b_base64(
        "R0lGODdhAQABAIAAAP///////ywAAAAAAQABAAACAkQBADs=")),
    ("bmp" , b"BM"),
    ("webp", b"RIFF????WEBP"),
    ("avif", b"????ftypavif"),
    ("avif", b"????ftypavis"),
    ("heic", b"????ftypheic"),
    ("heic", b"????ftypheim"),
    ("heic", b"????ftypheis"),
    ("heic", b"????ftypheix"),
    ("svg" , b"<?xml"),
    ("ico" , b"\x00\x00\x01\x00"),
    ("cur" , b"\x00\x00\x02\x00"),
    ("psd" , b"8BPS"),
    ("mp4" , b"????ftypmp4"),
    ("mp4" , b"????ftypavc1"),
    ("mp4" , b"????ftypiso3"),
    ("m4v" , b"????ftypM4V"),
    ("mov" , b"????ftypqt "),
    ("webm", b"\x1A\x45\xDF\xA3"),
    ("ogg" , b"OggS"),
    ("wav" , b"RIFF????WAVE"),
    ("mp3" , b"ID3"),
    ("mp3" , b"\xFF\xFB"),
    ("mp3" , b"\xFF\xF3"),
    ("mp3" , b"\xFF\xF2"),
    ("zip" , b"PK\x03\x04"),
    ("zip" , b"PK\x05\x06"),
    ("zip" , b"PK\x07\x08"),
    ("rar" , b"Rar!\x1A\x07"),
    ("rar" , b"\x52\x61\x72\x21\x1A\x07"),
    ("7z"  , b"\x37\x7A\xBC\xAF\x27\x1C"),
    ("pdf" , b"%PDF-"),
    ("swf" , b"FWS"),
    ("swf" , b"CWS"),
    ("blend", b"BLENDER-v303RENDH"),
    ("obj" , b"# Blender v3.2.0 OBJ File: 'foo.blend'"),
    ("clip", b"CSFCHUNK\x00\x00\x00\x00"),
)
|
|
|
|
|
|
|
|
|
2022-11-16 22:59:18 +01:00
|
|
|
# Map each extension to its *first* sample signature, and additionally
# expose every single sample under an index alias "S00", "S01", ...
DATA = {}

for ext, content in SAMPLES:
    DATA.setdefault(ext, content)

for index, (_, content) in enumerate(SAMPLES):
    DATA["S{:>02}".format(index)] = content
|
|
|
|
|
|
|
|
|
2022-11-01 18:48:35 +01:00
|
|
|
# reverse mime types mapping: filename extension -> MIME type
MIME_TYPES = dict(
    (extension, mimetype)
    for mimetype, extension in MIME_TYPES.items()
)
|
|
|
|
|
|
|
|
|
|
|
|
def generate_tests():
    """Attach one signature-detection test per SAMPLES entry.

    Each generated test downloads sample 'S<idx>' with extension 'bin'
    and expects the downloader to adjust it to the sample's extension.
    """

    # closure factory: binds idx/ext/content per test method
    def make_test(idx, ext, content):
        def test(self):
            self._run_test("S{:>02}".format(idx), None, content, "bin", ext)
        test.__name__ = "test_http_ext_{:>02}_{}".format(idx, ext)
        return test

    for idx, (ext, content) in enumerate(SAMPLES):
        method = make_test(idx, ext, content)
        setattr(TestHTTPDownloader, method.__name__, method)


generate_tests()
|
2018-03-25 15:10:25 +02:00
|
|
|
# run the full test suite when executed as a script
if __name__ == "__main__":
    unittest.main()
|