mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-23 03:02:50 +01:00
2501adeda0
to reuse its code in create_test_data.py later; rename to 'pyprint' since 'pprint' is already used by a stdlib module
248 lines
6.4 KiB
Python
Executable File
248 lines
6.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2023 Mike Fährmann
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import itertools
|
|
import collections
|
|
|
|
import util
|
|
from pyprint import pyprint
|
|
from gallery_dl import extractor
|
|
|
|
|
|
FORMAT = '''\
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
{imports}
|
|
|
|
|
|
__tests__ = (
|
|
{tests}\
|
|
)
|
|
'''
|
|
|
|
|
|
def extract_tests_from_source(lines):
    """Locate legacy test entries in the source lines of a module.

    Args:
        lines: list of source lines, each still carrying its newline.

    Returns:
        A dict mapping every test URL found to the slice of 'lines'
        covering that entry. The slice begins one line above the line
        holding the URL and ends on the entry's closing line.
    """
    # NOTE(review): both patterns anchor on leading whitespace; confirm the
    # literal spaces below match the indentation used in extractor sources.
    url_re = re.compile(
        r''' (?:test = | )?\(\(?"([^"]+)"(.*)''')
    end_re = re.compile(
        r" (\}\)| \}\),)\n$")

    found = {}
    start = 0       # index of the line that opened the pending entry
    current = ""    # URL of the pending entry

    for pos, text in enumerate(lines):
        # a pending multi-line entry ends on its closing-bracket line
        if start and end_re.match(text):
            found[current] = lines[start-1:pos+1]
            start = 0
            continue

        hit = url_re.match(text)
        if not hit:
            continue

        # Nothing after the closing quote: the URL literal continues on
        # the following line(s). Drop the trailing '"\n' and append what
        # follows the next line's opening quote, then match again.
        cursor = pos
        while not hit[2]:
            cursor += 1
            follow = lines[cursor]
            text = text[:-2] + follow[follow.index('"')+1:]
            hit = url_re.match(text)

        current = hit[1]
        if hit[2] in (",)", "),"):
            # entry consists of the URL only and ends on this very line
            found[current] = lines[pos-1:pos+1]
            start = 0
        else:
            start = pos

    return found
|
|
|
|
|
|
def get_test_source(extr, *, cache={}):
    """Return the source lines of the legacy test entry for 'extr.url'.

    The module that defines the extractor is read and parsed once;
    the parsed result is kept in 'cache' under the module name.

    Args:
        extr: object providing '__module__' and 'url' attributes.
        cache: dict mapping module names to parsed tests. The mutable
            default is intentional: it serves as the memo shared
            between calls.

    Returns:
        The lines recorded for 'extr.url', or ("",) when no entry
        (or an empty one) is recorded for that URL.
    """
    module = extr.__module__
    try:
        tests = cache[module]
    except KeyError:
        path = sys.modules[module].__file__
        # Read as UTF-8 instead of the locale's default encoding.
        # Presumably extractor sources are UTF-8 like this script -- confirm.
        with open(path, encoding="utf-8") as fp:
            lines = fp.readlines()
        tests = cache[module] = extract_tests_from_source(lines)
    return tests.get(extr.url) or ("",)
|
|
|
|
|
|
def comment_from_source(source):
    """Return the text of a '#' comment on the first line of 'source'.

    The first line has to consist of whitespace followed by '#'.
    An empty string is returned when it does not.
    """
    found = re.match(r"\s+#\s*(.+)", source[0])
    if found is None:
        return ""
    return found.group(1)
|
|
|
|
|
|
def build_test(extr, data):
    """Convert one legacy test definition into its new representation.

    Args:
        extr: extractor instance; its 'url', 'basecategory', 'category',
            'subcategory' and '__class__' attributes are read.
        data: dict of legacy test options, or a falsy value when the
            test has none. The dict is not modified.

    Returns:
        A (head, instr, metadata) tuple:
        head     -- dict with '#url', '#category', '#class' and, when the
                    source has a comment for this test, '#comment'
        instr    -- dict of '#'-prefixed test instructions
        metadata -- value of the legacy 'keyword' option (None when
                    absent), or an empty dict when that value was a
                    40-character string, which is stored as
                    '#sha1_metadata' instead

    Raises:
        SystemExit: when 'data' contains a key this function does not
            know how to convert.
    """
    source = get_test_source(extr)
    comment = comment_from_source(source)

    head = {"#url": extr.url}
    if comment:
        head["#comment"] = comment.replace('"', "'")
    head["#category"] = (extr.basecategory,
                         extr.category,
                         extr.subcategory)
    head["#class"] = extr.__class__

    # consume a copy so the caller's test definition stays intact
    data = dict(data) if data else {}
    instr = {}

    if (options := data.pop("options", None)):
        instr["#options"] = dict(options)

    if (pattern := data.pop("pattern", None)):
        # a pattern equal to an extractor's own pattern is emitted as a
        # reference to that class attribute instead of a copied string
        if PATTERNS and pattern in PATTERNS:
            pattern = f"lit:{pyprint(PATTERNS[pattern])}.pattern"
        instr["#pattern"] = pattern

    # options dropped when falsy
    for key in ("exception", "range"):
        if (value := data.pop(key, None)):
            instr["#" + key] = value

    # options for which falsy values such as 0 or False are meaningful
    for key in ("count", "archive", "extractor"):
        if (value := data.pop(key, None)) is not None:
            instr["#" + key] = value

    if (sha1_url := data.pop("url", None)):
        instr["#sha1_url"] = sha1_url

    metadata = data.pop("keyword", None)
    if isinstance(metadata, str) and len(metadata) == 40:
        instr["#sha1_metadata"] = metadata
        metadata = {}

    if (content := data.pop("content", None)):
        if isinstance(content, tuple):
            content = list(content)
        instr["#sha1_content"] = content

    if data:
        # leftover keys are options this converter does not handle;
        # report them and stop with a failure status
        print(extr, file=sys.stderr)
        for key in data:
            print(key, file=sys.stderr)
        sys.exit(1)

    return head, instr, metadata
|
|
|
|
|
|
def collect_patterns():
    """Build a dict mapping 'cls.pattern.pattern' to 'cls'.

    Covers every class returned by extractor._list_classes(). When two
    classes share a key, the one listed later wins.
    """
    by_pattern = {}
    for cls in extractor._list_classes():
        by_pattern[cls.pattern.pattern] = cls
    return by_pattern
|
|
|
|
|
|
def collect_tests(whitelist=None):
    """Convert the tests of all extractor classes, grouped by category.

    Args:
        whitelist: optional container of category names. When non-empty,
            only tests whose extractor category is in it are converted.

    Returns:
        A defaultdict(list) mapping category names to lists of
        build_test() results.
    """
    grouped = collections.defaultdict(list)

    for cls in extractor._list_classes():
        for url, data in cls._get_tests():
            extr = cls.from_url(url)
            wanted = not whitelist or extr.category in whitelist
            if wanted:
                grouped[extr.category].append(build_test(extr, data))

    return grouped
|
|
|
|
|
|
def _import_statement(cls):
    """Return the statement(s) that bring class 'cls' into scope."""
    module, _, name = cls.__module__.rpartition(".")

    if name[0].isdecimal():
        # a module name starting with a digit cannot appear in an
        # 'import' statement; load it dynamically and bind it as '_<name>'
        return f'''\
{module.partition(".")[0]} = __import__("{cls.__module__}")
_{name} = getattr({module}, "{name}")'''
    if module:
        return f"from {module} import {name}"
    return f"import {name}"


def export_tests(data):
    """Render converted tests as the text of a results module.

    Args:
        data: iterable of (head, instr, metadata) tuples as produced
            by build_test().

    Returns:
        FORMAT filled in with the required import statements and
        the printed test entries.
    """
    import_stmts = {}
    rendered = []

    for head, instr, metadata in data:

        values = itertools.chain(
            head.values(),
            instr.values() if instr else (),
            metadata.values() if metadata else (),
        )
        for value in values:
            # only non-builtin classes need an import in the output
            if isinstance(value, type) and value.__module__ != "builtins":
                import_stmts[value.__module__] = _import_statement(value)

        text = pyprint(head)
        if instr:
            # merge into 'head': strip its last two characters and
            # the first character of the printed 'instr'
            text = f"{text[:-2]}{pyprint(instr)[1:]}"
        if metadata:
            for key, value in metadata.items():
                if value == "type:datetime":
                    import_stmts["datetime"] = "import datetime"
                    metadata[key] = "lit:datetime.datetime"
            text = f"{text[:-1]}{pyprint(metadata, lmin=0)[1:]}"

        rendered.append(f"{text},\n\n")

    return FORMAT.format(
        imports="\n".join(import_stmts.values()),
        tests="".join(rendered),
    )
|
|
|
|
|
|
# Lookup from pattern string to extractor class; main() assigns the
# result of collect_patterns() here and build_test() reads it.
PATTERNS = None
|
|
# NOTE(review): not referenced anywhere in the visible code; main()
# derives its default target directory from this script's location.
DIRECTORY = "/tmp/_/results"
|
|
|
|
|
|
def main():
    """Parse command-line arguments and write one results file per category.

    Without '--target', files go to the 'test/results' directory next to
    this script's parent directory.
    """
    global PATTERNS

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--target", help="target directory")
    parser.add_argument(
        "-c", "--category", action="append",
        help="extractor categories to export")
    args = parser.parse_args()

    target = args.target
    if not target:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        target = os.path.join(
            os.path.dirname(script_dir), "test", "results")

    # build_test() looks patterns up in this module-level table
    PATTERNS = collect_patterns()

    os.makedirs(target, exist_ok=True)
    for category, tests in collect_tests(args.category).items():
        # dots are removed from category names to form the file name
        filename = category.replace(".", "")
        with util.lazy(f"{target}/{filename}.py") as file:
            file.write(export_tests(tests))
|
|
|
|
|
|
# Run the export only when this file is executed as a script.
if __name__ == "__main__":
    main()
|