mirror of https://github.com/mikf/gallery-dl.git
rework and extend input file processing (#4732)
- add 2 command-line options to modify input file contents
  - -I/--input-file-comment
  - -x/--input-file-delete
- implement InputManager class
- move code from util.py to __init__.py (mainly to avoid import cycles)
This commit is contained in:
parent
17e710c4bf
commit
4700051562
@@ -6,8 +6,6 @@
 ## General Options:
     -h, --help                Print this help message and exit
     --version                 Print program version and exit
-    -i, --input-file FILE     Download URLs found in FILE ('-' for stdin).
-                              More than one --input-file can be specified
     -f, --filename FORMAT     Filename format string for downloaded files
                               ('/O' for "original" filenames)
     -d, --destination PATH    Target location for file downloads
@@ -19,6 +17,16 @@
     --clear-cache MODULE      Delete cached login sessions, cookies, etc. for
                               MODULE (ALL to delete everything)
 
+## Input Options:
+    -i, --input-file FILE     Download URLs found in FILE ('-' for stdin).
+                              More than one --input-file can be specified
+    -I, --input-file-comment FILE
+                              Download URLs found in FILE. Comment them out
+                              after they were downloaded successfully.
+    -x, --input-file-delete FILE
+                              Download URLs found in FILE. Delete them after
+                              they were downloaded successfully.
+
 ## Output Options:
     -q, --quiet               Activate quiet mode
     -v, --verbose             Print various debugging information
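
To make the new modes concrete: a rough, hypothetical sketch (not part of the diff; the file content and indices are made up) of what -I/--input-file-comment does to an input file once its URLs have been downloaded successfully. -x/--input-file-delete behaves the same way, except it blanks the line instead of prefixing it.

# hypothetical input-file lines, as read by the new InputManager below
lines = [
    "https://example.org/gallery/1\n",
    "https://example.org/gallery/2\n",
]
succeeded = [0]                  # indices of lines whose download succeeded

for i in succeeded:              # comment mode: prefix the finished lines
    lines[i] = "# " + lines[i]

# the rewritten file would now contain:
#   # https://example.org/gallery/1
#   https://example.org/gallery/2
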
@@ -18,19 +18,6 @@ __email__ = "mike_faehrmann@web.de"
 __version__ = version.__version__
 
 
-def progress(urls, pformat):
-    """Wrapper around urls to output a simple progress indicator"""
-    if pformat is True:
-        pformat = "[{current}/{total}] {url}\n"
-    else:
-        pformat += "\n"
-
-    pinfo = {"total": len(urls)}
-    for pinfo["current"], pinfo["url"] in enumerate(urls, 1):
-        output.stderr_write(pformat.format_map(pinfo))
-        yield pinfo["url"]
-
-
 def main():
     try:
         parser = option.build_parser()
@@ -224,7 +211,7 @@
             return config.initialize()
 
         else:
-            if not args.urls and not args.inputfiles:
+            if not args.urls and not args.input_files:
                 parser.error(
                     "The following arguments are required: URL\n"
                     "Use 'gallery-dl --help' to get a list of all options.")
@@ -238,22 +225,6 @@
             else:
                 jobtype = args.jobtype or job.DownloadJob
 
-            urls = args.urls
-            if args.inputfiles:
-                for inputfile in args.inputfiles:
-                    try:
-                        if inputfile == "-":
-                            if sys.stdin:
-                                urls += util.parse_inputfile(sys.stdin, log)
-                            else:
-                                log.warning(
-                                    "input file: stdin is not readable")
-                        else:
-                            with open(inputfile, encoding="utf-8") as file:
-                                urls += util.parse_inputfile(file, log)
-                    except OSError as exc:
-                        log.warning("input file: %s", exc)
-
             # unsupported file logging handler
             handler = output.setup_logging_handler(
                 "unsupportedfile", fmt="{message}")
@@ -263,25 +234,44 @@
                 ulog.propagate = False
                 job.Job.ulog = ulog
 
+            # collect input URLs
+            input_manager = InputManager()
+            input_manager.log = input_log = logging.getLogger("inputfile")
+            input_manager.add_list(args.urls)
+
+            if args.input_files:
+                for input_file, action in args.input_files:
+                    try:
+                        path = util.expand_path(input_file)
+                        input_manager.add_file(path, action)
+                    except Exception as exc:
+                        input_log.error(exc)
+                        return getattr(exc, "code", 128)
+
             pformat = config.get(("output",), "progress", True)
-            if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
-                urls = progress(urls, pformat)
-            else:
-                urls = iter(urls)
+            if pformat and len(input_manager.urls) > 1 and \
+                    args.loglevel < logging.ERROR:
+                input_manager.progress(pformat)
 
+            # process input URLs
             retval = 0
-            url = next(urls, None)
-
-            while url is not None:
+            for url in input_manager:
                 try:
                     log.debug("Starting %s for '%s'", jobtype.__name__, url)
-                    if isinstance(url, util.ExtendedUrl):
+
+                    if isinstance(url, ExtendedUrl):
                         for opts in url.gconfig:
                             config.set(*opts)
                         with config.apply(url.lconfig):
-                            retval |= jobtype(url.value).run()
+                            status = jobtype(url.value).run()
                     else:
-                        retval |= jobtype(url).run()
+                        status = jobtype(url).run()
+
+                    if status:
+                        retval |= status
+                    else:
+                        input_manager.success()
+
                 except exception.TerminateExtraction:
                     pass
                 except exception.RestartExtraction:
@@ -291,8 +281,7 @@ def main():
                     log.error("Unsupported URL '%s'", url)
                     retval |= 64
 
-                url = next(urls, None)
-
+                input_manager.next()
             return retval
 
     except KeyboardInterrupt:
@@ -304,3 +293,206 @@ def main():
         if exc.errno != errno.EPIPE:
             raise
         return 1
+
+
+class InputManager():
+
+    def __init__(self):
+        self.urls = []
+        self.files = ()
+        self._index = 0
+        self._current = None
+        self._pformat = None
+
+    def add_url(self, url):
+        self.urls.append(url)
+
+    def add_list(self, urls):
+        self.urls += urls
+
+    def add_file(self, path, action=None):
+        """Process an input file.
+
+        Lines starting with '#' and empty lines will be ignored.
+        Lines starting with '-' will be interpreted as a key-value pair
+        separated by an '='. where
+        'key' is a dot-separated option name and
+        'value' is a JSON-parsable string.
+        These configuration options will be applied
+        while processing the next URL only.
+        Lines starting with '-G' are the same as above, except these options
+        will be applied for *all* following URLs, i.e. they are Global.
+        Everything else will be used as a potential URL.
+
+        Example input file:
+
+        # settings global options
+        -G base-directory = "/tmp/"
+        -G skip = false
+
+        # setting local options for the next URL
+        -filename="spaces_are_optional.jpg"
+        -skip = true
+
+        https://example.org/
+
+        # next URL uses default filename and 'skip' is false.
+        https://example.com/index.htm  # comment1
+        https://example.com/404.htm    # comment2
+        """
+        if path == "-" and not action:
+            try:
+                lines = sys.stdin.readlines()
+            except Exception:
+                raise exception.InputFileError("stdin is not readable")
+            path = None
+        else:
+            try:
+                with open(path, encoding="utf-8") as fp:
+                    lines = fp.readlines()
+            except Exception as exc:
+                raise exception.InputFileError(str(exc))
+
+        if self.files:
+            self.files[path] = lines
+        else:
+            self.files = {path: lines}
+
+        if action == "c":
+            action = self._action_comment
+        elif action == "d":
+            action = self._action_delete
+        else:
+            action = None
+
+        gconf = []
+        lconf = []
+        indicies = []
+        strip_comment = None
+        append = self.urls.append
+
+        for n, line in enumerate(lines):
+            line = line.strip()
+
+            if not line or line[0] == "#":
+                # empty line or comment
+                continue
+
+            elif line[0] == "-":
+                # config spec
+                if len(line) >= 2 and line[1] == "G":
+                    conf = gconf
+                    line = line[2:]
+                else:
+                    conf = lconf
+                    line = line[1:]
+                    if action:
+                        indicies.append(n)
+
+                key, sep, value = line.partition("=")
+                if not sep:
+                    raise exception.InputFileError(
+                        "Invalid KEY=VALUE pair '%s' on line %s in %s",
+                        line, n+1, path)
+
+                try:
+                    value = util.json_loads(value.strip())
+                except ValueError as exc:
+                    self.log.debug("%s: %s", exc.__class__.__name__, exc)
+                    raise exception.InputFileError(
+                        "Unable to parse '%s' on line %s in %s",
+                        value, n+1, path)
+
+                key = key.strip().split(".")
+                conf.append((key[:-1], key[-1], value))
+
+            else:
+                # url
+                if " #" in line or "\t#" in line:
+                    if strip_comment is None:
+                        import re
+                        strip_comment = re.compile(r"\s+#.*").sub
+                    line = strip_comment("", line)
+                if gconf or lconf:
+                    url = ExtendedUrl(line, gconf, lconf)
+                    gconf = []
+                    lconf = []
+                else:
+                    url = line
+
+                if action:
+                    indicies.append(n)
+                    append((url, path, action, indicies))
+                    indicies = []
+                else:
+                    append(url)
+
+    def progress(self, pformat=True):
+        if pformat is True:
+            pformat = "[{current}/{total}] {url}\n"
+        else:
+            pformat += "\n"
+        self._pformat = pformat.format_map
+
+    def next(self):
+        self._index += 1
+
+    def success(self):
+        if self._current:
+            url, path, action, indicies = self._current
+            lines = self.files[path]
+            action(lines, indicies)
+            try:
+                with open(path, "w", encoding="utf-8") as fp:
+                    fp.writelines(lines)
+            except Exception as exc:
+                self.log.warning(
+                    "Unable to update '%s' (%s: %s)",
+                    path, exc.__class__.__name__, exc)
+
+    @staticmethod
+    def _action_comment(lines, indicies):
+        for i in indicies:
+            lines[i] = "# " + lines[i]
+
+    @staticmethod
+    def _action_delete(lines, indicies):
+        for i in indicies:
+            lines[i] = ""
+
+    def __iter__(self):
+        self._index = 0
+        return self
+
+    def __next__(self):
+        try:
+            item = self.urls[self._index]
+        except IndexError:
+            raise StopIteration
+
+        if isinstance(item, tuple):
+            self._current = item
+            item = item[0]
+        else:
+            self._current = None
+
+        if self._pformat:
+            output.stderr_write(self._pformat({
+                "total"  : len(self.urls),
+                "current": self._index + 1,
+                "url"    : item,
+            }))
+        return item
+
+
+class ExtendedUrl():
+    """URL with attached config key-value pairs"""
+    __slots__ = ("value", "gconfig", "lconfig")
+
+    def __init__(self, url, gconf, lconf):
+        self.value = url
+        self.gconfig = gconf
+        self.lconfig = lconf
+
+    def __str__(self):
+        return self.value
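
A minimal usage sketch of the class above, mirroring how main() drives it. The file name 'urls.txt' and all URLs are made up, and a constant stands in for the real jobtype(url).run() call; it assumes the gallery_dl module context where InputManager is defined.

import logging

# create a small input file so add_file() has something to read
with open("urls.txt", "w", encoding="utf-8") as fp:
    fp.write("https://example.org/a\nhttps://example.org/b\n")

manager = InputManager()
manager.log = logging.getLogger("inputfile")
manager.add_list(["https://example.org/c"])   # URLs from the command line
manager.add_file("urls.txt", action="c")      # -I: comment out on success

for url in manager:          # __iter__/__next__ hand out one URL at a time
    status = 0               # stand-in for jobtype(url).run()
    if status:
        pass                 # failed: leave the input-file line untouched
    else:
        manager.success()    # succeeded: rewrite urls.txt for this entry
    manager.next()           # advance to the next collected URL

# urls.txt now reads:
#   # https://example.org/a
#   # https://example.org/b
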
@@ -21,6 +21,7 @@ Exception
      |    +-- FilenameFormatError
      |    +-- DirectoryFormatError
      +-- FilterError
+     +-- InputFileError
      +-- NoExtractorError
      +-- StopExtraction
      +-- TerminateExtraction
@@ -99,6 +100,15 @@ class FilterError(GalleryDLException):
     code = 32
 
 
+class InputFileError(GalleryDLException):
+    """Error when parsing input file"""
+    code = 32
+
+    def __init__(self, message, *args):
+        GalleryDLException.__init__(
+            self, message % args if args else message)
+
+
 class NoExtractorError(GalleryDLException):
     """No extractor can handle the given URL"""
     code = 64
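
For reference, the printf-style constructor above lets the raise sites in InputManager.add_file pass the format string and its arguments separately; a hypothetical instance (values made up):

exc = InputFileError(
    "Invalid KEY=VALUE pair '%s' on line %s in %s",
    "-skip true", 7, "urls.txt")
# the stored message is the %-formatted string:
#   Invalid KEY=VALUE pair '-skip true' on line 7 in urls.txt
# and exc.code (32) is what main() returns via getattr(exc, "code", 128)
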
@@ -59,6 +59,12 @@ class OptionAction(argparse.Action):
         namespace.options_pp[key] = value
 
 
+class InputfileAction(argparse.Action):
+    """Process input files"""
+    def __call__(self, parser, namespace, value, option_string=None):
+        namespace.input_files.append((value, self.const))
+
+
 class Formatter(argparse.HelpFormatter):
     """Custom HelpFormatter class to customize help output"""
     def __init__(self, prog):
@@ -100,12 +106,6 @@
         action="version", version=version.__version__,
         help="Print program version and exit",
     )
-    general.add_argument(
-        "-i", "--input-file",
-        dest="inputfiles", metavar="FILE", action="append",
-        help=("Download URLs found in FILE ('-' for stdin). "
-              "More than one --input-file can be specified"),
-    )
     general.add_argument(
         "-f", "--filename",
         dest="filename", metavar="FORMAT",
@@ -149,6 +149,32 @@
              "(ALL to delete everything)",
     )
 
+    input = parser.add_argument_group("Input Options")
+    input.add_argument(
+        "urls",
+        metavar="URL", nargs="*",
+        help=argparse.SUPPRESS,
+    )
+    input.add_argument(
+        "-i", "--input-file",
+        dest="input_files", metavar="FILE", action=InputfileAction, const=None,
+        default=[],
+        help=("Download URLs found in FILE ('-' for stdin). "
+              "More than one --input-file can be specified"),
+    )
+    input.add_argument(
+        "-I", "--input-file-comment",
+        dest="input_files", metavar="FILE", action=InputfileAction, const="c",
+        help=("Download URLs found in FILE. "
+              "Comment them out after they were downloaded successfully."),
+    )
+    input.add_argument(
+        "-x", "--input-file-delete",
+        dest="input_files", metavar="FILE", action=InputfileAction, const="d",
+        help=("Download URLs found in FILE. "
+              "Delete them after they were downloaded successfully."),
+    )
+
     output = parser.add_argument_group("Output Options")
     output.add_argument(
         "-q", "--quiet",
@@ -534,10 +560,4 @@
         help="Additional '<key>=<value>' post processor options",
     )
 
-    parser.add_argument(
-        "urls",
-        metavar="URL", nargs="*",
-        help=argparse.SUPPRESS,
-    )
-
     return parser
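
A self-contained sketch of how InputfileAction accumulates (FILE, const) pairs on the shared input_files destination; the parser below is a stripped-down stand-in for build_parser() and the file names are placeholders.

import argparse

class InputfileAction(argparse.Action):
    """Append (FILE, action flag) to a shared list"""
    def __call__(self, parser, namespace, value, option_string=None):
        namespace.input_files.append((value, self.const))

parser = argparse.ArgumentParser()
parser.add_argument("-i", dest="input_files", action=InputfileAction,
                    const=None, default=[])
parser.add_argument("-I", dest="input_files", action=InputfileAction,
                    const="c")
parser.add_argument("-x", dest="input_files", action=InputfileAction,
                    const="d")

args = parser.parse_args(["-i", "a.txt", "-I", "b.txt", "-x", "c.txt"])
print(args.input_files)
# [('a.txt', None), ('b.txt', 'c'), ('c.txt', 'd')]
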
@@ -487,82 +487,6 @@ CODES = {
 }
 
 
-def parse_inputfile(file, log):
-    """Filter and process strings from an input file.
-
-    Lines starting with '#' and empty lines will be ignored.
-    Lines starting with '-' will be interpreted as a key-value pair separated
-    by an '='. where 'key' is a dot-separated option name and 'value' is a
-    JSON-parsable value. These configuration options will be applied while
-    processing the next URL.
-    Lines starting with '-G' are the same as above, except these options will
-    be applied for *all* following URLs, i.e. they are Global.
-    Everything else will be used as a potential URL.
-
-    Example input file:
-
-    # settings global options
-    -G base-directory = "/tmp/"
-    -G skip = false
-
-    # setting local options for the next URL
-    -filename="spaces_are_optional.jpg"
-    -skip = true
-
-    https://example.org/
-
-    # next URL uses default filename and 'skip' is false.
-    https://example.com/index.htm  # comment1
-    https://example.com/404.htm    # comment2
-    """
-    gconf = []
-    lconf = []
-    strip_comment = None
-
-    for line in file:
-        line = line.strip()
-
-        if not line or line[0] == "#":
-            # empty line or comment
-            continue
-
-        elif line[0] == "-":
-            # config spec
-            if len(line) >= 2 and line[1] == "G":
-                conf = gconf
-                line = line[2:]
-            else:
-                conf = lconf
-                line = line[1:]
-
-            key, sep, value = line.partition("=")
-            if not sep:
-                log.warning("input file: invalid <key>=<value> pair: %s", line)
-                continue
-
-            try:
-                value = json_loads(value.strip())
-            except ValueError as exc:
-                log.warning("input file: unable to parse '%s': %s", value, exc)
-                continue
-
-            key = key.strip().split(".")
-            conf.append((key[:-1], key[-1], value))
-
-        else:
-            # url
-            if " #" in line or "\t#" in line:
-                if strip_comment is None:
-                    strip_comment = re.compile(r"\s+#.*").sub
-                line = strip_comment("", line)
-            if gconf or lconf:
-                yield ExtendedUrl(line, gconf, lconf)
-                gconf = []
-                lconf = []
-            else:
-                yield line
-
-
 class CustomNone():
     """None-style type that supports more operations than regular None"""
     __slots__ = ()
@@ -873,15 +797,6 @@ class FilterPredicate():
             raise exception.FilterError(exc)
 
 
-class ExtendedUrl():
-    """URL with attached config key-value pairs"""
-    def __init__(self, url, gconf, lconf):
-        self.value, self.gconfig, self.lconfig = url, gconf, lconf
-
-    def __str__(self):
-        return self.value
-
-
 class DownloadArchive():
 
     def __init__(self, path, format_string, pragma=None,