diff --git a/docs/options.md b/docs/options.md index 2486cbfe..548b5868 100644 --- a/docs/options.md +++ b/docs/options.md @@ -6,8 +6,6 @@ ## General Options: -h, --help Print this help message and exit --version Print program version and exit - -i, --input-file FILE Download URLs found in FILE ('-' for stdin). - More than one --input-file can be specified -f, --filename FORMAT Filename format string for downloaded files ('/O' for "original" filenames) -d, --destination PATH Target location for file downloads @@ -19,6 +17,16 @@ --clear-cache MODULE Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything) +## Input Options: + -i, --input-file FILE Download URLs found in FILE ('-' for stdin). + More than one --input-file can be specified + -I, --input-file-comment FILE + Download URLs found in FILE. Comment them out + after they were downloaded successfully. + -x, --input-file-delete FILE + Download URLs found in FILE. Delete them after + they were downloaded successfully. 
+ ## Output Options: -q, --quiet Activate quiet mode -v, --verbose Print various debugging information diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index d3a0f588..1d64fefc 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -18,19 +18,6 @@ __email__ = "mike_faehrmann@web.de" __version__ = version.__version__ -def progress(urls, pformat): - """Wrapper around urls to output a simple progress indicator""" - if pformat is True: - pformat = "[{current}/{total}] {url}\n" - else: - pformat += "\n" - - pinfo = {"total": len(urls)} - for pinfo["current"], pinfo["url"] in enumerate(urls, 1): - output.stderr_write(pformat.format_map(pinfo)) - yield pinfo["url"] - - def main(): try: parser = option.build_parser() @@ -224,7 +211,7 @@ def main(): return config.initialize() else: - if not args.urls and not args.inputfiles: + if not args.urls and not args.input_files: parser.error( "The following arguments are required: URL\n" "Use 'gallery-dl --help' to get a list of all options.") @@ -238,22 +225,6 @@ def main(): else: jobtype = args.jobtype or job.DownloadJob - urls = args.urls - if args.inputfiles: - for inputfile in args.inputfiles: - try: - if inputfile == "-": - if sys.stdin: - urls += util.parse_inputfile(sys.stdin, log) - else: - log.warning( - "input file: stdin is not readable") - else: - with open(inputfile, encoding="utf-8") as file: - urls += util.parse_inputfile(file, log) - except OSError as exc: - log.warning("input file: %s", exc) - # unsupported file logging handler handler = output.setup_logging_handler( "unsupportedfile", fmt="{message}") @@ -263,25 +234,44 @@ def main(): ulog.propagate = False job.Job.ulog = ulog + # collect input URLs + input_manager = InputManager() + input_manager.log = input_log = logging.getLogger("inputfile") + input_manager.add_list(args.urls) + + if args.input_files: + for input_file, action in args.input_files: + try: + path = util.expand_path(input_file) + input_manager.add_file(path, action) + 
class InputManager():
    """Collect input URLs and remember which input-file lines produced them.

    URLs are added from plain lists (add_url(), add_list()) or from
    input files (add_file()). Iterating over the manager yields the URL at
    the current position; the position only advances when next() is
    called, so a caller that repeats its loop body without calling next()
    receives the same URL again.
    """

    def __init__(self):
        # Default logger, so the class also works when the caller does not
        # assign 'log' itself; callers may still override it.
        self.log = logging.getLogger("inputfile")
        # Plain URL strings, ExtendedUrl objects, or
        # (url, path, action, indicies) tuples for files with an action
        self.urls = []
        # Maps an input-file path (None for stdin) to its list of lines
        self.files = {}
        self._index = 0
        self._current = None
        self._pformat = None

    def add_url(self, url):
        """Append a single URL"""
        self.urls.append(url)

    def add_list(self, urls):
        """Append all URLs from 'urls'"""
        self.urls += urls

    def add_file(self, path, action=None):
        """Process an input file.

        Lines starting with '#' and empty lines will be ignored.
        Lines starting with '-' will be interpreted as a key-value pair
          separated by an '=', where
          'key' is a dot-separated option name and
          'value' is a JSON-parsable string.
          These configuration options will be applied
          while processing the next URL only.
        Lines starting with '-G' are the same as above, except these options
          will be applied for *all* following URLs, i.e. they are Global.
        Everything else will be used as a potential URL.

        Example input file:

        # settings global options
        -G base-directory = "/tmp/"
        -G skip = false

        # setting local options for the next URL
        -filename="spaces_are_optional.jpg"
        -skip = true

        https://example.org/

        # next URL uses default filename and 'skip' is false.
        https://example.com/index.htm # comment1
        https://example.com/404.htm   # comment2

        'path' is the file to read; '-' reads from stdin, but only when
        no 'action' is given. 'action' selects what success() does with
        the lines of a URL: "c" comments them out, "d" deletes them,
        anything else leaves the file untouched.

        Raises exception.InputFileError if the input cannot be read or
        contains an invalid option line. In that case no URL from this
        file is added.
        """
        if path == "-" and not action:
            try:
                lines = sys.stdin.readlines()
            except Exception as exc:
                raise exception.InputFileError(
                    "stdin is not readable") from exc
            path = None
        else:
            try:
                with open(path, encoding="utf-8") as fp:
                    lines = fp.readlines()
            except Exception as exc:
                raise exception.InputFileError(str(exc)) from exc

        if action == "c":
            action = self._action_comment
        elif action == "d":
            action = self._action_delete
        else:
            action = None

        gconf = []
        lconf = []
        indicies = []
        strip_comment = None
        # Collect into a local list first, so a parse error further down
        # does not leave a partially processed file behind.
        urls = []
        append = urls.append

        for n, line in enumerate(lines):
            line = line.strip()

            if not line or line[0] == "#":
                # empty line or comment
                continue

            elif line[0] == "-":
                # config spec
                if len(line) >= 2 and line[1] == "G":
                    conf = gconf
                    line = line[2:]
                else:
                    conf = lconf
                    line = line[1:]
                    # local options belong to the next URL and
                    # get commented out / deleted together with it
                    if action:
                        indicies.append(n)

                key, sep, value = line.partition("=")
                if not sep:
                    raise exception.InputFileError(
                        "Invalid KEY=VALUE pair '%s' on line %s in %s",
                        line, n+1, path)

                try:
                    value = util.json_loads(value.strip())
                except ValueError as exc:
                    self.log.debug("%s: %s", exc.__class__.__name__, exc)
                    raise exception.InputFileError(
                        "Unable to parse '%s' on line %s in %s",
                        value, n+1, path) from exc

                key = key.strip().split(".")
                conf.append((key[:-1], key[-1], value))

            else:
                # url
                if " #" in line or "\t#" in line:
                    if strip_comment is None:
                        # imported lazily; only needed for inline comments
                        import re
                        strip_comment = re.compile(r"\s+#.*").sub
                    line = strip_comment("", line)
                if gconf or lconf:
                    url = ExtendedUrl(line, gconf, lconf)
                    gconf = []
                    lconf = []
                else:
                    url = line

                if action:
                    indicies.append(n)
                    append((url, path, action, indicies))
                    indicies = []
                else:
                    append(url)

        self.files[path] = lines
        self.urls += urls

    def progress(self, pformat=True):
        """Enable progress output for every URL returned by iteration.

        'pformat' is either True to use the default format or a format
        string with 'current', 'total', and 'url' fields.
        """
        if pformat is True:
            pformat = "[{current}/{total}] {url}\n"
        else:
            pformat += "\n"
        self._pformat = pformat.format_map

    def next(self):
        """Advance to the next URL"""
        self._index += 1

    def success(self):
        """Apply the input-file action for the current URL.

        Does nothing for URLs without an action. The lines belonging to
        the current URL are modified in memory and the whole file is
        written back; a failure to write is logged as a warning.
        Calling this more than once for the same URL has no further effect.
        """
        if not self._current:
            return

        _, path, action, indicies = self._current
        # forget the entry so its lines cannot be modified a second time
        self._current = None

        lines = self.files[path]
        action(lines, indicies)
        try:
            with open(path, "w", encoding="utf-8") as fp:
                fp.writelines(lines)
        except Exception as exc:
            self.log.warning(
                "Unable to update '%s' (%s: %s)",
                path, exc.__class__.__name__, exc)

    @staticmethod
    def _action_comment(lines, indicies):
        """Prefix the given lines with '# '"""
        for i in indicies:
            lines[i] = "# " + lines[i]

    @staticmethod
    def _action_delete(lines, indicies):
        """Blank out the given lines (keeps all other indices valid)"""
        for i in indicies:
            lines[i] = ""

    def __iter__(self):
        self._index = 0
        return self

    def __next__(self):
        try:
            item = self.urls[self._index]
        except IndexError:
            raise StopIteration from None

        if isinstance(item, tuple):
            # URL from an input file with an action; remember it for success()
            self._current = item
            item = item[0]
        else:
            self._current = None

        if self._pformat:
            output.stderr_write(self._pformat({
                "total"  : len(self.urls),
                "current": self._index + 1,
                "url"    : item,
            }))
        return item


class ExtendedUrl():
    """URL with attached config key-value pairs"""
    __slots__ = ("value", "gconfig", "lconfig")

    def __init__(self, url, gconf, lconf):
        self.value = url
        self.gconfig = gconf
        self.lconfig = lconf

    def __str__(self):
        return self.value
class InputfileAction(argparse.Action):
    """Collect input files together with the 'const' of the option used.

    Every occurrence of an option using this action adds a
    (value, const) tuple to the list stored under the option's 'dest'.
    """
    def __call__(self, parser, namespace, value, option_string=None):
        current = getattr(namespace, self.dest, None)
        # Build a new list instead of appending in place: argparse stores
        # the very same 'default' list object in every namespace, so an
        # in-place append would leak entries into the parser default and
        # into later parse_args() calls.
        items = list(current) if current else []
        items.append((value, self.const))
        setattr(namespace, self.dest, items)
" + "More than one --input-file can be specified"), + ) + input.add_argument( + "-I", "--input-file-comment", + dest="input_files", metavar="FILE", action=InputfileAction, const="c", + help=("Download URLs found in FILE. " + "Comment them out after they were downloaded successfully."), + ) + input.add_argument( + "-x", "--input-file-delete", + dest="input_files", metavar="FILE", action=InputfileAction, const="d", + help=("Download URLs found in FILE. " + "Delete them after they were downloaded successfully."), + ) + output = parser.add_argument_group("Output Options") output.add_argument( "-q", "--quiet", @@ -534,10 +560,4 @@ def build_parser(): help="Additional '=' post processor options", ) - parser.add_argument( - "urls", - metavar="URL", nargs="*", - help=argparse.SUPPRESS, - ) - return parser diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 6255d49e..62aa12da 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -487,82 +487,6 @@ CODES = { } -def parse_inputfile(file, log): - """Filter and process strings from an input file. - - Lines starting with '#' and empty lines will be ignored. - Lines starting with '-' will be interpreted as a key-value pair separated - by an '='. where 'key' is a dot-separated option name and 'value' is a - JSON-parsable value. These configuration options will be applied while - processing the next URL. - Lines starting with '-G' are the same as above, except these options will - be applied for *all* following URLs, i.e. they are Global. - Everything else will be used as a potential URL. - - Example input file: - - # settings global options - -G base-directory = "/tmp/" - -G skip = false - - # setting local options for the next URL - -filename="spaces_are_optional.jpg" - -skip = true - - https://example.org/ - - # next URL uses default filename and 'skip' is false. 
- https://example.com/index.htm # comment1 - https://example.com/404.htm # comment2 - """ - gconf = [] - lconf = [] - strip_comment = None - - for line in file: - line = line.strip() - - if not line or line[0] == "#": - # empty line or comment - continue - - elif line[0] == "-": - # config spec - if len(line) >= 2 and line[1] == "G": - conf = gconf - line = line[2:] - else: - conf = lconf - line = line[1:] - - key, sep, value = line.partition("=") - if not sep: - log.warning("input file: invalid = pair: %s", line) - continue - - try: - value = json_loads(value.strip()) - except ValueError as exc: - log.warning("input file: unable to parse '%s': %s", value, exc) - continue - - key = key.strip().split(".") - conf.append((key[:-1], key[-1], value)) - - else: - # url - if " #" in line or "\t#" in line: - if strip_comment is None: - strip_comment = re.compile(r"\s+#.*").sub - line = strip_comment("", line) - if gconf or lconf: - yield ExtendedUrl(line, gconf, lconf) - gconf = [] - lconf = [] - else: - yield line - - class CustomNone(): """None-style type that supports more operations than regular None""" __slots__ = () @@ -873,15 +797,6 @@ class FilterPredicate(): raise exception.FilterError(exc) -class ExtendedUrl(): - """URL with attached config key-value pairs""" - def __init__(self, url, gconf, lconf): - self.value, self.gconfig, self.lconfig = url, gconf, lconf - - def __str__(self): - return self.value - - class DownloadArchive(): def __init__(self, path, format_string, pragma=None,