diff --git a/docs/configuration.rst b/docs/configuration.rst index 244a9ba2..37179f67 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -111,13 +111,19 @@ extractor.*.skip =========== ===== Type ``bool`` or ``string`` Default ``true`` -Description Controls the behavior when downloading a file whose filename +Description Controls the behavior when downloading files whose filename already exists. - * ``true``: Skip the download - * ``false``: Overwrite the already existing file + * ``true``: Skip downloads + * ``false``: Overwrite already existing files + * ``"abort"``: Abort the current extractor run + * ``"abort:N"``: Skip downloads and abort extractor run + after ``N`` consecutive skips + * ``"exit"``: Exit the program altogether + * ``"exit:N"``: Skip downloads and exit the program + after ``N`` consecutive skips =========== ===== diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 6017f47a..12f1e5c7 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -12,7 +12,7 @@ import json import hashlib import logging from . import extractor, downloader, postprocessor -from . import config, util, output, exception +from . import config, text, util, output, exception from .extractor.message import Message @@ -183,7 +183,7 @@ class DownloadJob(Job): self.pathfmt.set_keywords(keywords) if self.pathfmt.exists(self.archive): - self.out.skip(self.pathfmt.path) + self.handle_skip() return if self.sleep: @@ -204,7 +204,7 @@ class DownloadJob(Job): return if not self.pathfmt.temppath: - self.out.skip(self.pathfmt.path) + self.handle_skip() return # run post processors @@ -217,6 +217,7 @@ class DownloadJob(Job): self.out.success(self.pathfmt.path, 0) if self.archive: self.archive.add(keywords) + self._skipcnt = 0 def handle_urllist(self, urls, keywords): """Download the resource specified in 'url'""" @@ -241,6 +242,13 @@ class DownloadJob(Job): for pp in self.postprocessors: pp.finalize() + def handle_skip(self): + self.out.skip(self.pathfmt.path) + if self._skipexc: + self._skipcnt += 1 + if self._skipcnt >= self._skipmax: + raise self._skipexc() + def download(self, url): """Download 'url'""" scheme = url.partition(":")[0] @@ -272,6 +280,20 @@ class DownloadJob(Job): self.pathfmt = util.PathFormat(self.extractor) self.sleep = self.extractor.config("sleep") + skip = self.extractor.config("skip", True) + if skip: + self._skipexc = None + if isinstance(skip, str): + skip, _, smax = skip.partition(":") + if skip == "abort": + self._skipexc = exception.StopExtraction + elif skip == "exit": + self._skipexc = sys.exit + self._skipcnt = 0 + self._skipmax = text.parse_int(smax) + else: + self.pathfmt.exists = lambda x=None: False + archive = self.extractor.config("archive") if archive: path = util.expand_path(archive) diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 89934967..8185dbaa 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -452,17 +452,6 @@ class PathFormat(): if os.altsep: self.basedirectory = self.basedirectory.replace(os.altsep, os.sep) - skip = extractor.config("skip", True) - if skip: - if skip == "abort": - self._skipexc = exception.StopExtraction - elif skip == "exit": - self._skipexc = sys.exit - else: - self._skipexc = None - else: - self.exists = lambda x=None: False - def open(self, mode="wb"): """Open file and return a corresponding file object""" return open(self.temppath, mode) @@ -471,9 +460,8 @@ class PathFormat(): """Return True if the file exists on disk or in 'archive'""" if (archive and archive.check(self.keywords) or self.has_extension and os.path.exists(self.realpath)): - if self._skipexc: - raise self._skipexc() if not self.has_extension: + # adjust display name self.set_extension("") if self.path[-1] == ".": self.path = self.path[:-1]