From 8124c16a5056e0506f92648da6b63e7d9c639df8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 8 Nov 2022 17:01:10 +0100 Subject: [PATCH] split 'build_path' from 'set_filename' and 'set_extension' Do not automatically build a new path when setting file metadata or updating its extension. --- gallery_dl/downloader/http.py | 10 ++++--- gallery_dl/downloader/ytdl.py | 2 ++ gallery_dl/job.py | 16 ++++++----- gallery_dl/path.py | 41 +++++++++++++---------------- gallery_dl/postprocessor/compare.py | 5 ++-- gallery_dl/postprocessor/ugoira.py | 13 ++++++--- test/test_downloader.py | 1 + test/test_postprocessor.py | 3 +++ 8 files changed, 53 insertions(+), 38 deletions(-) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 8e112dc6..fb824361 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -186,16 +186,19 @@ class HttpDownloader(DownloaderBase): size, self.maxsize) return False + build_path = False + # set missing filename extension from MIME type if not pathfmt.extension: pathfmt.set_extension(self._find_extension(response)) - if pathfmt.exists(): - pathfmt.temppath = "" - return True + build_path = True # set metadata from HTTP headers if self.metadata: kwdict[self.metadata] = util.extract_headers(response) + build_path = True + + if build_path: pathfmt.build_path() if pathfmt.exists(): pathfmt.temppath = "" @@ -328,6 +331,7 @@ class HttpDownloader(DownloaderBase): for ext, check in SIGNATURE_CHECKS.items(): if check(file_header): pathfmt.set_extension(ext) + pathfmt.build_path() return True return False diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index efa957b4..c44ea0a5 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -98,6 +98,7 @@ class YoutubeDLDownloader(DownloaderBase): pathfmt.realdirectory + filename) else: pathfmt.set_extension(info_dict["ext"]) + pathfmt.build_path() if pathfmt.exists(): pathfmt.temppath = "" @@ -118,6 +119,7 @@ class YoutubeDLDownloader(DownloaderBase): def _download_playlist(self, ytdl_instance, pathfmt, info_dict): pathfmt.set_extension("%(playlist_index)s.%(ext)s") + pathfmt.build_path() self._set_outtmpl(ytdl_instance, pathfmt.realpath) for entry in info_dict["entries"]: diff --git a/gallery_dl/job.py b/gallery_dl/job.py index c03a7c90..2d8ab4ef 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -232,11 +232,14 @@ class DownloadJob(Job): self.handle_skip() return - if pathfmt.exists(): - if archive: - archive.add(kwdict) - self.handle_skip() - return + if pathfmt.extension and not self.metadata_http: + pathfmt.build_path() + + if pathfmt.exists(): + if archive: + archive.add(kwdict) + self.handle_skip() + return if self.sleep: self.extractor.sleep(self.sleep(), "download") @@ -536,12 +539,11 @@ class SimulationJob(DownloadJob): def handle_url(self, url, kwdict): if not kwdict["extension"]: kwdict["extension"] = "jpg" - self.pathfmt.set_filename(kwdict) if self.sleep: self.extractor.sleep(self.sleep(), "download") if self.archive: self.archive.add(kwdict) - self.out.skip(self.pathfmt.path) + self.out.skip(self.pathfmt.build_filename(kwdict)) def handle_directory(self, kwdict): if not self.pathfmt: diff --git a/gallery_dl/path.py b/gallery_dl/path.py index 28c07c3c..efcacf6d 100644 --- a/gallery_dl/path.py +++ b/gallery_dl/path.py @@ -15,16 +15,16 @@ import functools from . import util, formatter, exception WINDOWS = util.WINDOWS +EXTENSION_MAP = { + "jpeg": "jpg", + "jpe" : "jpg", + "jfif": "jpg", + "jif" : "jpg", + "jfi" : "jpg", +} class PathFormat(): - EXTENSION_MAP = { - "jpeg": "jpg", - "jpe" : "jpg", - "jfif": "jpg", - "jif" : "jpg", - "jfi" : "jpg", - } def __init__(self, extractor): config = extractor.config @@ -78,7 +78,7 @@ class PathFormat(): extension_map = config("extension-map") if extension_map is None: - extension_map = self.EXTENSION_MAP + extension_map = EXTENSION_MAP self.extension_map = extension_map.get restrict = config("path-restrict", "auto") @@ -161,12 +161,14 @@ class PathFormat(): num = 1 try: while True: - self.prefix = str(num) + "." - self.set_extension(self.extension, False) + prefix = format(num) + "." + self.kwdict["extension"] = prefix + self.extension + self.build_path() os.stat(self.realpath) # raises OSError if file doesn't exist num += 1 except OSError: pass + self.prefix = prefix return False def set_directory(self, kwdict): @@ -198,28 +200,21 @@ class PathFormat(): def set_filename(self, kwdict): """Set general filename data""" self.kwdict = kwdict - self.temppath = self.prefix = "" + self.filename = self.temppath = self.prefix = "" ext = kwdict["extension"] kwdict["extension"] = self.extension = self.extension_map(ext, ext) - if self.extension: - self.build_path() - else: - self.filename = "" - def set_extension(self, extension, real=True): """Set filename extension""" - extension = self.extension_map(extension, extension) - if real: - self.extension = extension + self.extension = extension = self.extension_map(extension, extension) self.kwdict["extension"] = self.prefix + extension - self.build_path() def fix_extension(self, _=None): """Fix filenames without a given filename extension""" if not self.extension: - self.set_extension("", False) + self.kwdict["extension"] = self.prefix + self.extension_map("", "") + self.build_path() if self.path[-1] == ".": self.path = self.path[:-1] self.temppath = self.realpath = self.realpath[:-1] @@ -296,7 +291,9 @@ class PathFormat(): if self.extension: self.temppath += ".part" else: - self.set_extension("part", False) + self.kwdict["extension"] = self.prefix + self.extension_map( + "part", "part") + self.build_path() if part_directory: self.temppath = os.path.join( part_directory, diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py index b3b94f7a..910e1d77 100644 --- a/gallery_dl/postprocessor/compare.py +++ b/gallery_dl/postprocessor/compare.py @@ -51,8 +51,9 @@ class ComparePP(PostProcessor): num = 1 try: while not self._compare(pathfmt.realpath, pathfmt.temppath): - pathfmt.prefix = str(num) + "." - pathfmt.set_extension(pathfmt.extension, False) + pathfmt.prefix = prefix = format(num) + "." + pathfmt.kwdict["extension"] = prefix + pathfmt.extension + pathfmt.build_path() num += 1 return self._equal(pathfmt) except OSError: diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py index 98c82468..9d2cb34e 100644 --- a/gallery_dl/postprocessor/ugoira.py +++ b/gallery_dl/postprocessor/ugoira.py @@ -90,15 +90,17 @@ class UgoiraPP(PostProcessor): if pathfmt.extension != "zip": return - if "frames" in pathfmt.kwdict: - self._frames = pathfmt.kwdict["frames"] - elif "pixiv_ugoira_frame_data" in pathfmt.kwdict: - self._frames = pathfmt.kwdict["pixiv_ugoira_frame_data"]["data"] + kwdict = pathfmt.kwdict + if "frames" in kwdict: + self._frames = kwdict["frames"] + elif "pixiv_ugoira_frame_data" in kwdict: + self._frames = kwdict["pixiv_ugoira_frame_data"]["data"] else: return if self.delete: pathfmt.set_extension(self.extension) + pathfmt.build_path() def convert(self, pathfmt): if not self._frames: @@ -115,6 +117,8 @@ class UgoiraPP(PostProcessor): # process frames and collect command-line arguments pathfmt.set_extension(self.extension) + pathfmt.build_path() + args = self._process(pathfmt, tempdir) if self.args: args += self.args @@ -151,6 +155,7 @@ class UgoiraPP(PostProcessor): pathfmt.delete = True else: pathfmt.set_extension("zip") + pathfmt.build_path() def _exec(self, args): self.log.debug(args) diff --git a/test/test_downloader.py b/test/test_downloader.py index 9d460c33..ae826a0f 100644 --- a/test/test_downloader.py +++ b/test/test_downloader.py @@ -131,6 +131,7 @@ class TestDownloaderBase(unittest.TestCase): pathfmt = cls.job.pathfmt pathfmt.set_directory(kwdict) pathfmt.set_filename(kwdict) + pathfmt.build_path() if content: mode = "w" + ("b" if isinstance(content, bytes) else "") diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index ba37ee08..7da2089d 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -97,6 +97,7 @@ class BasePostprocessorTest(unittest.TestCase): self.pathfmt = self.job.pathfmt self.pathfmt.set_directory(kwdict) self.pathfmt.set_filename(kwdict) + self.pathfmt.build_path() pp = postprocessor.find(self.__class__.__name__[:-4].lower()) return pp(self.job, options) @@ -118,6 +119,7 @@ class ClassifyTest(BasePostprocessorTest): for ext in exts }) self.pathfmt.set_extension("jpg") + self.pathfmt.build_path() pp.prepare(self.pathfmt) path = os.path.join(self.dir.name, "test", "Pictures") @@ -150,6 +152,7 @@ class ClassifyTest(BasePostprocessorTest): "bar": "foo/bar", }) self.pathfmt.set_extension("foo") + self.pathfmt.build_path() pp.prepare(self.pathfmt) path = os.path.join(self.dir.name, "test", "foo", "bar")