From b1bea8aaebde303de368a69222b577dce54c1194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 23 Jul 2019 17:36:07 +0200 Subject: [PATCH] add 'restrict-filenames' option (#348) --- docs/configuration.rst | 18 ++++++++++++++++++ docs/gallery-dl.conf | 1 + gallery_dl/text.py | 22 ---------------------- gallery_dl/util.py | 25 +++++++++++++++++++++++-- test/test_text.py | 20 -------------------- 5 files changed, 42 insertions(+), 44 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 32a529a0..0ceca9fb 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -108,6 +108,24 @@ Description Directory path used as the base for all download destinations. =========== ===== +extractor.*.restrict-filenames +------------------------------ +=========== ===== +Type ``string`` +Default ``"auto"`` +Example ``"/!? ()[]{}"`` +Description Characters to replace with underscores (``_``) when generating + directory and file names. + + Special values: + + * ``"auto"``: Use characters from ``"unix"`` or ``"windows"`` + depending on the local operating system + * ``"unix"``: ``"/"`` + * ``"windows"``: ``"<>:\"\\|/?*"`` +=========== ===== + + extractor.*.skip ---------------- =========== ===== diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 835ed17d..2e448be4 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -9,6 +9,7 @@ "skip": true, "sleep": 0, "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0", + "restrict-filenames": "auto", "artstation": { diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 81e87b50..72dad5b1 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -83,22 +83,6 @@ def nameext_from_url(url, data=None): return data -def clean_path_windows(path): - """Remove illegal characters from a path-segment (Windows)""" - try: - return re.sub(r'[<>:"\\/|?*]', "_", path) - except TypeError: - return "" - - -def clean_path_posix(path): - """Remove illegal characters from a path-segment (Posix)""" - try: - return path.replace("/", "_") - except AttributeError: - return "" - - def extract(txt, begin, end, pos=0): """Extract the text between 'begin' and 'end' from 'txt' @@ -266,12 +250,6 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"): return date_string -if os.name == "nt": - clean_path = clean_path_windows -else: - clean_path = clean_path_posix - - urljoin = urllib.parse.urljoin quote = urllib.parse.quote diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 14ae3d21..02d998d1 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -535,6 +535,27 @@ class PathFormat(): if os.altsep and os.altsep in self.basedirectory: self.basedirectory = self.basedirectory.replace(os.altsep, os.sep) + restrict = extractor.config("restrict-filenames", "auto") + if restrict == "auto": + restrict = "<>:\"\\/|?*" if os.name == "nt" else "/" + elif restrict == "unix": + restrict = "/" + elif restrict == "windows": + restrict = "<>:\"\\/|?*" + self.clean_path = self._build_cleanfunc(restrict) + + @staticmethod + def _build_cleanfunc(repl): + if not repl: + return lambda x: x + elif len(repl) == 1: + def func(x, r=repl): + return x.replace(r, "_") + else: + def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub): + return sub("_", x) + return func + def open(self, mode="wb"): """Open file and return a corresponding file object""" return open(self.temppath, mode) @@ -551,7 +572,7 @@ class PathFormat(): """Build directory path and create it if necessary""" try: segments = [ - text.clean_path( + self.clean_path( Formatter(segment, self.kwdefault) .format_map(keywords).strip()) for segment in self.directory_fmt @@ -597,7 +618,7 @@ class PathFormat(): def build_path(self): """Use filename-keywords and directory to build a full path""" try: - self.filename = text.clean_path( + self.filename = self.clean_path( self.formatter.format_map(self.keywords)) except Exception as exc: raise exception.FormatError(exc, "filename") diff --git a/test/test_text.py b/test/test_text.py index 405acd35..6a6d83ae 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -139,26 +139,6 @@ class TestText(unittest.TestCase): for value in INVALID: self.assertEqual(f(value), empty) - def test_clean_path_windows(self, f=text.clean_path_windows): - self.assertEqual(f(""), "") - self.assertEqual(f("foo"), "foo") - self.assertEqual(f("foo/bar"), "foo_bar") - self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo_________bar") - - # invalid arguments - for value in INVALID: - self.assertEqual(f(value), "") - - def test_clean_path_posix(self, f=text.clean_path_posix): - self.assertEqual(f(""), "") - self.assertEqual(f("foo"), "foo") - self.assertEqual(f("foo/bar"), "foo_bar") - self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo<>:\"\\_|?*bar") - - # invalid arguments - for value in INVALID: - self.assertEqual(f(value), "") - def test_extract(self, f=text.extract): txt = "" self.assertEqual(f(txt, "<", ">"), ("a" , 3))