1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-07-07 03:40:06 +02:00

Add --sanitize-paths option (#1452)

This commit is contained in:
canh 2022-03-17 15:27:36 +01:00 committed by GitHub
parent 37a93ee59a
commit 604b107586
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 48 additions and 17 deletions

View File

@ -239,6 +239,13 @@ How to Download
.. versionadded:: 4.8
.. option:: --sanitize-paths
Force sanitization of paths so that the resulting file and directory names
are valid on both Windows and Unix.
.. versionadded:: 4.9
.. option:: --resume-prefix prefix
For many targets, Instaloader is capable of resuming a previously-aborted

View File

@ -383,6 +383,9 @@ def main():
g_how.add_argument('--resume-prefix', metavar='PREFIX',
help='Prefix for filenames that are used to save the information to resume an interrupted '
'download.')
g_how.add_argument('--sanitize-paths', action='store_true',
help='Sanitize paths so that the resulting file and directory names are valid on both '
'Windows and Unix.')
g_how.add_argument('--no-resume', action='store_true',
help='Do not resume a previously-aborted download iteration, and do not save such information '
'when interrupted.')
@ -463,7 +466,8 @@ def main():
slide=args.slide,
fatal_status_codes=args.abort_on,
iphone_support=not args.no_iphone,
title_pattern=args.title_pattern)
title_pattern=args.title_pattern,
sanitize_paths=args.sanitize_paths)
_main(loader,
args.profile,
username=args.login.lower() if args.login is not None else None,

View File

@ -137,24 +137,38 @@ class _ArbitraryItemFormatter(string.Formatter):
class _PostPathFormatter(_ArbitraryItemFormatter):
    """Formatter whose string-valued fields are sanitized for use in file and directory names.

    Non-string field values are passed through unchanged; every string value
    is run through :meth:`sanitize_path` before it is substituted.
    """

    # Device names that Windows reserves; compared case-insensitively below.
    RESERVED: set = {'CON', 'PRN', 'AUX', 'NUL',
                     'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
                     'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'}

    def __init__(self, item: Any, force_windows_path: bool = False):
        """
        :param item: Object handed to the base formatter (presumably the source of the field values).
        :param force_windows_path: Apply the Windows-specific replacements on every platform,
           not only when running on Windows.
        """
        super().__init__(item)
        self.force_windows_path = force_windows_path

    def get_value(self, key, args, kwargs):
        """Resolve *key* through the base formatter and sanitize the result if it is a string."""
        ret = super().get_value(key, args, kwargs)
        if not isinstance(ret, str):
            return ret
        return self.sanitize_path(ret, self.force_windows_path)

    @staticmethod
    def sanitize_path(ret: str, force_windows_path: bool = False) -> str:
        """Replace characters that are not allowed in a single path component.

        On every platform, ``/`` becomes the similar looking Division Slash and a
        leading ``.`` becomes One Dot Leader.  When running on Windows, or when
        *force_windows_path* is true, the characters ``: < > " \\ | ? *`` are
        additionally replaced by look-alike Unicode characters, line breaks become
        spaces, reserved device names get a trailing ``_`` and a trailing ``.`` is
        replaced by One Dot Leader.

        :param ret: A single file or directory name (not a whole path).
        :param force_windows_path: Apply the Windows rules regardless of the current platform.
        :return: The sanitized name.
        """
        ret = ret.replace('/', '\u2215')

        if ret.startswith('.'):
            ret = ret.replace('.', '\u2024', 1)

        if force_windows_path or platform.system() == 'Windows':
            ret = ret.replace(':', '\uff1a').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02')
            ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a')
            ret = ret.replace('\n', ' ').replace('\r', ' ')
            # Windows treats everything before the FIRST dot as the device name, so
            # "NUL.tar.gz" is as reserved as "NUL" or "NUL.txt".
            stem, dot, rest = ret.partition('.')
            if stem.upper() in _PostPathFormatter.RESERVED:
                ret = stem + '_' + dot + rest
            # A name must not end in a bare '.', which splitext reports as ext == '.'.
            root, ext = os.path.splitext(ret)
            if ext == '.':
                ext = '\u2024'
            ret = root + ext
        return ret
@ -187,6 +201,7 @@ class Instaloader:
:param slide: :option:`--slide`
:param fatal_status_codes: :option:`--abort-on`
:param iphone_support: not :option:`--no-iphone`
:param sanitize_paths: :option:`--sanitize-paths`
.. attribute:: context
@ -216,7 +231,8 @@ class Instaloader:
slide: Optional[str] = None,
fatal_status_codes: Optional[List[int]] = None,
iphone_support: bool = True,
title_pattern: Optional[str] = None):
title_pattern: Optional[str] = None,
sanitize_paths: bool = False):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
request_timeout, rate_controller, fatal_status_codes,
@ -233,6 +249,7 @@ class Instaloader:
self.title_pattern = '{date_utc}_UTC_{typename}'
else:
self.title_pattern = '{target}_{date_utc}_UTC_{typename}'
self.sanitize_paths = sanitize_paths
self.download_pictures = download_pictures
self.download_videos = download_videos
self.download_video_thumbnails = download_video_thumbnails
@ -296,7 +313,8 @@ class Instaloader:
check_resume_bbd=self.check_resume_bbd,
slide=self.slide,
fatal_status_codes=self.context.fatal_status_codes,
iphone_support=self.context.iphone_support)
iphone_support=self.context.iphone_support,
sanitize_paths=self.sanitize_paths)
yield new_loader
self.context.error_log.extend(new_loader.context.error_log)
new_loader.context.error_log = [] # avoid double-printing of errors
@ -506,9 +524,10 @@ class Instaloader:
pic_bytes = http_response.content
ig_filename = url.split('/')[-1].split('?')[0]
pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object)
dirname = _PostPathFormatter(pic_data).format(self.dirname_pattern, target=target)
filename_template = os.path.join(dirname,
_PostPathFormatter(pic_data).format(self.title_pattern, target=target))
dirname = _PostPathFormatter(pic_data, self.sanitize_paths).format(self.dirname_pattern, target=target)
filename_template = os.path.join(
dirname,
_PostPathFormatter(pic_data, self.sanitize_paths).format(self.title_pattern, target=target))
filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg"
content_length = http_response.headers.get('Content-Length', None)
if os.path.isfile(filename) and (not self.context.is_logged_in or
@ -633,7 +652,7 @@ class Instaloader:
"""Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.
.. versionadded:: 4.1"""
return _PostPathFormatter(item).format(self.filename_pattern, target=target)
return _PostPathFormatter(item, self.sanitize_paths).format(self.filename_pattern, target=target)
def download_post(self, post: Post, target: Union[str, Path]) -> bool:
"""
@ -665,7 +684,7 @@ class Instaloader:
return False
return True
dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
dirname = _PostPathFormatter(post, self.sanitize_paths).format(self.dirname_pattern, target=target)
filename_template = os.path.join(dirname, self.format_filename(post, target=target))
filename = self.__prepare_filename(filename_template, lambda: post.url)
@ -846,7 +865,7 @@ class Instaloader:
return True
date_local = item.date_local
dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
dirname = _PostPathFormatter(item, self.sanitize_paths).format(self.dirname_pattern, target=target)
filename_template = os.path.join(dirname, self.format_filename(item, target=target))
filename = self.__prepare_filename(filename_template, lambda: item.url)
downloaded = False
@ -914,8 +933,9 @@ class Instaloader:
name = user_highlight.owner_username
highlight_target = (filename_target
if filename_target
else (Path(_PostPathFormatter.sanitize_path(name)) /
_PostPathFormatter.sanitize_path(user_highlight.title))) # type: Union[str, Path]
else (Path(_PostPathFormatter.sanitize_path(name, self.sanitize_paths)) /
_PostPathFormatter.sanitize_path(user_highlight.title,
self.sanitize_paths))) # type: Union[str, Path]
self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name))
self.download_highlight_cover(user_highlight, highlight_target)
totalcount = user_highlight.itemcount
@ -965,7 +985,7 @@ class Instaloader:
else total_count)
sanitized_target = target
if isinstance(target, str):
sanitized_target = _PostPathFormatter.sanitize_path(target)
sanitized_target = _PostPathFormatter.sanitize_path(target, self.sanitize_paths)
if takewhile is None:
takewhile = lambda _: True
with resumable_iteration(
@ -1209,8 +1229,8 @@ class Instaloader:
tagged_posts = profile.get_tagged_posts()
self.posts_download_loop(tagged_posts,
target if target
else (Path(_PostPathFormatter.sanitize_path(profile.username)) /
_PostPathFormatter.sanitize_path(':tagged')),
else (Path(_PostPathFormatter.sanitize_path(profile.username, self.sanitize_paths)) /
_PostPathFormatter.sanitize_path(':tagged', self.sanitize_paths)),
fast_update, post_filter, takewhile=posts_takewhile)
if latest_stamps is not None and tagged_posts.first_item is not None:
latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local)