1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-10-03 22:07:11 +02:00

Save content using original filenames (#893)

Add feature from issue #892 (Save content using original filenames)

- The new feature downloads posts and saves the images/videos under their original filenames.
- The implementation follows the other options for the filename-pattern parameter. Because a filename belongs to a single file rather than to the whole post, the filename is computed again for each sidecar node.
- When this option is used, media from one post is grouped together only by the file modification timestamp.
This commit is contained in:
Thomas 2020-12-02 11:22:55 +01:00 committed by GitHub
parent 4c02a186d3
commit a045168c79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 32 additions and 8 deletions

View File

@ -171,6 +171,9 @@ dirname pattern. The following tokens are defined for usage with
- ``{mediaid}`` - ``{mediaid}``
Integer representation of shortcode. Integer representation of shortcode.
- ``{filename}``
Instagram's internal filename, i.e. the basename of the media URL's path without its extension.
- ``{date_utc}`` (same as ``{date}``) - ``{date_utc}`` (same as ``{date}``)
Creation time in UTC timezone. Creation time in UTC timezone.
`strftime()-style formatting options <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`__ `strftime()-style formatting options <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`__

View File

@ -14,6 +14,7 @@ from hashlib import md5
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast
from urllib.parse import urlparse
import requests import requests
import urllib3 # type: ignore import urllib3 # type: ignore
@ -22,7 +23,7 @@ from .exceptions import *
from .instaloadercontext import InstaloaderContext, RateController from .instaloadercontext import InstaloaderContext, RateController
from .nodeiterator import NodeIterator, resumable_iteration from .nodeiterator import NodeIterator, resumable_iteration
from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
load_structure_from_file, save_structure_to_file) load_structure_from_file, save_structure_to_file, PostSidecarNode)
def get_default_session_filename(username: str) -> str: def get_default_session_filename(username: str) -> str:
@ -101,6 +102,8 @@ class _ArbitraryItemFormatter(string.Formatter):
def get_value(self, key, args, kwargs): def get_value(self, key, args, kwargs):
"""Override to substitute {ATTRIBUTE} by attributes of our _item.""" """Override to substitute {ATTRIBUTE} by attributes of our _item."""
if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode)):
return "{filename}"
if hasattr(self._item, key): if hasattr(self._item, key):
return getattr(self._item, key) return getattr(self._item, key)
return super().get_value(key, args, kwargs) return super().get_value(key, args, kwargs)
@ -492,7 +495,16 @@ class Instaloader:
.. versionadded:: 4.2""" .. versionadded:: 4.2"""
self.context.two_factor_login(two_factor_code) self.context.two_factor_login(two_factor_code)
def format_filename(self, item: Union[Post, StoryItem], target: Optional[Union[str, Path]] = None): @staticmethod
def __prepare_filename(filename_template: str, url: str) -> str:
    """Replace filename token inside filename_template with url's filename and ensure the directories exist.

    :param filename_template: Path that may contain the literal token ``{filename}``.
    :param url: URL of the media file. The basename of its path component, without
       extension, is substituted for every ``{filename}`` token.
    :return: The path with the token substituted.

    .. versionadded:: 4.6"""
    # Only the path component of the URL is considered, so query strings never end up in the name.
    url_stem = os.path.splitext(os.path.basename(urlparse(url).path))[0]
    filename = filename_template.replace("{filename}", url_stem)
    # os.makedirs('') raises FileNotFoundError, so skip directory creation when the
    # resulting path has no directory component (file goes to the current directory).
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    return filename
def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode], target: Optional[Union[str, Path]] = None):
"""Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter. """Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.
.. versionadded:: 4.1""" .. versionadded:: 4.1"""
@ -508,8 +520,8 @@ class Instaloader:
""" """
dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target) dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
filename = os.path.join(dirname, self.format_filename(post, target=target)) filename_template = os.path.join(dirname, self.format_filename(post, target=target))
os.makedirs(os.path.dirname(filename), exist_ok=True) filename = self.__prepare_filename(filename_template, post.url)
# Download the image(s) / video thumbnail and videos within sidecars if desired # Download the image(s) / video thumbnail and videos within sidecars if desired
downloaded = True downloaded = True
@ -517,13 +529,21 @@ class Instaloader:
if self.download_pictures or self.download_videos: if self.download_pictures or self.download_videos:
for edge_number, sidecar_node in enumerate(post.get_sidecar_nodes(), start=1): for edge_number, sidecar_node in enumerate(post.get_sidecar_nodes(), start=1):
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails): if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
suffix = str(edge_number)
if '{filename}' in self.filename_pattern:
suffix = ''
filename = self.__prepare_filename(filename_template, sidecar_node.display_url)
# Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails) # Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails)
downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url, downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url,
mtime=post.date_local, filename_suffix=str(edge_number)) mtime=post.date_local, filename_suffix=suffix)
if sidecar_node.is_video and self.download_videos: if sidecar_node.is_video and self.download_videos:
suffix = str(edge_number)
if '{filename}' in self.filename_pattern:
suffix = ''
filename = self.__prepare_filename(filename_template, sidecar_node.video_url)
# Download sidecar video if desired # Download sidecar video if desired
downloaded &= self.download_pic(filename=filename, url=sidecar_node.video_url, downloaded &= self.download_pic(filename=filename, url=sidecar_node.video_url,
mtime=post.date_local, filename_suffix=str(edge_number)) mtime=post.date_local, filename_suffix=suffix)
elif post.typename == 'GraphImage': elif post.typename == 'GraphImage':
# Download picture # Download picture
if self.download_pictures: if self.download_pictures:
@ -638,13 +658,14 @@ class Instaloader:
date_local = item.date_local date_local = item.date_local
dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target) dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
filename = os.path.join(dirname, self.format_filename(item, target=target)) filename_template = os.path.join(dirname, self.format_filename(item, target=target))
os.makedirs(os.path.dirname(filename), exist_ok=True) filename = self.__prepare_filename(filename_template, item.url)
downloaded = False downloaded = False
if not item.is_video or self.download_video_thumbnails is True: if not item.is_video or self.download_video_thumbnails is True:
url = item.url url = item.url
downloaded = self.download_pic(filename=filename, url=url, mtime=date_local) downloaded = self.download_pic(filename=filename, url=url, mtime=date_local)
if item.is_video and self.download_videos is True: if item.is_video and self.download_videos is True:
filename = self.__prepare_filename(filename_template, str(item.video_url))
downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local) downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local)
# Save caption if desired # Save caption if desired
metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip() metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip()