1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-10-03 22:07:11 +02:00

Add option --title-pattern for profile pics and covers (#1127)

This commit is contained in:
Eduardo Kalinowski 2021-05-15 11:25:36 -03:00 committed by GitHub
parent a2715cbe03
commit 5345470ebf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 113 additions and 22 deletions

View File

@ -149,27 +149,37 @@ target. The default is ``--dirname-pattern={target}``. In the dirname
pattern, the token ``{target}`` is replaced by the target name, and
``{profile}`` is replaced by the owner of the post which is downloaded.
:option:`--filename-pattern` configures the path of the post's files relative
:option:`--filename-pattern` configures the path of post and story files relative
to the target directory that is specified with :option:`--dirname-pattern`.
The default is ``--filename-pattern={date_utc}_UTC``.
The tokens ``{target}`` and ``{profile}`` are replaced like in the
dirname pattern. The following tokens are defined for usage with
:option:`--filename-pattern`:
dirname pattern.
:option:`--title-pattern` is similar to :option:`--filename-pattern`, but for profile
pics, hashtag profile pics, and highlight covers. The default is
``{date_utc}_UTC_{typename}`` if :option:`--dirname-pattern` contains ``{target}`` or
``{profile}``, or ``{target}_{date_utc}_UTC_{typename}`` if it does not. Some tokens
are not supported for this option, see below for details.
The following tokens are defined for usage with
:option:`--filename-pattern` and :option:`--title-pattern`:
- ``{target}``
Target name (as given in Instaloader command line)
- ``{profile}`` (same as ``{owner_username}``)
Owner of the Post / StoryItem.
Owner of the Post / StoryItem / ProfilePic. For hashtag profile pics and
highlight covers, equivalent to ``{target}``.
- ``{owner_id}``
Unique integer ID of owner profile.
Unique integer ID of owner profile. For hashtag profile pics, equivalent to
``{target}``.
- ``{shortcode}``
Shortcode (identifier string).
Shortcode (identifier string). Not available for :option:`--title-pattern`.
- ``{mediaid}``
Integer representation of shortcode.
Integer representation of shortcode. Not available for :option:`--title-pattern`.
- ``{filename}``
Instagram's internal filename.
@ -182,6 +192,10 @@ dirname pattern. The following tokens are defined for usage with
{date_utc:%Y-%m-%d_%H-%M-%S}
- ``{typename}``
Type of media being saved, such as GraphImage, GraphStoryVideo, profile_pic,
etc.
For example, encode the poster's profile name in the filenames with::
instaloader --filename-pattern={date_utc}_UTC_{profile} "#hashtag"

View File

@ -205,12 +205,23 @@ How to Download
.. option:: --filename-pattern FILENAME_PATTERN
Prefix of filenames, relative to the directory given with
Prefix of filenames for posts and stories, relative to the directory given with
:option:`--dirname-pattern`. ``{profile}`` is replaced by the profile name,
``{target}`` is replaced by the target you specified, i.e. either ``:feed``,
``#hashtag`` or the profile name. Defaults to ``{date_utc}_UTC``.
See :ref:`filename-specification` for a list of supported tokens.
.. option:: --title-pattern TITLE_PATTERN
Prefix of filenames for profile pics, hashtag profile pics, and highlight
covers, relative to the directory given with :option:`--dirname-pattern`.
Defaults to ``{date_utc}_UTC_{typename}`` if :option:`--dirname-pattern`
contains ``{target}`` or ``{profile}``, otherwise defaults to
``{target}_{date_utc}_UTC_{typename}``.
See :ref:`filename-specification` for a list of supported tokens.
.. versionadded:: 4.8
.. option:: --resume-prefix prefix
For many targets, Instaloader is capable of resuming a previously-aborted

View File

@ -359,10 +359,15 @@ def main():
'{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the '
'profile name. Defaults to \'{target}\'.')
g_how.add_argument('--filename-pattern',
help='Prefix of filenames, relative to the directory given with '
help='Prefix of filenames for posts and stories, relative to the directory given with '
'--dirname-pattern. {profile} is replaced by the profile name,'
'{target} is replaced by the target you specified, i.e. either :feed'
'#hashtag or the profile name. Defaults to \'{date_utc}_UTC\'')
# The default described here must match the one chosen in Instaloader.__init__, which inspects
# the dirname pattern for the {target} and {profile} tokens (there is no {dirname} token).
g_how.add_argument('--title-pattern',
                   help='Prefix of filenames for profile pics, hashtag profile pics, and highlight covers. '
                        'Defaults to \'{date_utc}_UTC_{typename}\' if --dirname-pattern contains \'{target}\' '
                        'or \'{profile}\', or if --dirname-pattern is not specified. Otherwise defaults to '
                        '\'{target}_{date_utc}_UTC_{typename}\'.')
g_how.add_argument('--resume-prefix', metavar='PREFIX',
help='Prefix for filenames that are used to save the information to resume an interrupted '
'download.')
@ -445,7 +450,8 @@ def main():
check_resume_bbd=not args.use_aged_resume_files,
slide=args.slide,
fatal_status_codes=args.abort_on,
iphone_support=not args.no_iphone)
iphone_support=not args.no_iphone,
title_pattern=args.title_pattern)
_main(loader,
args.profile,
username=args.login.lower() if args.login is not None else None,

View File

@ -10,7 +10,6 @@ import tempfile
from contextlib import contextmanager, suppress
from datetime import datetime, timezone
from functools import wraps
from hashlib import md5
from io import BytesIO
from pathlib import Path
from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast
@ -23,7 +22,7 @@ from .exceptions import *
from .instaloadercontext import InstaloaderContext, RateController
from .nodeiterator import NodeIterator, resumable_iteration
from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
load_structure_from_file, save_structure_to_file, PostSidecarNode)
load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic)
def get_default_session_filename(username: str) -> str:
@ -102,7 +101,7 @@ class _ArbitraryItemFormatter(string.Formatter):
def get_value(self, key, args, kwargs):
"""Override to substitute {ATTRIBUTE} by attributes of our _item."""
if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode)):
if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode, TitlePic)):
return "{filename}"
if hasattr(self._item, key):
return getattr(self._item, key)
@ -144,6 +143,9 @@ class Instaloader:
:param user_agent: :option:`--user-agent`
:param dirname_pattern: :option:`--dirname-pattern`, default is ``{target}``
:param filename_pattern: :option:`--filename-pattern`, default is ``{date_utc}_UTC``
:param title_pattern:
:option:`--title-pattern`, default is ``{date_utc}_UTC_{typename}`` if ``dirname_pattern`` contains
``{target}`` or ``{profile}``, ``{target}_{date_utc}_UTC_{typename}`` otherwise.
:param download_pictures: not :option:`--no-pictures`
:param download_videos: not :option:`--no-videos`
:param download_video_thumbnails: not :option:`--no-video-thumbnails`
@ -191,7 +193,8 @@ class Instaloader:
check_resume_bbd: bool = True,
slide: Optional[str] = None,
fatal_status_codes: Optional[List[int]] = None,
iphone_support: bool = True):
iphone_support: bool = True,
title_pattern: Optional[str] = None):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
request_timeout, rate_controller, fatal_status_codes,
@ -200,6 +203,14 @@ class Instaloader:
# configuration parameters
self.dirname_pattern = dirname_pattern or "{target}"
self.filename_pattern = filename_pattern or "{date_utc}_UTC"
if title_pattern is not None:
self.title_pattern = title_pattern
else:
if (format_string_contains_key(self.dirname_pattern, 'profile') or
format_string_contains_key(self.dirname_pattern, 'target')):
self.title_pattern = '{date_utc}_UTC_{typename}'
else:
self.title_pattern = '{target}_{date_utc}_UTC_{typename}'
self.download_pictures = download_pictures
self.download_videos = download_videos
self.download_video_thumbnails = download_video_thumbnails
@ -460,25 +471,26 @@ class Instaloader:
.. versionadded:: 4.3"""
def _epoch_to_string(epoch: datetime) -> str:
return epoch.strftime('%Y-%m-%d_%H-%M-%S_UTC')
http_response = self.context.get_raw(url)
date_object = None # type: Optional[datetime]
if 'Last-Modified' in http_response.headers:
date_object = datetime.strptime(http_response.headers["Last-Modified"], '%a, %d %b %Y %H:%M:%S GMT')
date_object = date_object.replace(tzinfo=timezone.utc)
pic_bytes = None
pic_identifier = _epoch_to_string(date_object)
else:
pic_bytes = http_response.content
pic_identifier = md5(pic_bytes).hexdigest()[:16]
filename = self.format_filename_within_target_path(target, owner_profile, pic_identifier, name_suffix, 'jpg')
ig_filename = url.split('/')[-1].split('?')[0]
pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object)
dirname = _PostPathFormatter(pic_data).format(self.dirname_pattern, target=target)
filename_template = os.path.join(dirname,
_PostPathFormatter(pic_data).format(self.title_pattern, target=target))
filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg"
content_length = http_response.headers.get('Content-Length', None)
if os.path.isfile(filename) and (not self.context.is_logged_in or
(content_length is not None and
os.path.getsize(filename) >= int(content_length))):
self.context.log(filename + ' already exists')
return None
return
os.makedirs(os.path.dirname(filename), exist_ok=True)
self.context.write_raw(pic_bytes if pic_bytes else http_response, filename)
if date_object:
@ -572,7 +584,8 @@ class Instaloader:
os.makedirs(os.path.dirname(filename), exist_ok=True)
return filename
def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode], target: Optional[Union[str, Path]] = None):
def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode, TitlePic],
target: Optional[Union[str, Path]] = None):
"""Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.
.. versionadded:: 4.1"""

View File

@ -4,6 +4,7 @@ import re
from base64 import b64decode, b64encode
from collections import namedtuple
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, List, Optional, Union
from . import __version__
@ -1537,6 +1538,52 @@ class TopSearchResults:
return self._searchstring
class TitlePic:
    """Read-only bundle of the values describing one title picture.

    The attributes exposed here (``profile``, ``owner_username``, ``owner_id``, ``target``,
    ``typename``, ``filename``, ``date_utc``, ``date``, ``date_local``) are looked up by name
    when a filename pattern is formatted for this item.
    """

    def __init__(self, profile: Optional[Profile], target: Union[str, Path], typename: str,
                 filename: str, date_utc: Optional[datetime]):
        """Store the given values unchanged.

        :param profile: Owner profile, or ``None`` if there is no owner profile.
        :param target: Target the picture is saved for.
        :param typename: Type name of the picture.
        :param filename: File name associated with the picture.
        :param date_utc: Timestamp of the picture, or ``None`` if unknown.
        """
        self._profile = profile
        self._target = target
        self._typename = typename
        self._filename = filename
        self._date_utc = date_utc

    @property
    def profile(self) -> Union[str, Path]:
        """Lower-cased username of the owner profile; the target if no profile was given."""
        if self._profile is None:
            return self._target
        return self._profile.username.lower()

    @property
    def owner_username(self) -> Union[str, Path]:
        """Alias of :attr:`profile`."""
        return self.profile

    @property
    def owner_id(self) -> Union[str, Path]:
        """User ID of the owner profile as a string; the target if no profile was given."""
        if self._profile is None:
            return self._target
        return str(self._profile.userid)

    @property
    def target(self) -> Union[str, Path]:
        """Target as passed to the constructor."""
        return self._target

    @property
    def typename(self) -> str:
        """Type name as passed to the constructor."""
        return self._typename

    @property
    def filename(self) -> str:
        """File name as passed to the constructor."""
        return self._filename

    @property
    def date_utc(self) -> Optional[datetime]:
        """Timestamp as passed to the constructor, or ``None``."""
        return self._date_utc

    @property
    def date(self) -> Optional[datetime]:
        """Alias of :attr:`date_utc`."""
        return self.date_utc

    @property
    def date_local(self) -> Optional[datetime]:
        """Timestamp converted with ``datetime.astimezone()``, or ``None`` if no timestamp is set."""
        if self._date_utc is None:
            return None
        return self._date_utc.astimezone()
JsonExportable = Union[Post, Profile, StoryItem, Hashtag, FrozenNodeIterator]