1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-07-07 03:40:06 +02:00

Merge remote-tracking branch 'origin/upcoming/v4.6'

This commit is contained in:
André Koch-Kramer 2021-01-09 16:11:05 +01:00
commit 3fe6b1e139
9 changed files with 175 additions and 50 deletions

View File

@ -171,6 +171,9 @@ dirname pattern. The following tokens are defined for usage with
- ``{mediaid}``
Integer representation of shortcode.
- ``{filename}``
Instagram's internal filename.
- ``{date_utc}`` (same as ``{date}``)
Creation time in UTC timezone.
`strftime()-style formatting options <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`__

View File

@ -82,6 +82,16 @@ What to Download of each Post
Template to write in txt file for each StoryItem. See
:ref:`metadata-text-files`.
.. option:: --slide
Download only selected images of a sidecar. You can select a single image by its
index in the sidecar, counting from 1 at the leftmost image, or you can specify a
range of images with the syntax ``start_index-end_index``. Examples:
``--slide 1`` selects only the first image, ``--slide last`` only the last one, and
``--slide 1-3`` only the first three images.
.. versionadded:: 4.6
.. option:: --no-metadata-json
Do not create a JSON file containing the metadata of each post.
@ -238,10 +248,13 @@ How to Download
.. option:: --request-timeout N
Seconds to wait before timing out a connection request.
Seconds to wait before timing out a connection request. Defaults to 300.
.. versionadded:: 4.3
.. versionchanged:: 4.6
Enabled this option by default with a timeout of 300 seconds.
Miscellaneous Options
^^^^^^^^^^^^^^^^^^^^^

View File

@ -1,7 +1,7 @@
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
__version__ = '4.5.5'
__version__ = '4.6rc1'
try:

View File

@ -267,6 +267,8 @@ def main():
help="Do not download regular posts.")
g_prof.add_argument('--no-profile-pic', action='store_true',
help='Do not download profile picture.')
g_post.add_argument('--slide', action='store',
help='Set what image/interval of a sidecar you want to download.')
g_post.add_argument('--no-pictures', action='store_true',
help='Do not download post pictures. Cannot be used together with --fast-update. '
'Implies --no-video-thumbnails, does not imply --no-videos.')
@ -365,8 +367,8 @@ def main():
'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry '
'infinitely.')
g_how.add_argument('--commit-mode', action='store_true', help=SUPPRESS)
g_how.add_argument('--request-timeout', metavar='N', type=float,
help='seconds to wait before timing out a connection request')
g_how.add_argument('--request-timeout', metavar='N', type=float, default=300.0,
help='Seconds to wait before timing out a connection request. Defaults to 300.')
g_misc = parser.add_argument_group('Miscellaneous Options')
g_misc.add_argument('-q', '--quiet', action='store_true',
@ -424,7 +426,8 @@ def main():
max_connection_attempts=args.max_connection_attempts,
request_timeout=args.request_timeout,
resume_prefix=resume_prefix,
check_resume_bbd=not args.use_aged_resume_files)
check_resume_bbd=not args.use_aged_resume_files,
slide=args.slide)
_main(loader,
args.profile,
username=args.login.lower() if args.login is not None else None,

View File

@ -14,6 +14,7 @@ from hashlib import md5
from io import BytesIO
from pathlib import Path
from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast
from urllib.parse import urlparse
import requests
import urllib3 # type: ignore
@ -22,7 +23,7 @@ from .exceptions import *
from .instaloadercontext import InstaloaderContext, RateController
from .nodeiterator import NodeIterator, resumable_iteration
from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
load_structure_from_file, save_structure_to_file)
load_structure_from_file, save_structure_to_file, PostSidecarNode)
def get_default_session_filename(username: str) -> str:
@ -101,6 +102,8 @@ class _ArbitraryItemFormatter(string.Formatter):
def get_value(self, key, args, kwargs):
    """Resolve a format field, preferring attributes of the wrapped ``_item``.

    For a :class:`Post`, :class:`StoryItem` or :class:`PostSidecarNode`, the key
    ``filename`` is returned as the literal text ``{filename}`` so that the token
    survives formatting unchanged. Any other key that names an attribute of the
    item yields that attribute; everything else is delegated to the base formatter.
    """
    item = self._item
    if key == 'filename' and isinstance(item, (Post, StoryItem, PostSidecarNode)):
        return "{filename}"
    if not hasattr(item, key):
        return super().get_value(key, args, kwargs)
    return getattr(item, key)
@ -157,6 +160,7 @@ class Instaloader:
:param rate_controller: Generator for a :class:`RateController` to override rate controlling behavior
:param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`.
:param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired.
:param slide: :option:`--slide`
.. attribute:: context
@ -179,10 +183,11 @@ class Instaloader:
post_metadata_txt_pattern: str = None,
storyitem_metadata_txt_pattern: str = None,
max_connection_attempts: int = 3,
request_timeout: Optional[float] = None,
request_timeout: float = 300.0,
rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None,
resume_prefix: Optional[str] = "iterator",
check_resume_bbd: bool = True):
check_resume_bbd: bool = True,
slide: Optional[str] = None):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
request_timeout, rate_controller)
@ -204,6 +209,31 @@ class Instaloader:
self.resume_prefix = resume_prefix
self.check_resume_bbd = check_resume_bbd
self.slide = slide or ""
self.slide_start = 0
self.slide_end = -1
if self.slide != "":
splitted = self.slide.split('-')
if len(splitted) == 1:
if splitted[0] == 'last':
# download only last image of a sidecar
self.slide_start = -1
else:
if int(splitted[0]) > 0:
self.slide_start = self.slide_end = int(splitted[0])-1
else:
raise InvalidArgumentException("--slide parameter must be greater than 0.")
elif len(splitted) == 2:
if splitted[1] == 'last':
self.slide_start = int(splitted[0])-1
elif 0 < int(splitted[0]) < int(splitted[1]):
self.slide_start = int(splitted[0])-1
self.slide_end = int(splitted[1])-1
else:
raise InvalidArgumentException("Invalid data for --slide parameter.")
else:
raise InvalidArgumentException("Invalid data for --slide parameter.")
@contextmanager
def anonymous_copy(self):
"""Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log."""
@ -225,7 +255,8 @@ class Instaloader:
max_connection_attempts=self.context.max_connection_attempts,
request_timeout=self.context.request_timeout,
resume_prefix=self.resume_prefix,
check_resume_bbd=self.check_resume_bbd)
check_resume_bbd=self.check_resume_bbd,
slide=self.slide)
yield new_loader
self.context.error_log.extend(new_loader.context.error_log)
new_loader.context.error_log = [] # avoid double-printing of errors
@ -492,7 +523,20 @@ class Instaloader:
.. versionadded:: 4.2"""
self.context.two_factor_login(two_factor_code)
def format_filename(self, item: Union[Post, StoryItem], target: Optional[Union[str, Path]] = None):
@staticmethod
def __prepare_filename(filename_template: str, url: Callable[[], str]) -> str:
    """Replace the ``{filename}`` token in filename_template with the URL's filename and ensure the directory exists.

    :param filename_template: Target path, which may contain the literal token ``{filename}``.
    :param url: Callable returning the media URL. It is only called if the token is present in the template.
    :return: The path with the token replaced by the extension-less basename of the URL's path component.

    .. versionadded:: 4.6"""
    if "{filename}" in filename_template:
        url_basename = os.path.basename(urlparse(url()).path)
        filename = filename_template.replace("{filename}", os.path.splitext(url_basename)[0])
    else:
        filename = filename_template
    dirname = os.path.dirname(filename)
    if dirname:
        # os.makedirs('') raises FileNotFoundError; a path without a directory
        # component refers to the current directory, so nothing has to be created.
        os.makedirs(dirname, exist_ok=True)
    return filename
def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode], target: Optional[Union[str, Path]] = None):
"""Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.
.. versionadded:: 4.1"""
@ -508,22 +552,35 @@ class Instaloader:
"""
dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
filename = os.path.join(dirname, self.format_filename(post, target=target))
os.makedirs(os.path.dirname(filename), exist_ok=True)
filename_template = os.path.join(dirname, self.format_filename(post, target=target))
filename = self.__prepare_filename(filename_template, lambda: post.url)
# Download the image(s) / video thumbnail and videos within sidecars if desired
downloaded = True
if post.typename == 'GraphSidecar':
if self.download_pictures or self.download_videos:
for edge_number, sidecar_node in enumerate(post.get_sidecar_nodes(), start=1):
for edge_number, sidecar_node in enumerate(
post.get_sidecar_nodes(self.slide_start, self.slide_end),
start=post.mediacount if self.slide_start < 0 else self.slide_start + 1
):
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
suffix = str(edge_number)
if '{filename}' in self.filename_pattern:
suffix = ''
# pylint:disable=cell-var-from-loop
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.display_url)
# Download sidecar picture or video thumbnail (--no-pictures implies --no-video-thumbnails)
downloaded &= self.download_pic(filename=filename, url=sidecar_node.display_url,
mtime=post.date_local, filename_suffix=str(edge_number))
mtime=post.date_local, filename_suffix=suffix)
if sidecar_node.is_video and self.download_videos:
suffix = str(edge_number)
if '{filename}' in self.filename_pattern:
suffix = ''
# pylint:disable=cell-var-from-loop
filename = self.__prepare_filename(filename_template, lambda: sidecar_node.video_url)
# Download sidecar video if desired
downloaded &= self.download_pic(filename=filename, url=sidecar_node.video_url,
mtime=post.date_local, filename_suffix=str(edge_number))
mtime=post.date_local, filename_suffix=suffix)
elif post.typename == 'GraphImage':
# Download picture
if self.download_pictures:
@ -638,13 +695,14 @@ class Instaloader:
date_local = item.date_local
dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
filename = os.path.join(dirname, self.format_filename(item, target=target))
os.makedirs(os.path.dirname(filename), exist_ok=True)
filename_template = os.path.join(dirname, self.format_filename(item, target=target))
filename = self.__prepare_filename(filename_template, lambda: item.url)
downloaded = False
if not item.is_video or self.download_video_thumbnails is True:
url = item.url
downloaded = self.download_pic(filename=filename, url=url, mtime=date_local)
if item.is_video and self.download_videos is True:
filename = self.__prepare_filename(filename_template, lambda: str(item.video_url))
downloaded |= self.download_pic(filename=filename, url=item.video_url, mtime=date_local)
# Save caption if desired
metadata_string = _ArbitraryItemFormatter(item).format(self.storyitem_metadata_txt_pattern).strip()

View File

@ -25,10 +25,9 @@ def copy_session(session: requests.Session, request_timeout: Optional[float] = N
new = requests.Session()
new.cookies = requests.utils.cookiejar_from_dict(requests.utils.dict_from_cookiejar(session.cookies))
new.headers = session.headers.copy()
if request_timeout is not None:
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
new.request = partial(new.request, timeout=request_timeout) # type: ignore
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
new.request = partial(new.request, timeout=request_timeout) # type: ignore
return new
@ -53,7 +52,7 @@ class InstaloaderContext:
"""
def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None,
max_connection_attempts: int = 3, request_timeout: Optional[float] = None,
max_connection_attempts: int = 3, request_timeout: float = 300.0,
rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None):
self.user_agent = user_agent if user_agent is not None else default_user_agent()
@ -161,10 +160,9 @@ class InstaloaderContext:
'ig_vw': '1920', 'csrftoken': '',
's_network': '', 'ds_user_id': ''})
session.headers.update(self._default_http_header(empty_session_only=True))
if self.request_timeout is not None:
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
return session
def save_session_to_file(self, sessionfile):
@ -177,10 +175,9 @@ class InstaloaderContext:
session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile))
session.headers.update(self._default_http_header())
session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']})
if self.request_timeout is not None:
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
self._session = session
self.username = username
@ -206,10 +203,9 @@ class InstaloaderContext:
'ig_vw': '1920', 'ig_cb': '1', 'csrftoken': '',
's_network': '', 'ds_user_id': ''})
session.headers.update(self._default_http_header())
if self.request_timeout is not None:
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
# Override default timeout behavior.
# Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
session.get('https://www.instagram.com/web/__mid/')
csrf_token = session.cookies.get_dict()['csrftoken']
session.headers.update({'X-CSRFToken': csrf_token})
@ -473,7 +469,7 @@ class InstaloaderContext:
.. versionadded:: 4.2.1"""
with copy_session(self._session, self.request_timeout) as tempsession:
tempsession.headers['User-Agent'] = 'Instagram 123.1.0.26.115 (iPhone12,1; iOS 13_3; en_US; en-US; ' \
tempsession.headers['User-Agent'] = 'Instagram 146.0.0.27.125 (iPhone12,1; iOS 13_3; en_US; en-US; ' \
'scale=2.00; 1656x3584; 190542906)'
for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With']:
tempsession.headers.pop(header, None)

0
instaloader/py.typed Normal file
View File

View File

@ -252,26 +252,49 @@ class Post:
"""Type of post, GraphImage, GraphVideo or GraphSidecar"""
return self._field('__typename')
def get_sidecar_nodes(self) -> Iterator[PostSidecarNode]:
"""Sidecar nodes of a Post with typename==GraphSidecar."""
@property
def mediacount(self) -> int:
    """
    The number of media in a sidecar Post, or 1 if the Post is not a sidecar.

    .. versionadded:: 4.6
    """
    if self.typename != 'GraphSidecar':
        return 1
    return len(self._field('edge_sidecar_to_children', 'edges'))
def get_sidecar_nodes(self, start=0, end=-1) -> Iterator[PostSidecarNode]:
"""
Sidecar nodes of a Post with typename==GraphSidecar.
.. versionchanged:: 4.6
Added parameters *start* and *end* to specify a slice of sidecar media.
"""
if self.typename == 'GraphSidecar':
edges = self._field('edge_sidecar_to_children', 'edges')
if any(edge['node']['is_video'] for edge in edges):
# video_url is only present in full metadata, issue #558.
edges = self._full_metadata['edge_sidecar_to_children']['edges']
if end < 0:
end = len(edges)-1
if start < 0:
start = len(edges)-1
for idx, edge in enumerate(edges):
node = edge['node']
is_video = node['is_video']
display_url = node['display_url']
if not is_video and self._context.is_logged_in:
try:
carousel_media = self._iphone_struct['carousel_media']
orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url']
display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url)
except (InstaloaderException, KeyError, IndexError) as err:
self._context.error('{} Unable to fetch high quality image version of {}.'.format(err, self))
yield PostSidecarNode(is_video=is_video, display_url=display_url,
video_url=node['video_url'] if is_video else None)
if start <= idx <= end:
node = edge['node']
is_video = node['is_video']
display_url = node['display_url']
if not is_video and self._context.is_logged_in:
try:
carousel_media = self._iphone_struct['carousel_media']
orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url']
display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url)
except (InstaloaderException, KeyError, IndexError) as err:
self._context.error('{} Unable to fetch high quality image version of {}.'.format(
err, self))
yield PostSidecarNode(is_video=is_video, display_url=display_url,
video_url=node['video_url'] if is_video else None)
@property
def caption(self) -> Optional[str]:
@ -330,6 +353,12 @@ class Post:
def video_url(self) -> Optional[str]:
    """URL of the video, or None if this Post is not a video.

    When logged in, the first entry of ``video_versions`` from the iPhone media
    info is preferred. If that lookup fails, the error is logged and the regular
    ``video_url`` field is returned instead.
    """
    if not self.is_video:
        return None
    if self._context.is_logged_in:
        try:
            return self._iphone_struct['video_versions'][0]['url']
        except (InstaloaderException, KeyError, IndexError) as err:
            self._context.error('{} Unable to fetch high quality video version of {}.'.format(err, self))
    return self._field('video_url')
@ -934,11 +963,17 @@ class StoryItem:
self._context = context
self._node = node
self._owner_profile = owner_profile
self._iphone_struct_ = None
if 'iphone_struct' in node:
# if loaded from JSON with load_structure_from_file()
self._iphone_struct_ = node['iphone_struct']
def _asdict(self):
    """Return the node dictionary of this StoryItem.

    The returned object is ``self._node`` itself (not a copy). If an owner profile
    is set, its ``_asdict()`` is stored under ``'owner'``; if iPhone media info is
    available, it is stored under ``'iphone_struct'``.
    """
    result = self._node
    owner = self._owner_profile
    if owner:
        result['owner'] = owner._asdict()
    iphone_struct = self._iphone_struct_
    if iphone_struct:
        result['iphone_struct'] = iphone_struct
    return result
@property
@ -963,6 +998,15 @@ class StoryItem:
def __hash__(self) -> int:
    """Hash a StoryItem by its ``mediaid``."""
    media_id = self.mediaid
    return hash(media_id)
@property
def _iphone_struct(self) -> Dict[str, Any]:
    """iPhone media info of this StoryItem, fetched on first access and cached afterwards.

    :raises LoginRequiredException: If the context is not logged in.
    """
    if not self._context.is_logged_in:
        raise LoginRequiredException("--login required to access iPhone media info endpoint.")
    if not self._iphone_struct_:
        endpoint = 'api/v1/media/{}/info/'.format(self.mediaid)
        response = self._context.get_iphone_json(path=endpoint, params={})
        # Keep the first item so later accesses do not query the endpoint again.
        self._iphone_struct_ = response['items'][0]
    return self._iphone_struct_
@property
def owner_profile(self) -> Profile:
""":class:`Profile` instance of the story item's owner."""
@ -1014,6 +1058,13 @@ class StoryItem:
@property
def url(self) -> str:
    """URL of the picture / video thumbnail of the StoryItem.

    For a ``GraphStoryImage`` accessed while logged in, the first image candidate
    from the iPhone media info is used, with its ``se`` query parameter removed.
    If that lookup fails, the error is logged and the last entry of
    ``display_resources`` is returned, as in every other case.
    """
    wants_hq = self.typename == "GraphStoryImage" and self._context.is_logged_in
    if wants_hq:
        try:
            candidates = self._iphone_struct['image_versions2']['candidates']
            return re.sub(r'&se=\d+(&?)', r'\1', candidates[0]['url'])
        except (InstaloaderException, KeyError, IndexError) as err:
            self._context.error('{} Unable to fetch high quality image version of {}.'.format(err, self))
    return self._node['display_resources'][-1]['src']
@property

View File

@ -38,6 +38,7 @@ setup(
name='instaloader',
version=get_version(),
packages=['instaloader'],
package_data={'instaloader': ['py.typed']},
url='https://instaloader.github.io/',
license='MIT',
author='Alexander Graf, André Koch-Kramer',
@ -48,7 +49,7 @@ setup(
install_requires=requirements,
python_requires='>=3.5',
entry_points={'console_scripts': ['instaloader=instaloader.__main__:main']},
zip_safe=True,
zip_safe=False,
keywords=keywords,
classifiers=[
'Development Status :: 5 - Production/Stable',