diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 3f7a4bb..d5413a0 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -255,6 +255,17 @@ How to Download .. versionchanged:: 4.6 Enabled this option by default with a timeout of 300 seconds. +.. option:: --abort-on STATUS_CODE_LIST + + Comma-separated list of HTTP status codes that cause Instaloader to abort, + bypassing all retry logic. + + For example, with ``--abort-on=302,400,429``, Instaloader will stop if a + request is answered with a 302 redirect, a Bad Request error, or a Too Many + Requests error. + + .. versionadded:: 4.7 + Miscellaneous Options ^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/module/exceptions.rst b/docs/module/exceptions.rst index e29aa69..892e563 100644 --- a/docs/module/exceptions.rst +++ b/docs/module/exceptions.rst @@ -47,3 +47,5 @@ Exceptions .. autoexception:: TooManyRequestsException + +.. autoexception:: AbortDownloadException diff --git a/instaloader/__main__.py b/instaloader/__main__.py index a086c40..0334606 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -5,12 +5,12 @@ import datetime import os import re import sys -from argparse import ArgumentParser, SUPPRESS +from argparse import ArgumentParser, ArgumentTypeError, SUPPRESS from typing import List, Optional -from . import (Instaloader, InstaloaderException, InvalidArgumentException, Post, Profile, ProfileNotExistsException, - StoryItem, __version__, load_structure_from_file, TwoFactorAuthRequiredException, - BadCredentialsException) +from . 
import (AbortDownloadException, BadCredentialsException, Instaloader, InstaloaderException, + InvalidArgumentException, Post, Profile, ProfileNotExistsException, StoryItem, + TwoFactorAuthRequiredException, __version__, load_structure_from_file) from .instaloader import get_default_session_filename from .instaloadercontext import default_user_agent @@ -27,6 +27,14 @@ def usage_string(): {0} --help""".format(argv0, len(argv0), '') +def http_status_code_list(code_list_str: str) -> List[int]: + codes = [int(s) for s in code_list_str.split(',')] + for code in codes: + if not 100 <= code <= 599: + raise ArgumentTypeError("Invalid HTTP status code: {}".format(code)) + return codes + + def filterstr_to_filterfunc(filter_str: str, item_type: type): """Takes an --post-filter=... or --storyitem-filter=... filter specification and makes a filter_func Callable out of it.""" @@ -213,6 +221,8 @@ def _main(instaloader: Instaloader, targetlist: List[str], fast_update=fast_update, post_filter=post_filter) except KeyboardInterrupt: print("\nInterrupted by user.", file=sys.stderr) + except AbortDownloadException as exc: + print("\nDownload aborted: {}.".format(exc), file=sys.stderr) # Save session if it is useful if instaloader.context.is_logged_in: instaloader.save_session_to_file(sessionfile) @@ -369,6 +379,9 @@ def main(): g_how.add_argument('--commit-mode', action='store_true', help=SUPPRESS) g_how.add_argument('--request-timeout', metavar='N', type=float, default=300.0, help='Seconds to wait before timing out a connection request. 
Defaults to 300.') + g_how.add_argument('--abort-on', type=http_status_code_list, metavar="STATUS_CODES", + help='Comma-separated list of HTTP status codes that cause Instaloader to abort, bypassing all ' + 'retry logic.') g_misc = parser.add_argument_group('Miscellaneous Options') g_misc.add_argument('-q', '--quiet', action='store_true', @@ -427,7 +440,8 @@ def main(): request_timeout=args.request_timeout, resume_prefix=resume_prefix, check_resume_bbd=not args.use_aged_resume_files, - slide=args.slide) + slide=args.slide, + fatal_status_codes=args.abort_on) _main(loader, args.profile, username=args.login.lower() if args.login is not None else None, diff --git a/instaloader/exceptions.py b/instaloader/exceptions.py index e23ba0d..e4a90ff 100644 --- a/instaloader/exceptions.py +++ b/instaloader/exceptions.py @@ -64,3 +64,15 @@ class QueryReturnedNotFoundException(ConnectionException): class TooManyRequestsException(ConnectionException): pass + + +class AbortDownloadException(Exception): + """ + Exception that is not caught in the error catchers inside the download loop and so aborts the + download loop. + + This exception is not a subclass of ``InstaloaderException``. + + .. versionadded:: 4.7 + """ + pass diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index f6dbd9f..e502121 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -161,6 +161,7 @@ class Instaloader: :param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`. :param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired. :param slide: :option:`--slide` + :param fatal_status_codes: :option:`--abort-on` .. 
attribute:: context @@ -187,10 +188,11 @@ class Instaloader: rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None, resume_prefix: Optional[str] = "iterator", check_resume_bbd: bool = True, - slide: Optional[str] = None): + slide: Optional[str] = None, + fatal_status_codes: Optional[List[int]] = None): self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, - request_timeout, rate_controller) + request_timeout, rate_controller, fatal_status_codes) # configuration parameters self.dirname_pattern = dirname_pattern or "{target}" @@ -256,7 +258,8 @@ class Instaloader: request_timeout=self.context.request_timeout, resume_prefix=self.resume_prefix, check_resume_bbd=self.check_resume_bbd, - slide=self.slide) + slide=self.slide, + fatal_status_codes=self.context.fatal_status_codes) yield new_loader self.context.error_log.extend(new_loader.context.error_log) new_loader.context.error_log = [] # avoid double-printing of errors diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 68c9b53..8c24690 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -53,7 +53,8 @@ class InstaloaderContext: def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None, max_connection_attempts: int = 3, request_timeout: float = 300.0, - rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None): + rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None, + fatal_status_codes: Optional[List[int]] = None): self.user_agent = user_agent if user_agent is not None else default_user_agent() self.request_timeout = request_timeout @@ -74,6 +75,9 @@ class InstaloaderContext: # Can be set to True for testing, disables supression of InstaloaderContext._error_catcher self.raise_all_errors = False + # HTTP status codes that should cause an AbortDownloadException + self.fatal_status_codes = 
fatal_status_codes or [] + # Cache profile from id (mapping from id to Profile) self.profile_id_cache = dict() # type: Dict[int, Any] @@ -316,6 +320,11 @@ class InstaloaderContext: if is_other_query: self._rate_controller.wait_before_query('other') resp = sess.get('https://{0}/{1}'.format(host, path), params=params, allow_redirects=False) + if resp.status_code in self.fatal_status_codes: + redirect = " redirect to {}".format(resp.headers['location']) if 'location' in resp.headers else "" + raise AbortDownloadException("Query to https://{}/{} responded with \"{} {}\"{}".format( + host, path, resp.status_code, resp.reason, redirect + )) while resp.is_redirect: redirect_url = resp.headers['location'] self.log('\nHTTP redirect from https://{0}/{1} to {2}'.format(host, path, redirect_url)) diff --git a/instaloader/nodeiterator.py b/instaloader/nodeiterator.py index 7fbe154..5ddf0b8 100644 --- a/instaloader/nodeiterator.py +++ b/instaloader/nodeiterator.py @@ -7,7 +7,7 @@ from datetime import datetime, timedelta from lzma import LZMAError from typing import Any, Callable, Dict, Iterator, NamedTuple, Optional, Tuple, TypeVar -from .exceptions import InvalidArgumentException, QueryReturnedBadRequestException +from .exceptions import AbortDownloadException, InvalidArgumentException, QueryReturnedBadRequestException from .instaloadercontext import InstaloaderContext FrozenNodeIterator = NamedTuple('FrozenNodeIterator', @@ -211,7 +211,8 @@ def resumable_iteration(context: InstaloaderContext, check_bbd: bool = True, enabled: bool = True) -> Iterator[Tuple[bool, int]]: """ - High-level context manager to handle a resumable iteration that can be interrupted with a KeyboardInterrupt. + High-level context manager to handle a resumable iteration that can be interrupted + with a :class:`KeyboardInterrupt` or an :class:`AbortDownloadException`. 
It can be used as follows to automatically load a previously-saved state into the iterator, save the iterator's state when interrupted, and delete the resume file upon completion:: @@ -239,6 +240,9 @@ def resumable_iteration(context: InstaloaderContext, :param format_path: Returns the path to the resume file for the given magic. :param check_bbd: Whether to check the best before date and reject an expired FrozenNodeIterator. :param enabled: Set to False to disable all functionality and simply execute the inner body. + + .. versionchanged:: 4.7 + Also interrupt on :class:`AbortDownloadException`. """ if not enabled or not isinstance(iterator, NodeIterator): yield False, 0 @@ -262,7 +266,7 @@ def resumable_iteration(context: InstaloaderContext, context.error("Warning: Not resuming from {}: {}".format(resume_file_path, exc)) try: yield is_resuming, start_index - except KeyboardInterrupt: + except (KeyboardInterrupt, AbortDownloadException): if os.path.dirname(resume_file_path): os.makedirs(os.path.dirname(resume_file_path), exist_ok=True) save(iterator.freeze(), resume_file_path)