1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-11-20 17:22:31 +01:00

Flag --abort-on to abort on given status codes

Closes #920.
This commit is contained in:
Alexander Graf 2021-02-13 19:04:05 +01:00
parent e15d67c065
commit a2d756b177
7 changed files with 67 additions and 12 deletions

View File

@ -255,6 +255,17 @@ How to Download
.. versionchanged:: 4.6
Enabled this option by default with a timeout of 300 seconds.
.. option:: --abort-on STATUS_CODE_LIST
Comma-separated list of HTTP status codes that cause Instaloader to abort,
bypassing all retry logic.
For example, with ``--abort-on=302,400,429``, Instaloader will stop if a
request is answered with a 302 redirect, a Bad Request error, or a Too Many
Requests error.
.. versionadded:: 4.7
Miscellaneous Options
^^^^^^^^^^^^^^^^^^^^^

View File

@ -47,3 +47,5 @@ Exceptions
.. autoexception:: TooManyRequestsException
.. autoexception:: AbortDownloadException

View File

@ -5,12 +5,12 @@ import datetime
import os
import re
import sys
from argparse import ArgumentParser, SUPPRESS
from argparse import ArgumentParser, ArgumentTypeError, SUPPRESS
from typing import List, Optional
from . import (Instaloader, InstaloaderException, InvalidArgumentException, Post, Profile, ProfileNotExistsException,
StoryItem, __version__, load_structure_from_file, TwoFactorAuthRequiredException,
BadCredentialsException)
from . import (AbortDownloadException, BadCredentialsException, Instaloader, InstaloaderException,
InvalidArgumentException, Post, Profile, ProfileNotExistsException, StoryItem,
TwoFactorAuthRequiredException, __version__, load_structure_from_file)
from .instaloader import get_default_session_filename
from .instaloadercontext import default_user_agent
@ -27,6 +27,14 @@ def usage_string():
{0} --help""".format(argv0, len(argv0), '')
def http_status_code_list(code_list_str: str) -> List[int]:
    """Parse a comma-separated list of HTTP status codes, e.g. ``"302,400,429"``.

    Intended to be used as an argparse ``type=`` callable.

    :param code_list_str: Comma-separated HTTP status codes.
    :return: The status codes as integers, in the order given.
    :raises ArgumentTypeError: If an entry is not an integer or is outside the range 100..599.
    """
    codes = []
    for entry in code_list_str.split(','):
        try:
            code = int(entry)
        except ValueError:
            # Report malformed entries (e.g. "abc", or the empty entry produced by a trailing
            # comma) the same way as out-of-range codes, instead of leaking a bare ValueError.
            raise ArgumentTypeError("Invalid HTTP status code: {!r}".format(entry)) from None
        if not 100 <= code <= 599:
            raise ArgumentTypeError("Invalid HTTP status code: {}".format(code))
        codes.append(code)
    return codes
def filterstr_to_filterfunc(filter_str: str, item_type: type):
"""Takes an --post-filter=... or --storyitem-filter=... filter
specification and makes a filter_func Callable out of it."""
@ -213,6 +221,8 @@ def _main(instaloader: Instaloader, targetlist: List[str],
fast_update=fast_update, post_filter=post_filter)
except KeyboardInterrupt:
print("\nInterrupted by user.", file=sys.stderr)
except AbortDownloadException as exc:
print("\nDownload aborted: {}.".format(exc), file=sys.stderr)
# Save session if it is useful
if instaloader.context.is_logged_in:
instaloader.save_session_to_file(sessionfile)
@ -369,6 +379,9 @@ def main():
g_how.add_argument('--commit-mode', action='store_true', help=SUPPRESS)
g_how.add_argument('--request-timeout', metavar='N', type=float, default=300.0,
help='Seconds to wait before timing out a connection request. Defaults to 300.')
g_how.add_argument('--abort-on', type=http_status_code_list, metavar="STATUS_CODES",
help='Comma-separated list of HTTP status codes that cause Instaloader to abort, bypassing all '
'retry logic.')
g_misc = parser.add_argument_group('Miscellaneous Options')
g_misc.add_argument('-q', '--quiet', action='store_true',
@ -427,7 +440,8 @@ def main():
request_timeout=args.request_timeout,
resume_prefix=resume_prefix,
check_resume_bbd=not args.use_aged_resume_files,
slide=args.slide)
slide=args.slide,
fatal_status_codes=args.abort_on)
_main(loader,
args.profile,
username=args.login.lower() if args.login is not None else None,

View File

@ -64,3 +64,15 @@ class QueryReturnedNotFoundException(ConnectionException):
class TooManyRequestsException(ConnectionException):
pass
class AbortDownloadException(Exception):
    """
    Raised to abort the download loop as a whole: the error catchers inside the download loop
    do not catch this exception, so it propagates out of the loop.

    Deliberately derived from :class:`Exception` directly; it is not a subclass of
    ``InstaloaderException``.

    .. versionadded:: 4.7
    """

View File

@ -161,6 +161,7 @@ class Instaloader:
:param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`.
:param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired.
:param slide: :option:`--slide`
:param fatal_status_codes: :option:`--abort-on`
.. attribute:: context
@ -187,10 +188,11 @@ class Instaloader:
rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None,
resume_prefix: Optional[str] = "iterator",
check_resume_bbd: bool = True,
slide: Optional[str] = None):
slide: Optional[str] = None,
fatal_status_codes: Optional[List[int]] = None):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
request_timeout, rate_controller)
request_timeout, rate_controller, fatal_status_codes)
# configuration parameters
self.dirname_pattern = dirname_pattern or "{target}"
@ -256,7 +258,8 @@ class Instaloader:
request_timeout=self.context.request_timeout,
resume_prefix=self.resume_prefix,
check_resume_bbd=self.check_resume_bbd,
slide=self.slide)
slide=self.slide,
fatal_status_codes=self.context.fatal_status_codes)
yield new_loader
self.context.error_log.extend(new_loader.context.error_log)
new_loader.context.error_log = [] # avoid double-printing of errors

View File

@ -53,7 +53,8 @@ class InstaloaderContext:
def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional[str] = None,
max_connection_attempts: int = 3, request_timeout: float = 300.0,
rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None):
rate_controller: Optional[Callable[["InstaloaderContext"], "RateController"]] = None,
fatal_status_codes: Optional[List[int]] = None):
self.user_agent = user_agent if user_agent is not None else default_user_agent()
self.request_timeout = request_timeout
@ -74,6 +75,9 @@ class InstaloaderContext:
# Can be set to True for testing, disables suppression of InstaloaderContext._error_catcher
self.raise_all_errors = False
# HTTP status codes that should cause an AbortDownloadException
self.fatal_status_codes = fatal_status_codes or []
# Cache profile from id (mapping from id to Profile)
self.profile_id_cache = dict() # type: Dict[int, Any]
@ -316,6 +320,11 @@ class InstaloaderContext:
if is_other_query:
self._rate_controller.wait_before_query('other')
resp = sess.get('https://{0}/{1}'.format(host, path), params=params, allow_redirects=False)
if resp.status_code in self.fatal_status_codes:
redirect = " redirect to {}".format(resp.headers['location']) if 'location' in resp.headers else ""
raise AbortDownloadException("Query to https://{}/{} responded with \"{} {}\"{}".format(
host, path, resp.status_code, resp.reason, redirect
))
while resp.is_redirect:
redirect_url = resp.headers['location']
self.log('\nHTTP redirect from https://{0}/{1} to {2}'.format(host, path, redirect_url))

View File

@ -7,7 +7,7 @@ from datetime import datetime, timedelta
from lzma import LZMAError
from typing import Any, Callable, Dict, Iterator, NamedTuple, Optional, Tuple, TypeVar
from .exceptions import InvalidArgumentException, QueryReturnedBadRequestException
from .exceptions import AbortDownloadException, InvalidArgumentException, QueryReturnedBadRequestException
from .instaloadercontext import InstaloaderContext
FrozenNodeIterator = NamedTuple('FrozenNodeIterator',
@ -211,7 +211,8 @@ def resumable_iteration(context: InstaloaderContext,
check_bbd: bool = True,
enabled: bool = True) -> Iterator[Tuple[bool, int]]:
"""
High-level context manager to handle a resumable iteration that can be interrupted with a KeyboardInterrupt.
High-level context manager to handle a resumable iteration that can be interrupted
with a :class:`KeyboardInterrupt` or an :class:`AbortDownloadException`.
It can be used as follows to automatically load a previously-saved state into the iterator, save the iterator's
state when interrupted, and delete the resume file upon completion::
@ -239,6 +240,9 @@ def resumable_iteration(context: InstaloaderContext,
:param format_path: Returns the path to the resume file for the given magic.
:param check_bbd: Whether to check the best before date and reject an expired FrozenNodeIterator.
:param enabled: Set to False to disable all functionality and simply execute the inner body.
.. versionchanged:: 4.7
Also interrupt on :class:`AbortDownloadException`.
"""
if not enabled or not isinstance(iterator, NodeIterator):
yield False, 0
@ -262,7 +266,7 @@ def resumable_iteration(context: InstaloaderContext,
context.error("Warning: Not resuming from {}: {}".format(resume_file_path, exc))
try:
yield is_resuming, start_index
except KeyboardInterrupt:
except (KeyboardInterrupt, AbortDownloadException):
if os.path.dirname(resume_file_path):
os.makedirs(os.path.dirname(resume_file_path), exist_ok=True)
save(iterator.freeze(), resume_file_path)