1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-11-04 09:22:29 +01:00

Allow changing HTTP User Agent string

This commit is contained in:
Alexander Graf 2017-07-20 11:25:46 +02:00
parent 1e10ab8669
commit 58c12d5618
2 changed files with 50 additions and 42 deletions

View File

@ -126,6 +126,8 @@ be downloaded.
--hashtag-username When downloading by #hashtag, lookup the picture's username --hashtag-username When downloading by #hashtag, lookup the picture's username
to decide in which directory to store, rather than storing to decide in which directory to store, rather than storing
all pictures in directory '#hashtag'. all pictures in directory '#hashtag'.
--user-agent STRING Change User Agent for HTTP requests to STRING, rather than
our default user agent (Chrome 51).
To get a list of all flags, run ``instaloader --help``. To get a list of all flags, run ``instaloader --help``.

View File

@ -103,37 +103,9 @@ def copy_session(session: requests.Session) -> requests.Session:
return new return new
def default_http_header(empty_session_only: bool = False) -> Dict[str, str]: def default_user_agent() -> str:
"""Returns default HTTP header we use for requests.""" return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36' '(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
header = {'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive',
'Content-Length': '0',
'Host': 'www.instagram.com',
'Origin': 'https://www.instagram.com',
'Referer': 'https://www.instagram.com/',
'User-Agent': user_agent,
'X-Instagram-AJAX': '1',
'X-Requested-With': 'XMLHttpRequest'}
if empty_session_only:
del header['Host']
del header['Origin']
del header['Referer']
del header['X-Instagram-AJAX']
del header['X-Requested-With']
return header
def get_anonymous_session() -> requests.Session:
"""Returns our default anonymous requests.Session object."""
session = requests.Session()
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
'ig_vw': '1920', 'csrftoken': '',
's_network': '', 'ds_user_id': ''})
session.headers.update(default_http_header(empty_session_only=True))
return session
def shortcode_to_mediaid(code: str) -> int: def shortcode_to_mediaid(code: str) -> int:
@ -151,8 +123,10 @@ def mediaid_to_shortcode(mediaid: int) -> str:
class Instaloader: class Instaloader:
def __init__(self, def __init__(self,
sleep: bool = True, quiet: bool = False, shorter_output: bool = False, profile_subdirs: bool = True): sleep: bool = True, quiet: bool = False, shorter_output: bool = False, profile_subdirs: bool = True,
self.session = get_anonymous_session() user_agent: Optional[str] = None):
self.user_agent = user_agent if user_agent is not None else default_user_agent()
self.session = self.get_anonymous_session()
self.username = None self.username = None
self.sleep = sleep self.sleep = sleep
self.quiet = quiet self.quiet = quiet
@ -178,10 +152,39 @@ class Instaloader:
if match is not None: if match is not None:
return json.loads(match.group(0)[21:-2]) return json.loads(match.group(0)[21:-2])
def default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
"""Returns default HTTP header we use for requests."""
header = {'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive',
'Content-Length': '0',
'Host': 'www.instagram.com',
'Origin': 'https://www.instagram.com',
'Referer': 'https://www.instagram.com/',
'User-Agent': self.user_agent,
'X-Instagram-AJAX': '1',
'X-Requested-With': 'XMLHttpRequest'}
if empty_session_only:
del header['Host']
del header['Origin']
del header['Referer']
del header['X-Instagram-AJAX']
del header['X-Requested-With']
return header
def get_anonymous_session(self) -> requests.Session:
"""Returns our default anonymous requests.Session object."""
session = requests.Session()
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
'ig_vw': '1920', 'csrftoken': '',
's_network': '', 'ds_user_id': ''})
session.headers.update(self.default_http_header(empty_session_only=True))
return session
def get_username_by_id(self, profile_id: int) -> str: def get_username_by_id(self, profile_id: int) -> str:
"""To get the current username of a profile, given its unique ID, this function can be used.""" """To get the current username of a profile, given its unique ID, this function can be used."""
tmpsession = copy_session(self.session) tmpsession = copy_session(self.session)
tmpsession.headers.update(default_http_header()) tmpsession.headers.update(self.default_http_header())
del tmpsession.headers['Referer'] del tmpsession.headers['Referer']
del tmpsession.headers['X-Instagram-AJAX'] del tmpsession.headers['X-Instagram-AJAX']
resp = tmpsession.get('https://www.instagram.com/graphql/query/', resp = tmpsession.get('https://www.instagram.com/graphql/query/',
@ -210,7 +213,7 @@ class Instaloader:
def get_id_by_username(self, profile: str) -> int: def get_id_by_username(self, profile: str) -> int:
"""Each Instagram profile has its own unique ID which stays unmodified even if a user changes """Each Instagram profile has its own unique ID which stays unmodified even if a user changes
his/her username. To get said ID, given the profile's name, you may call this function.""" his/her username. To get said ID, given the profile's name, you may call this function."""
data = self.get_json(profile, session=get_anonymous_session()) data = self.get_json(profile, session=self.get_anonymous_session())
if "ProfilePage" not in data["entry_data"]: if "ProfilePage" not in data["entry_data"]:
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile)) raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
return int(data['entry_data']['ProfilePage'][0]['user']['id']) return int(data['entry_data']['ProfilePage'][0]['user']['id'])
@ -245,7 +248,7 @@ class Instaloader:
"++%7D%0A" "++%7D%0A"
"%7D%0A" "%7D%0A"
"&ref=relationships%3A%3Afollow_list"] "&ref=relationships%3A%3Afollow_list"]
tmpsession.headers.update(default_http_header()) tmpsession.headers.update(self.default_http_header())
tmpsession.headers.update({'Referer': 'https://www.instagram.com/' + profile + '/following/'}) tmpsession.headers.update({'Referer': 'https://www.instagram.com/' + profile + '/following/'})
tmpsession.headers.update({'Content-Type': 'application/x-www-form-urlencoded'}) tmpsession.headers.update({'Content-Type': 'application/x-www-form-urlencoded'})
resp = tmpsession.post('https://www.instagram.com/query/', data=query[0] + "first(" + query[1]) resp = tmpsession.post('https://www.instagram.com/query/', data=query[0] + "first(" + query[1])
@ -288,7 +291,7 @@ class Instaloader:
if os.path.isfile(filename): if os.path.isfile(filename):
self._log(outputlabel + ' exists', end=' ', flush=True) self._log(outputlabel + ' exists', end=' ', flush=True)
return False return False
resp = get_anonymous_session().get(url, stream=True) resp = self.get_anonymous_session().get(url, stream=True)
if resp.status_code == 200: if resp.status_code == 200:
self._log(outputlabel, end=' ', flush=True) self._log(outputlabel, end=' ', flush=True)
if self.profile_subdirs: if self.profile_subdirs:
@ -381,7 +384,7 @@ class Instaloader:
index = len(match.group(0)) - 1 index = len(match.group(0)) - 1
offset = 8 if match.group(0)[-1:] == 's' else 0 offset = 8 if match.group(0)[-1:] == 's' else 0
url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index + offset:] url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index + offset:]
resp = get_anonymous_session().get(url, stream=True) resp = self.get_anonymous_session().get(url, stream=True)
if resp.status_code == 200: if resp.status_code == 200:
self._log(filename) self._log(filename)
if self.profile_subdirs: if self.profile_subdirs:
@ -418,7 +421,7 @@ class Instaloader:
with open(filename, 'rb') as sessionfile: with open(filename, 'rb') as sessionfile:
session = requests.Session() session = requests.Session()
session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile)) session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile))
session.headers.update(default_http_header()) session.headers.update(self.default_http_header())
session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']}) session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']})
self._log("Loaded session from %s." % filename) self._log("Loaded session from %s." % filename)
self.session = session self.session = session
@ -440,7 +443,7 @@ class Instaloader:
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1', session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
'ig_vw': '1920', 'csrftoken': '', 'ig_vw': '1920', 'csrftoken': '',
's_network': '', 'ds_user_id': ''}) 's_network': '', 'ds_user_id': ''})
session.headers.update(default_http_header()) session.headers.update(self.default_http_header())
resp = session.get('https://www.instagram.com/') resp = session.get('https://www.instagram.com/')
session.headers.update({'X-CSRFToken': resp.cookies['csrftoken']}) session.headers.update({'X-CSRFToken': resp.cookies['csrftoken']})
time.sleep(9 * random.random() + 3) time.sleep(9 * random.random() + 3)
@ -491,7 +494,7 @@ class Instaloader:
"%7D%0A++++++++%7D%2C%0A++++++++video_url%2C%0A++++++++" + \ "%7D%0A++++++++%7D%2C%0A++++++++video_url%2C%0A++++++++" + \
"video_views%0A++++++%7D%2C%0A++++++page_info%0A++++%7D%0A++%7D%2C%0A++id%2C%0A++" + \ "video_views%0A++++++%7D%2C%0A++++++page_info%0A++++%7D%0A++%7D%2C%0A++id%2C%0A++" + \
"profile_pic_url%2C%0A++username%0A%7D%0A&ref=feed::show" "profile_pic_url%2C%0A++username%0A%7D%0A&ref=feed::show"
tmpsession.headers.update(default_http_header()) tmpsession.headers.update(self.default_http_header())
tmpsession.headers.update({'Referer': 'https://www.instagram.com/'}) tmpsession.headers.update({'Referer': 'https://www.instagram.com/'})
tmpsession.headers.update({'Content-Type': 'application/x-www-form-urlencoded'}) tmpsession.headers.update({'Content-Type': 'application/x-www-form-urlencoded'})
resp = tmpsession.post('https://www.instagram.com/query/', data=query) resp = tmpsession.post('https://www.instagram.com/query/', data=query)
@ -908,6 +911,8 @@ def main():
'file\'s path or filename (if --no-profile-subdir). Without this option, the #hashtag is ' 'file\'s path or filename (if --no-profile-subdir). Without this option, the #hashtag is '
'used instead. This requires an additional request to the Instagram server for each ' 'used instead. This requires an additional request to the Instagram server for each '
'picture, which is why it is disabled by default.') 'picture, which is why it is disabled by default.')
parser.add_argument('--user-agent',
help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
parser.add_argument('-S', '--no-sleep', action='store_true', parser.add_argument('-S', '--no-sleep', action='store_true',
help='Do not sleep between actual downloads of pictures') help='Do not sleep between actual downloads of pictures')
parser.add_argument('-O', '--shorter-output', action='store_true', parser.add_argument('-O', '--shorter-output', action='store_true',
@ -917,7 +922,8 @@ def main():
'if login credentials are needed but not given.') 'if login credentials are needed but not given.')
args = parser.parse_args() args = parser.parse_args()
try: try:
loader = Instaloader(not args.no_sleep, args.quiet, args.shorter_output, not args.no_profile_subdir) loader = Instaloader(sleep = not args.no_sleep, quiet = args.quiet, shorter_output = args.shorter_output,
profile_subdirs = not args.no_profile_subdir, user_agent = args.user_agent)
loader.download_profiles(args.profile, args.login, args.password, args.sessionfile, loader.download_profiles(args.profile, args.login, args.password, args.sessionfile,
int(args.count) if args.count is not None else None, int(args.count) if args.count is not None else None,
args.profile_pic_only, not args.skip_videos, args.geotags, args.fast_update, args.profile_pic_only, not args.skip_videos, args.geotags, args.fast_update,