mirror of
https://github.com/instaloader/instaloader.git
synced 2024-11-04 09:22:29 +01:00
Allow changing HTTP User Agent string
This commit is contained in:
parent
1e10ab8669
commit
58c12d5618
@ -126,6 +126,8 @@ be downloaded.
|
|||||||
--hashtag-username When downloading by #hashtag, lookup the picture's username
|
--hashtag-username When downloading by #hashtag, lookup the picture's username
|
||||||
to decide in which directory to store, rather than storing
|
to decide in which directory to store, rather than storing
|
||||||
all pictures in directory '#hashtag'.
|
all pictures in directory '#hashtag'.
|
||||||
|
--user-agent STRING Change User Agent for HTTP requests to STRING, rather than
|
||||||
|
our default user agent (Chrome 51).
|
||||||
|
|
||||||
To get a list of all flags, run ``instaloader --help``.
|
To get a list of all flags, run ``instaloader --help``.
|
||||||
|
|
||||||
|
@ -103,37 +103,9 @@ def copy_session(session: requests.Session) -> requests.Session:
|
|||||||
return new
|
return new
|
||||||
|
|
||||||
|
|
||||||
def default_http_header(empty_session_only: bool = False) -> Dict[str, str]:
|
def default_user_agent() -> str:
|
||||||
"""Returns default HTTP header we use for requests."""
|
return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
||||||
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
|
||||||
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
|
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
|
||||||
header = {'Accept-Encoding': 'gzip, deflate',
|
|
||||||
'Accept-Language': 'en-US,en;q=0.8',
|
|
||||||
'Connection': 'keep-alive',
|
|
||||||
'Content-Length': '0',
|
|
||||||
'Host': 'www.instagram.com',
|
|
||||||
'Origin': 'https://www.instagram.com',
|
|
||||||
'Referer': 'https://www.instagram.com/',
|
|
||||||
'User-Agent': user_agent,
|
|
||||||
'X-Instagram-AJAX': '1',
|
|
||||||
'X-Requested-With': 'XMLHttpRequest'}
|
|
||||||
if empty_session_only:
|
|
||||||
del header['Host']
|
|
||||||
del header['Origin']
|
|
||||||
del header['Referer']
|
|
||||||
del header['X-Instagram-AJAX']
|
|
||||||
del header['X-Requested-With']
|
|
||||||
return header
|
|
||||||
|
|
||||||
|
|
||||||
def get_anonymous_session() -> requests.Session:
|
|
||||||
"""Returns our default anonymous requests.Session object."""
|
|
||||||
session = requests.Session()
|
|
||||||
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
|
|
||||||
'ig_vw': '1920', 'csrftoken': '',
|
|
||||||
's_network': '', 'ds_user_id': ''})
|
|
||||||
session.headers.update(default_http_header(empty_session_only=True))
|
|
||||||
return session
|
|
||||||
|
|
||||||
|
|
||||||
def shortcode_to_mediaid(code: str) -> int:
|
def shortcode_to_mediaid(code: str) -> int:
|
||||||
@ -151,8 +123,10 @@ def mediaid_to_shortcode(mediaid: int) -> str:
|
|||||||
|
|
||||||
class Instaloader:
|
class Instaloader:
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
sleep: bool = True, quiet: bool = False, shorter_output: bool = False, profile_subdirs: bool = True):
|
sleep: bool = True, quiet: bool = False, shorter_output: bool = False, profile_subdirs: bool = True,
|
||||||
self.session = get_anonymous_session()
|
user_agent: Optional[str] = None):
|
||||||
|
self.user_agent = user_agent if user_agent is not None else default_user_agent()
|
||||||
|
self.session = self.get_anonymous_session()
|
||||||
self.username = None
|
self.username = None
|
||||||
self.sleep = sleep
|
self.sleep = sleep
|
||||||
self.quiet = quiet
|
self.quiet = quiet
|
||||||
@ -178,10 +152,39 @@ class Instaloader:
|
|||||||
if match is not None:
|
if match is not None:
|
||||||
return json.loads(match.group(0)[21:-2])
|
return json.loads(match.group(0)[21:-2])
|
||||||
|
|
||||||
|
def default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
|
||||||
|
"""Returns default HTTP header we use for requests."""
|
||||||
|
header = {'Accept-Encoding': 'gzip, deflate',
|
||||||
|
'Accept-Language': 'en-US,en;q=0.8',
|
||||||
|
'Connection': 'keep-alive',
|
||||||
|
'Content-Length': '0',
|
||||||
|
'Host': 'www.instagram.com',
|
||||||
|
'Origin': 'https://www.instagram.com',
|
||||||
|
'Referer': 'https://www.instagram.com/',
|
||||||
|
'User-Agent': self.user_agent,
|
||||||
|
'X-Instagram-AJAX': '1',
|
||||||
|
'X-Requested-With': 'XMLHttpRequest'}
|
||||||
|
if empty_session_only:
|
||||||
|
del header['Host']
|
||||||
|
del header['Origin']
|
||||||
|
del header['Referer']
|
||||||
|
del header['X-Instagram-AJAX']
|
||||||
|
del header['X-Requested-With']
|
||||||
|
return header
|
||||||
|
|
||||||
|
def get_anonymous_session(self) -> requests.Session:
|
||||||
|
"""Returns our default anonymous requests.Session object."""
|
||||||
|
session = requests.Session()
|
||||||
|
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
|
||||||
|
'ig_vw': '1920', 'csrftoken': '',
|
||||||
|
's_network': '', 'ds_user_id': ''})
|
||||||
|
session.headers.update(self.default_http_header(empty_session_only=True))
|
||||||
|
return session
|
||||||
|
|
||||||
def get_username_by_id(self, profile_id: int) -> str:
|
def get_username_by_id(self, profile_id: int) -> str:
|
||||||
"""To get the current username of a profile, given its unique ID, this function can be used."""
|
"""To get the current username of a profile, given its unique ID, this function can be used."""
|
||||||
tmpsession = copy_session(self.session)
|
tmpsession = copy_session(self.session)
|
||||||
tmpsession.headers.update(default_http_header())
|
tmpsession.headers.update(self.default_http_header())
|
||||||
del tmpsession.headers['Referer']
|
del tmpsession.headers['Referer']
|
||||||
del tmpsession.headers['X-Instagram-AJAX']
|
del tmpsession.headers['X-Instagram-AJAX']
|
||||||
resp = tmpsession.get('https://www.instagram.com/graphql/query/',
|
resp = tmpsession.get('https://www.instagram.com/graphql/query/',
|
||||||
@ -210,7 +213,7 @@ class Instaloader:
|
|||||||
def get_id_by_username(self, profile: str) -> int:
|
def get_id_by_username(self, profile: str) -> int:
|
||||||
"""Each Instagram profile has its own unique ID which stays unmodified even if a user changes
|
"""Each Instagram profile has its own unique ID which stays unmodified even if a user changes
|
||||||
his/her username. To get said ID, given the profile's name, you may call this function."""
|
his/her username. To get said ID, given the profile's name, you may call this function."""
|
||||||
data = self.get_json(profile, session=get_anonymous_session())
|
data = self.get_json(profile, session=self.get_anonymous_session())
|
||||||
if "ProfilePage" not in data["entry_data"]:
|
if "ProfilePage" not in data["entry_data"]:
|
||||||
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
|
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
|
||||||
return int(data['entry_data']['ProfilePage'][0]['user']['id'])
|
return int(data['entry_data']['ProfilePage'][0]['user']['id'])
|
||||||
@ -245,7 +248,7 @@ class Instaloader:
|
|||||||
"++%7D%0A"
|
"++%7D%0A"
|
||||||
"%7D%0A"
|
"%7D%0A"
|
||||||
"&ref=relationships%3A%3Afollow_list"]
|
"&ref=relationships%3A%3Afollow_list"]
|
||||||
tmpsession.headers.update(default_http_header())
|
tmpsession.headers.update(self.default_http_header())
|
||||||
tmpsession.headers.update({'Referer': 'https://www.instagram.com/' + profile + '/following/'})
|
tmpsession.headers.update({'Referer': 'https://www.instagram.com/' + profile + '/following/'})
|
||||||
tmpsession.headers.update({'Content-Type': 'application/x-www-form-urlencoded'})
|
tmpsession.headers.update({'Content-Type': 'application/x-www-form-urlencoded'})
|
||||||
resp = tmpsession.post('https://www.instagram.com/query/', data=query[0] + "first(" + query[1])
|
resp = tmpsession.post('https://www.instagram.com/query/', data=query[0] + "first(" + query[1])
|
||||||
@ -288,7 +291,7 @@ class Instaloader:
|
|||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
self._log(outputlabel + ' exists', end=' ', flush=True)
|
self._log(outputlabel + ' exists', end=' ', flush=True)
|
||||||
return False
|
return False
|
||||||
resp = get_anonymous_session().get(url, stream=True)
|
resp = self.get_anonymous_session().get(url, stream=True)
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
self._log(outputlabel, end=' ', flush=True)
|
self._log(outputlabel, end=' ', flush=True)
|
||||||
if self.profile_subdirs:
|
if self.profile_subdirs:
|
||||||
@ -381,7 +384,7 @@ class Instaloader:
|
|||||||
index = len(match.group(0)) - 1
|
index = len(match.group(0)) - 1
|
||||||
offset = 8 if match.group(0)[-1:] == 's' else 0
|
offset = 8 if match.group(0)[-1:] == 's' else 0
|
||||||
url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index + offset:]
|
url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index + offset:]
|
||||||
resp = get_anonymous_session().get(url, stream=True)
|
resp = self.get_anonymous_session().get(url, stream=True)
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
self._log(filename)
|
self._log(filename)
|
||||||
if self.profile_subdirs:
|
if self.profile_subdirs:
|
||||||
@ -418,7 +421,7 @@ class Instaloader:
|
|||||||
with open(filename, 'rb') as sessionfile:
|
with open(filename, 'rb') as sessionfile:
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile))
|
session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile))
|
||||||
session.headers.update(default_http_header())
|
session.headers.update(self.default_http_header())
|
||||||
session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']})
|
session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']})
|
||||||
self._log("Loaded session from %s." % filename)
|
self._log("Loaded session from %s." % filename)
|
||||||
self.session = session
|
self.session = session
|
||||||
@ -440,7 +443,7 @@ class Instaloader:
|
|||||||
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
|
session.cookies.update({'sessionid': '', 'mid': '', 'ig_pr': '1',
|
||||||
'ig_vw': '1920', 'csrftoken': '',
|
'ig_vw': '1920', 'csrftoken': '',
|
||||||
's_network': '', 'ds_user_id': ''})
|
's_network': '', 'ds_user_id': ''})
|
||||||
session.headers.update(default_http_header())
|
session.headers.update(self.default_http_header())
|
||||||
resp = session.get('https://www.instagram.com/')
|
resp = session.get('https://www.instagram.com/')
|
||||||
session.headers.update({'X-CSRFToken': resp.cookies['csrftoken']})
|
session.headers.update({'X-CSRFToken': resp.cookies['csrftoken']})
|
||||||
time.sleep(9 * random.random() + 3)
|
time.sleep(9 * random.random() + 3)
|
||||||
@ -491,7 +494,7 @@ class Instaloader:
|
|||||||
"%7D%0A++++++++%7D%2C%0A++++++++video_url%2C%0A++++++++" + \
|
"%7D%0A++++++++%7D%2C%0A++++++++video_url%2C%0A++++++++" + \
|
||||||
"video_views%0A++++++%7D%2C%0A++++++page_info%0A++++%7D%0A++%7D%2C%0A++id%2C%0A++" + \
|
"video_views%0A++++++%7D%2C%0A++++++page_info%0A++++%7D%0A++%7D%2C%0A++id%2C%0A++" + \
|
||||||
"profile_pic_url%2C%0A++username%0A%7D%0A&ref=feed::show"
|
"profile_pic_url%2C%0A++username%0A%7D%0A&ref=feed::show"
|
||||||
tmpsession.headers.update(default_http_header())
|
tmpsession.headers.update(self.default_http_header())
|
||||||
tmpsession.headers.update({'Referer': 'https://www.instagram.com/'})
|
tmpsession.headers.update({'Referer': 'https://www.instagram.com/'})
|
||||||
tmpsession.headers.update({'Content-Type': 'application/x-www-form-urlencoded'})
|
tmpsession.headers.update({'Content-Type': 'application/x-www-form-urlencoded'})
|
||||||
resp = tmpsession.post('https://www.instagram.com/query/', data=query)
|
resp = tmpsession.post('https://www.instagram.com/query/', data=query)
|
||||||
@ -908,6 +911,8 @@ def main():
|
|||||||
'file\'s path or filename (if --no-profile-subdir). Without this option, the #hashtag is '
|
'file\'s path or filename (if --no-profile-subdir). Without this option, the #hashtag is '
|
||||||
'used instead. This requires an additional request to the Instagram server for each '
|
'used instead. This requires an additional request to the Instagram server for each '
|
||||||
'picture, which is why it is disabled by default.')
|
'picture, which is why it is disabled by default.')
|
||||||
|
parser.add_argument('--user-agent',
|
||||||
|
help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
|
||||||
parser.add_argument('-S', '--no-sleep', action='store_true',
|
parser.add_argument('-S', '--no-sleep', action='store_true',
|
||||||
help='Do not sleep between actual downloads of pictures')
|
help='Do not sleep between actual downloads of pictures')
|
||||||
parser.add_argument('-O', '--shorter-output', action='store_true',
|
parser.add_argument('-O', '--shorter-output', action='store_true',
|
||||||
@ -917,7 +922,8 @@ def main():
|
|||||||
'if login credentials are needed but not given.')
|
'if login credentials are needed but not given.')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
try:
|
try:
|
||||||
loader = Instaloader(not args.no_sleep, args.quiet, args.shorter_output, not args.no_profile_subdir)
|
loader = Instaloader(sleep = not args.no_sleep, quiet = args.quiet, shorter_output = args.shorter_output,
|
||||||
|
profile_subdirs = not args.no_profile_subdir, user_agent = args.user_agent)
|
||||||
loader.download_profiles(args.profile, args.login, args.password, args.sessionfile,
|
loader.download_profiles(args.profile, args.login, args.password, args.sessionfile,
|
||||||
int(args.count) if args.count is not None else None,
|
int(args.count) if args.count is not None else None,
|
||||||
args.profile_pic_only, not args.skip_videos, args.geotags, args.fast_update,
|
args.profile_pic_only, not args.skip_videos, args.geotags, args.fast_update,
|
||||||
|
Loading…
Reference in New Issue
Block a user