mirror of
https://github.com/instaloader/instaloader.git
synced 2024-09-11 16:22:24 +02:00
refactor get_session()
This commit is contained in:
parent
bd0505098e
commit
115f1c9133
98
instagram.py
98
instagram.py
@ -28,9 +28,7 @@ def log(*msg, sep='', end='\n', flush=False, quiet=False):
|
|||||||
if not quiet:
|
if not quiet:
|
||||||
print(*msg, sep=sep, end=end, flush=flush)
|
print(*msg, sep=sep, end=end, flush=flush)
|
||||||
|
|
||||||
def get_json(name, max_id = 0, session=None, sleep_min_max=[1,5]):
|
def get_json(name, session, max_id=0, sleep_min_max=[1,5]):
|
||||||
if session is None:
|
|
||||||
session = get_session(None, None, True)
|
|
||||||
resp = session.get('http://www.instagram.com/'+name, \
|
resp = session.get('http://www.instagram.com/'+name, \
|
||||||
params={'max_id': max_id})
|
params={'max_id': max_id})
|
||||||
time.sleep(abs(sleep_min_max[1]-sleep_min_max[0])*random.random()+abs(sleep_min_max[0]))
|
time.sleep(abs(sleep_min_max[1]-sleep_min_max[0])*random.random()+abs(sleep_min_max[0]))
|
||||||
@ -66,7 +64,7 @@ def download_pic(name, url, date_epoch, outputlabel=None, quiet=False):
|
|||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
log(outputlabel + ' exists', end=' ', flush=True, quiet=quiet)
|
log(outputlabel + ' exists', end=' ', flush=True, quiet=quiet)
|
||||||
return False
|
return False
|
||||||
resp = get_session(None, None, True).get(url, stream=True)
|
resp = get_anonymous_session().get(url, stream=True)
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
log(outputlabel, end=' ', flush=True, quiet=quiet)
|
log(outputlabel, end=' ', flush=True, quiet=quiet)
|
||||||
os.makedirs(name.lower(), exist_ok=True)
|
os.makedirs(name.lower(), exist_ok=True)
|
||||||
@ -116,7 +114,7 @@ def download_profilepic(name, url, quiet=False):
|
|||||||
index = len(match.group(0))-1
|
index = len(match.group(0))-1
|
||||||
offset = 8 if match.group(0)[-1:] == 's' else 0
|
offset = 8 if match.group(0)[-1:] == 's' else 0
|
||||||
url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index+offset:]
|
url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index+offset:]
|
||||||
resp = get_session(None, None, True).get(url, stream=True)
|
resp = get_anonymous_session().get(url, stream=True)
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
log(filename, quiet=quiet)
|
log(filename, quiet=quiet)
|
||||||
os.makedirs(name.lower(), exist_ok=True)
|
os.makedirs(name.lower(), exist_ok=True)
|
||||||
@ -140,21 +138,9 @@ def load_session(filename, quiet=False):
|
|||||||
filename = DEFAULTSESSIONFILE
|
filename = DEFAULTSESSIONFILE
|
||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
with open(filename, 'rb') as sessionfile:
|
with open(filename, 'rb') as sessionfile:
|
||||||
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
|
||||||
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
|
|
||||||
header = { 'Accept-Encoding' : 'gzip, deflate', \
|
|
||||||
'Accept-Language' : 'en-US,en;q=0.8', \
|
|
||||||
'Connection' : 'keep-alive', \
|
|
||||||
'Content-Length' : '0', \
|
|
||||||
'Host' : 'www.instagram.com', \
|
|
||||||
'Origin' : 'https://www.instagram.com', \
|
|
||||||
'Referer' : 'https://www.instagram.com/', \
|
|
||||||
'User-Agent' : user_agent, \
|
|
||||||
'X-Instagram-AJAX' : '1', \
|
|
||||||
'X-Requested-With' : 'XMLHttpRequest'}
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile))
|
session.cookies = requests.utils.cookiejar_from_dict(pickle.load(sessionfile))
|
||||||
session.headers.update(header)
|
session.headers.update(default_http_header())
|
||||||
log("Loaded session from %s." % filename, quiet=quiet)
|
log("Loaded session from %s." % filename, quiet=quiet)
|
||||||
return session
|
return session
|
||||||
|
|
||||||
@ -165,35 +151,42 @@ def test_login(user, session):
|
|||||||
time.sleep(4 * random.random() + 1)
|
time.sleep(4 * random.random() + 1)
|
||||||
return resp.text.find(user.lower()) != -1
|
return resp.text.find(user.lower()) != -1
|
||||||
|
|
||||||
def get_session(user, passwd, empty_session_only=False, session=None):
|
def default_http_header(empty_session_only=False):
|
||||||
def instaheader():
|
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
||||||
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
|
||||||
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
|
header = { 'Accept-Encoding' : 'gzip, deflate', \
|
||||||
header = { 'Accept-Encoding' : 'gzip, deflate', \
|
'Accept-Language' : 'en-US,en;q=0.8', \
|
||||||
'Accept-Language' : 'en-US,en;q=0.8', \
|
'Connection' : 'keep-alive', \
|
||||||
'Connection' : 'keep-alive', \
|
'Content-Length' : '0', \
|
||||||
'Content-Length' : '0', \
|
'Host' : 'www.instagram.com', \
|
||||||
'Host' : 'www.instagram.com', \
|
'Origin' : 'https://www.instagram.com', \
|
||||||
'Origin' : 'https://www.instagram.com', \
|
'Referer' : 'https://www.instagram.com/', \
|
||||||
'Referer' : 'https://www.instagram.com/', \
|
'User-Agent' : user_agent, \
|
||||||
'User-Agent' : user_agent, \
|
'X-Instagram-AJAX' : '1', \
|
||||||
'X-Instagram-AJAX' : '1', \
|
'X-Requested-With' : 'XMLHttpRequest'}
|
||||||
'X-Requested-With' : 'XMLHttpRequest'}
|
|
||||||
if empty_session_only:
|
|
||||||
del header['Host']
|
|
||||||
del header['Origin']
|
|
||||||
del header['Referer']
|
|
||||||
del header['X-Instagram-AJAX']
|
|
||||||
del header['X-Requested-With']
|
|
||||||
return header
|
|
||||||
if session is None:
|
|
||||||
session = requests.Session()
|
|
||||||
session.cookies.update({'sessionid' : '', 'mid' : '', 'ig_pr' : '1', \
|
|
||||||
'ig_vw' : '1920', 'csrftoken' : '', \
|
|
||||||
's_network' : '', 'ds_user_id' : ''})
|
|
||||||
session.headers.update(instaheader())
|
|
||||||
if empty_session_only:
|
if empty_session_only:
|
||||||
return session
|
del header['Host']
|
||||||
|
del header['Origin']
|
||||||
|
del header['Referer']
|
||||||
|
del header['X-Instagram-AJAX']
|
||||||
|
del header['X-Requested-With']
|
||||||
|
return header
|
||||||
|
|
||||||
|
def get_anonymous_session():
|
||||||
|
session = requests.Session()
|
||||||
|
session.cookies.update({'sessionid' : '', 'mid' : '', 'ig_pr' : '1', \
|
||||||
|
'ig_vw' : '1920', 'csrftoken' : '', \
|
||||||
|
's_network' : '', 'ds_user_id' : ''})
|
||||||
|
session.headers.update(default_http_header(empty_session_only=True))
|
||||||
|
return session
|
||||||
|
|
||||||
|
def get_session(user, passwd):
|
||||||
|
"""Log in to instagram with given username and password and return session object"""
|
||||||
|
session = requests.Session()
|
||||||
|
session.cookies.update({'sessionid' : '', 'mid' : '', 'ig_pr' : '1', \
|
||||||
|
'ig_vw' : '1920', 'csrftoken' : '', \
|
||||||
|
's_network' : '', 'ds_user_id' : ''})
|
||||||
|
session.headers.update(default_http_header())
|
||||||
resp = session.get('https://www.instagram.com/')
|
resp = session.get('https://www.instagram.com/')
|
||||||
session.headers.update({'X-CSRFToken':resp.cookies['csrftoken']})
|
session.headers.update({'X-CSRFToken':resp.cookies['csrftoken']})
|
||||||
time.sleep(9 * random.random() + 3)
|
time.sleep(9 * random.random() + 3)
|
||||||
@ -209,12 +202,12 @@ def get_session(user, passwd, empty_session_only=False, session=None):
|
|||||||
else:
|
else:
|
||||||
raise LoginException('Login error! Connection error!')
|
raise LoginException('Login error! Connection error!')
|
||||||
|
|
||||||
def download(name, session=None, profile_pic_only=False, download_videos=True,
|
def download(name, session, profile_pic_only=False, download_videos=True,
|
||||||
fast_update=False, sleep_min_max=[0.25,2], quiet=False):
|
fast_update=False, sleep_min_max=[0.25,2], quiet=False):
|
||||||
"""Download one profile"""
|
"""Download one profile"""
|
||||||
# pylint:disable=too-many-arguments
|
# pylint:disable=too-many-arguments
|
||||||
# Get profile main page json
|
# Get profile main page json
|
||||||
data = get_json(name, session=session)
|
data = get_json(name, session)
|
||||||
if len(data["entry_data"]) == 0 or "ProfilePage" not in data["entry_data"]:
|
if len(data["entry_data"]) == 0 or "ProfilePage" not in data["entry_data"]:
|
||||||
raise ProfileNotExistsException("user %s does not exist" % name)
|
raise ProfileNotExistsException("user %s does not exist" % name)
|
||||||
# Download profile picture
|
# Download profile picture
|
||||||
@ -246,18 +239,18 @@ def download(name, session=None, profile_pic_only=False, download_videos=True,
|
|||||||
if "caption" in node:
|
if "caption" in node:
|
||||||
save_caption(name, node["date"], node["caption"], quiet=quiet)
|
save_caption(name, node["date"], node["caption"], quiet=quiet)
|
||||||
if node["is_video"] and download_videos:
|
if node["is_video"] and download_videos:
|
||||||
video_data = get_json('p/' + node["code"], session=session)
|
video_data = get_json('p/' + node["code"], session)
|
||||||
download_pic(name, \
|
download_pic(name, \
|
||||||
video_data['entry_data']['PostPage'][0]['media']['video_url'], \
|
video_data['entry_data']['PostPage'][0]['media']['video_url'], \
|
||||||
node["date"], 'mp4', quiet=quiet)
|
node["date"], 'mp4', quiet=quiet)
|
||||||
log(quiet=quiet)
|
log(quiet=quiet)
|
||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
return
|
return
|
||||||
data = get_json(name, get_last_id(data), session)
|
data = get_json(name, session, max_id=get_last_id(data))
|
||||||
time.sleep(abs(sleep_min_max[1]-sleep_min_max[0])*random.random()+abs(sleep_min_max[0]))
|
time.sleep(abs(sleep_min_max[1]-sleep_min_max[0])*random.random()+abs(sleep_min_max[0]))
|
||||||
|
|
||||||
def get_logged_in_session(username, password=None, quiet=False):
|
def get_logged_in_session(username, password=None, quiet=False):
|
||||||
"""Logs in and returns session"""
|
"""Logs in and returns session, asking user for password if needed"""
|
||||||
if password is not None:
|
if password is not None:
|
||||||
return get_session(username, password)
|
return get_session(username, password)
|
||||||
if quiet:
|
if quiet:
|
||||||
@ -277,12 +270,13 @@ def download_profiles(targets, username=None, password=None, sessionfile=None,
|
|||||||
"""Download set of profiles and handle sessions"""
|
"""Download set of profiles and handle sessions"""
|
||||||
# pylint:disable=too-many-arguments
|
# pylint:disable=too-many-arguments
|
||||||
# Login, if desired
|
# Login, if desired
|
||||||
session = None
|
|
||||||
if username is not None:
|
if username is not None:
|
||||||
session = load_session(sessionfile, quiet=quiet)
|
session = load_session(sessionfile, quiet=quiet)
|
||||||
if not test_login(username, session):
|
if not test_login(username, session):
|
||||||
session = get_logged_in_session(username, password, quiet)
|
session = get_logged_in_session(username, password, quiet)
|
||||||
log("Logged in as %s." % username, quiet=quiet)
|
log("Logged in as %s." % username, quiet=quiet)
|
||||||
|
else:
|
||||||
|
session = get_anonymous_session()
|
||||||
# Iterate through targets list and download them
|
# Iterate through targets list and download them
|
||||||
failedtargets = []
|
failedtargets = []
|
||||||
for target in targets:
|
for target in targets:
|
||||||
|
Loading…
Reference in New Issue
Block a user