1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-11-04 09:22:29 +01:00
instaloader/instagram.py
2016-06-17 21:40:55 +02:00

144 lines
6.1 KiB
Python
Executable File

#!/usr/bin/env python3
import requests, re, json, datetime, shutil, os
class DownloaderException(Exception):
    """Error raised by this module when a download cannot be completed.

    It is raised, for example, when a file cannot be fetched, when the
    requested user does not exist, or when a profile has no pictures.
    """
def get_json(name, id = 0):
    """Fetch an Instagram page and return the JSON object embedded in it.

    The page ``http://www.instagram.com/<name>`` is requested with ``id``
    sent as the ``max_id`` query parameter. The value assigned to
    ``window._sharedData`` in the returned HTML is then decoded as JSON.

    :param name: path below the site root (a profile name, or ``p/<code>``
        for a single post)
    :param id: value sent as ``max_id``. The name shadows the builtin but is
        kept because it is part of the existing signature.
    :return: the decoded JSON object, or None if the page contains no
        ``window._sharedData`` assignment
    """
    prefix = 'window._sharedData = '
    response = requests.get('http://www.instagram.com/' + name,
                            params={'max_id': id})
    # Raw string: '\.' inside a normal literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    match = re.search(r'window\._sharedData = .*<', response.text)
    if match is None:
        return None
    # Strip the assignment prefix and the final two matched characters: the
    # '<' required by the pattern and the character before it (presumably the
    # ';' that terminates the JavaScript statement).
    return json.loads(match.group(0)[len(prefix):-2])
def get_last_id(data):
    """Return the id of the last media node contained in a profile page.

    :param data: JSON object of a profile page as returned by get_json(),
        or None if no such object could be obtained
    :return: the ``id`` of the final entry of the profile's media nodes as
        an int, or None if ``data`` is None, its ``entry_data`` is empty, or
        the profile lists no media nodes
    """
    # get_json() returns None for pages without embedded data; treat that
    # like "nothing more to fetch" instead of failing on the subscript.
    if data is None or len(data["entry_data"]) == 0:
        return None
    nodes = data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]
    if len(nodes) == 0:
        return None
    return int(nodes[-1]["id"])
def epochToString(epoch):
    """Format a Unix timestamp as ``YYYY-MM-DD_hh-mm-ss``.

    The timestamp is converted with ``datetime.fromtimestamp`` without a
    time zone argument, i.e. it is rendered in the local time zone.

    :param epoch: seconds since the Unix epoch
    :return: the formatted date and time as a string
    """
    moment = datetime.datetime.fromtimestamp(epoch)
    return moment.strftime('%Y-%m-%d_%H-%M-%S')
def get_fileExtension(url):
    """Return the file extension found in ``url``, without the leading dot.

    If the URL contains a dot followed by lowercase letters and then a
    ``?`` (an extension directly before a query string), those letters are
    returned. Otherwise the last three characters of the URL are returned.

    :param url: URL of the file
    :return: the extension as a string
    """
    # Raw string: '\.' and '\?' inside a normal literal are invalid escape
    # sequences and trigger a warning on current Python versions.
    match = re.search(r'\.[a-z]*\?', url)
    if match is None:
        return url[-3:]
    # Drop the leading '.' and the trailing '?' of the matched text.
    return match.group(0)[1:-1]
def download_pic(name, url, date_epoch, outputlabel=None):
    """Download one media file into the directory of a profile.

    The file is stored as ``<name.lower()>/<formatted date_epoch>.<ext>``,
    where the extension is derived from ``url``. Nothing is fetched if that
    file already exists. After a successful download the file's
    modification time is set to ``date_epoch``.

    :param name: profile name; its lower-cased form is the target directory
    :param url: URL of the file to download
    :param date_epoch: Unix timestamp used for the file name and as the
        file's modification time
    :param outputlabel: text printed as progress output; defaults to the
        formatted ``date_epoch``
    :return: True if the file was downloaded, False if it already existed
    :raises DownloaderException: if the server does not answer with
        status code 200
    """
    timestamp = epochToString(date_epoch)
    if outputlabel is None:
        outputlabel = timestamp
    directory = name.lower()
    filename = directory + '/' + timestamp + '.' + get_fileExtension(url)
    if os.path.isfile(filename):
        print(outputlabel + ' exists', end=' ', flush=True)
        return False
    response = requests.get(url, stream=True)
    try:
        if response.status_code != 200:
            raise DownloaderException("file '" + url + "' could not be downloaded")
        print(outputlabel, end=' ', flush=True)
        os.makedirs(directory, exist_ok=True)
        with open(filename, 'wb') as f:
            # Let urllib3 undo any transfer encoding (gzip/deflate) while
            # the raw stream is copied to disk.
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, f)
    finally:
        # A streamed response keeps its connection checked out until it is
        # closed; release it on success and on failure alike.
        response.close()
    # (atime, mtime): access time is "now", modification time is the post date.
    os.utime(filename, (datetime.datetime.now().timestamp(), date_epoch))
    return True
def saveCaption(name, date_epoch, caption):
    """Store the caption of a post as ``<name.lower()>/<formatted date_epoch>.txt``.

    If the file already exists with identical content, nothing is written.
    If it exists with different content, the current file and all previously
    kept versions are shifted up by one (``..._old_01.txt``,
    ``..._old_02.txt``, ...) before the new caption is written. The written
    file's modification time is set to ``date_epoch``.

    Caption files are read and written as UTF-8, so captions containing
    characters outside the locale's default encoding (e.g. emoji) can be
    stored on every platform.

    :param name: profile name; its lower-cased form is the target directory
    :param date_epoch: Unix timestamp used for the file name and as the
        file's modification time
    :param caption: caption text to store
    :return: None
    """
    directory = name.lower()
    filename = directory + '/' + epochToString(date_epoch) + '.txt'
    if os.path.isfile(filename):
        # errors='replace': a file written earlier in another encoding must
        # not abort the run; it simply compares unequal and gets rotated.
        with open(filename, 'r', encoding='utf-8', errors='replace') as f:
            fileCaption = f.read()
        if fileCaption == caption:
            print('txt unchanged', end=' ', flush=True)
            return None

        def get_filename(index):
            # Index 0 is the live file; index n > 0 is the n-th older
            # version, numbered with at least two digits.
            if index == 0:
                return filename
            return '%s_old_%02d%s' % (filename[:-4], index, filename[-4:])

        # Find the first unused slot, then shift every existing version up
        # by one, starting from the oldest so nothing is overwritten.
        i = 0
        while os.path.isfile(get_filename(i)):
            i += 1
        for index in range(i, 0, -1):
            os.rename(get_filename(index - 1), get_filename(index))
        print('txt updated', end=' ', flush=True)
    print('txt', end=' ', flush=True)
    os.makedirs(directory, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as text_file:
        text_file.write(caption)
    # (atime, mtime): access time is "now", modification time is the post date.
    os.utime(filename, (datetime.datetime.now().timestamp(), date_epoch))
def download_profilepic(name, url):
    """Download the profile picture of a profile.

    The picture's ``Last-Modified`` header (obtained with a HEAD request)
    determines the file name
    ``<name.lower()>/<timestamp>_UTC_profile_pic.<last 3 chars of url>``.
    If that file already exists, nothing is downloaded. Otherwise the URL is
    rewritten to request the ``s2048x2048`` variant, the file is fetched,
    and its modification time is set to the ``Last-Modified`` instant.

    :param name: profile name; its lower-cased form is the target directory
    :param url: URL of the profile picture
    :return: None
    :raises DownloaderException: if the server does not answer the download
        request with status code 200
    """
    last_modified = requests.head(url).headers["Last-Modified"]
    # The header is given in GMT; strptime yields a naive datetime holding
    # that GMT wall-clock time.
    date_object = datetime.datetime.strptime(last_modified, '%a, %d %b %Y %H:%M:%S GMT')
    directory = name.lower()
    # timestamp() treats the naive value as local time and epochToString()
    # converts back to local time, so the name carries the GMT wall clock.
    filename = directory + '/' + epochToString(date_object.timestamp()) + \
        '_UTC_profile_pic.' + url[-3:]
    if os.path.isfile(filename):
        print(filename + ' already exists')
        return None
    # Match up to and including the first character after the first path
    # segment that follows the host.
    m = re.search(r'http.*://.*instagram\.com/[^/]+/.', url)
    if m is not None:
        index = len(m.group(0)) - 1
        # If the next segment starts with 's', eight characters are replaced
        # (presumably an existing size component such as 's150x150' - TODO
        # confirm); otherwise the size component is inserted.
        offset = 8 if m.group(0)[-1:] == 's' else 0
        url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index+offset:]
    # If the URL has an unexpected shape it is requested unchanged instead
    # of failing on the missing match.
    r = requests.get(url, stream=True)
    try:
        if r.status_code != 200:
            raise DownloaderException("file '" + url + "' could not be downloaded")
        print(filename)
        os.makedirs(directory, exist_ok=True)
        with open(filename, 'wb') as f:
            # Let urllib3 undo any transfer encoding while copying.
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)
    finally:
        # A streamed response keeps its connection checked out until closed.
        r.close()
    # Interpret the parsed time explicitly as UTC; calling timestamp() on
    # the naive value would shift the mtime by the local UTC offset.
    modified_epoch = date_object.replace(tzinfo=datetime.timezone.utc).timestamp()
    os.utime(filename, (datetime.datetime.now().timestamp(), modified_epoch))
def download(name, ProfilePicOnly = False, DownloadVideos = True, FastUpdate = False):
    """Download the profile picture and the posts of a profile.

    The profile picture is always downloaded. Unless ``ProfilePicOnly`` is
    set, every media node of the profile is then processed page by page:
    its picture is downloaded, its caption (if any) is saved, and for video
    posts the video file is downloaded as well when ``DownloadVideos`` is
    set. A progress line is printed for each node.

    :param name: name of the profile
    :param ProfilePicOnly: only download the profile picture
    :param DownloadVideos: also download the video file of video posts
    :param FastUpdate: stop at the first post whose picture already exists
    :return: None
    :raises DownloaderException: if the profile page cannot be read, the
        user does not exist, the profile has no pictures (and
        ``ProfilePicOnly`` is not set), or a file cannot be downloaded
    """
    data = get_json(name)
    # Validate the response before reading nested keys, so a missing user
    # is reported as such instead of surfacing as a KeyError/TypeError.
    if data is None:
        raise DownloaderException("profile page of '" + name + "' could not be read")
    if len(data["entry_data"]) == 0:
        raise DownloaderException("user does not exist")
    user = data["entry_data"]["ProfilePage"][0]["user"]
    totalcount = user["media"]["count"]
    download_profilepic(name, user["profile_pic_url"])
    if ProfilePicOnly:
        return
    if len(user["media"]["nodes"]) == 0:
        raise DownloaderException("no pics found")
    count = 1
    # Each iteration handles one page; the id of its last node is passed on
    # to request the following page. A page that cannot be read (None) or
    # that has no nodes ends the loop.
    while data is not None:
        last_id = get_last_id(data)
        if last_id is None:
            break
        for node in data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
            print("[%3i/%3i] " % (count, totalcount), end="", flush=True)
            count = count + 1
            downloaded = download_pic(name, node["display_src"], node["date"])
            if "caption" in node:
                saveCaption(name, node["date"], node["caption"])
            if node["is_video"] and DownloadVideos:
                # The video URL is only present on the post's own page.
                video_data = get_json('p/' + node["code"])
                download_pic(name,
                             video_data['entry_data']['PostPage'][0]['media']['video_url'],
                             node["date"], 'mp4')
            print()
            if FastUpdate and not downloaded:
                return
        data = get_json(name, last_id)
if __name__ == "__main__":
    # Command-line entry point: parse the options and start the download.
    from argparse import ArgumentParser
    parser = ArgumentParser(
        description='Simple downloader to fetch all Instagram pics and '
                    'captions from a given public profile')
    parser.add_argument('name', help='Name of profile to download')
    # All remaining options are plain on/off switches.
    switches = (
        ('-P', '--profile-pic-only', 'Only download profile picture'),
        ('-V', '--skip-videos', 'Do not download videos'),
        ('-F', '--fast-update', 'Abort at encounter of first already-downloaded picture'),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true', help=help_text)
    args = parser.parse_args()
    download(args.name,
             ProfilePicOnly=args.profile_pic_only,
             DownloadVideos=not args.skip_videos,
             FastUpdate=args.fast_update)