From 0df54143e5491862878be3ea7c077202675fa817 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andre=CC=81=20Koch-Kramer?=
Date: Wed, 15 Jun 2016 12:42:08 +0200
Subject: [PATCH] Initial commit

---
 instagram.py | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 instagram.py

diff --git a/instagram.py b/instagram.py
new file mode 100644
index 0000000..f85d6e5
--- /dev/null
+++ b/instagram.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+
+import requests, re, json, datetime, shutil, os
+
+def get_json(name, id = 0):
+    r = requests.get('https://www.instagram.com/'+name, \
+            params={'max_id': id})
+    m = re.search(r'window\._sharedData = .*<', r.text)
+    if m is None:
+        return None
+    else:
+        return json.loads(m.group(0)[21:-2])
+
+def get_last_id(data):
+    if len(data["entry_data"]) == 0 or \
+            len(data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]) == 0:
+        return None
+    else:
+        data = data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]
+        return int(data[-1]["id"])
+
+def epochToString(epoch):
+    return datetime.datetime.fromtimestamp(epoch).strftime('%Y-%m-%d_%H-%M-%S')
+
+def get_fileExtension(url):
+    m = re.search(r'\.[a-z]*\?', url)
+    if m is None:
+        return url[-3:]
+    else:
+        return m.group(0)[1:-1]
+
+def download_pic(name, url, date_epoch, outputlabel=None):
+    if outputlabel is None:
+        outputlabel = epochToString(date_epoch)
+    filename = name.lower() + '/' + epochToString(date_epoch) + '.' + get_fileExtension(url)
+    if os.path.isfile(filename):
+        print(outputlabel + ' exists', end=' ', flush=True)
+        return None
+    r = requests.get(url, stream=True)
+    if r.status_code == 200:
+        print(outputlabel, end=' ', flush=True)
+        os.makedirs(name.lower(), exist_ok=True)
+        with open(filename, 'wb') as f:
+            r.raw.decode_content = True
+            shutil.copyfileobj(r.raw, f)
+        os.utime(filename, (datetime.datetime.now().timestamp(), date_epoch))
+    else:
+        print("ERROR: file '" + url + "' could not be downloaded")
+
+def saveCaption(name, date_epoch, caption):
+    filename = name.lower() + '/' + epochToString(date_epoch) + '.txt'
+    if os.path.isfile(filename):
+        with open(filename, 'r') as f:
+            fileCaption = f.read()
+        if fileCaption == caption:
+            print('txt unchanged', end=' ', flush=True)
+            return None
+        else:
+            def get_filename(index):
+                return filename if index==0 else (filename[:-4] + '_old_' + \
+                        (str(0) if index<10 else str()) + str(index) + filename[-4:])
+            i = 0
+            while os.path.isfile(get_filename(i)):
+                i = i + 1
+            for index in range(i, 0, -1):
+                os.rename(get_filename(index-1), get_filename(index))
+            print('txt updated', end=' ', flush=True)
+    print('txt', end=' ', flush=True)
+    os.makedirs(name.lower(), exist_ok=True)
+    with open(filename, 'w') as text_file:
+        text_file.write(caption)
+    os.utime(filename, (datetime.datetime.now().timestamp(), date_epoch))
+
+def download_profilepic(name, url):
+    date_object = datetime.datetime.strptime(requests.head(url).headers["Last-Modified"], \
+            '%a, %d %b %Y %H:%M:%S GMT')
+    filename = name.lower() + '/' + epochToString(date_object.timestamp()) + \
+            '_UTC_profile_pic.' + url[-3:]
+    if os.path.isfile(filename):
+        print(filename + ' already exists')
+        return None
+    m = re.search(r'http.*://.*instagram\.com/[^/]+/.', url)
+    index = len(m.group(0))-1
+    offset = 8 if m.group(0)[-1:] == 's' else 0
+    url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index+offset:]
+    r = requests.get(url, stream=True)
+    if r.status_code == 200:
+        print(filename)
+        os.makedirs(name.lower(), exist_ok=True)
+        with open(filename, 'wb') as f:
+            r.raw.decode_content = True
+            shutil.copyfileobj(r.raw, f)
+        os.utime(filename, (datetime.datetime.now().timestamp(), date_object.timestamp()))
+    else:
+        print("ERROR: file '" + url + "' could not be downloaded")
+
+def download(name, ProfilePicOnly = False, DownloadVideos = True):
+    data = get_json(name)
+    if data is None or len(data["entry_data"]) == 0:
+        print("ERROR: user does not exist")
+        return None
+    else:
+        download_profilepic(name, data["entry_data"]["ProfilePage"][0]["user"]["profile_pic_url"])
+    totalcount = data["entry_data"]["ProfilePage"][0]["user"]["media"]["count"]
+    if len(data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]) == 0 \
+            and not ProfilePicOnly:
+        print("ERROR: no pics found")
+        return None
+    if not ProfilePicOnly:
+        count = 1
+        while get_last_id(data) is not None:
+            for node in data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
+                print("[%3i/%3i] " % (count, totalcount), end="", flush=True)
+                count = count + 1
+                download_pic(name, node["display_src"], node["date"])
+                if "caption" in node:
+                    saveCaption(name, node["date"], node["caption"])
+                if node["is_video"] and DownloadVideos:
+                    video_data = get_json('p/' + node["code"])
+                    download_pic(name, \
+                            video_data['entry_data']['PostPage'][0]['media']['video_url'], \
+                            node["date"], 'mp4')
+            print()
+            data = get_json(name, get_last_id(data))
+
+if __name__ == "__main__":
+    from argparse import ArgumentParser
+    parser = ArgumentParser(description='Simple downloader to fetch all Instagram pics and '\
+            'captions from a given public profile')
+    parser.add_argument('name', help='Name of profile to download')
+    parser.add_argument('-P', '--profile-pic-only', action='store_true',
+            help='Only download profile picture')
+    parser.add_argument('-V', '--skip-videos', action='store_true',
+            help='Do not download videos')
+    args = parser.parse_args()
+    download(args.name, args.profile_pic_only, not args.skip_videos)
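
For reference, a brief usage sketch of the script this patch adds. It assumes
Python 3 with the requests library installed; 'natgeo' is only a placeholder
for any public profile name:

    $ python3 instagram.py natgeo        # pics, captions and videos
    $ python3 instagram.py -P natgeo     # profile picture only
    $ python3 instagram.py -V natgeo     # everything except videos

The same can be driven from Python, since download() is importable:

    from instagram import download
    download('natgeo', ProfilePicOnly=False, DownloadVideos=True)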