Initial commit

2024-07-07 03:40:06 +02:00 · 2016-06-15 12:42:08 +02:00 · 2016-06-15 12:42:08 +02:00 · 0df54143e5
commit 0df54143e5
1 changed files with 136 additions and 0 deletions
--- a/instagram.py
+++ b/instagram.py
@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+
+import requests, re, json, datetime, shutil, os
+
+def get_json(name, id = 0):
+    """Fetch an Instagram page and return its embedded ``window._sharedData`` JSON.
+
+    Requests ``http://www.instagram.com/<name>`` with ``max_id=id`` as a query
+    parameter and decodes the JSON value assigned to ``window._sharedData``
+    in the returned HTML.
+
+    Args:
+        name: Path below the site root, e.g. a profile name or ``'p/<code>'``.
+        id: Value sent as the ``max_id`` query parameter (default 0).
+
+    Returns:
+        The decoded JSON object, or None when no ``window._sharedData``
+        assignment is found in the page.
+
+    Raises:
+        json.JSONDecodeError: if the captured text is not valid JSON.
+    """
+    r = requests.get('http://www.instagram.com/' + name, params={'max_id': id})
+    # Raw string: '\.' inside a plain literal is an invalid escape sequence.
+    # Capture everything between the assignment prefix and the last ';<' on
+    # the line (assumes the statement ends with ';' directly before a tag).
+    m = re.search(r'window\._sharedData = (.*);<', r.text)
+    if m is None:
+        return None
+    return json.loads(m.group(1))
+
+def get_last_id(data):
+    """Return the id of the last media node on a profile page, as an int.
+
+    Args:
+        data: Decoded ``window._sharedData`` object, or None.
+
+    Returns:
+        ``int(nodes[-1]["id"])`` where ``nodes`` is
+        ``data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]``,
+        or None when ``data`` is None/empty, the profile structure is
+        missing, or the node list is empty.
+    """
+    # A failed page fetch yields None; treat it like "no more media".
+    if not data or not data.get("entry_data"):
+        return None
+    try:
+        nodes = data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]
+    except (KeyError, IndexError):
+        # Page decoded fine but is not a profile page.
+        return None
+    if not nodes:
+        return None
+    return int(nodes[-1]["id"])
+
+def epochToString(epoch):
+    """Format a Unix timestamp as ``YYYY-MM-DD_HH-MM-SS`` in local time."""
+    moment = datetime.datetime.fromtimestamp(epoch)
+    return moment.strftime('%Y-%m-%d_%H-%M-%S')
+
+def get_fileExtension(url):
+    """Return the file extension of ``url`` without the leading dot.
+
+    The query string and fragment are ignored, so ``'.../a.jpg?key=1'``
+    yields ``'jpg'`` and ``'.../v.mp4?x=y'`` yields ``'mp4'``. Extensions of
+    any length and containing digits are supported.
+
+    Args:
+        url: URL of the file.
+
+    Returns:
+        The extension of the last path component; when that component has
+        no extension, the last three characters of ``url`` are returned as
+        a last resort.
+    """
+    # Drop '?query' and '#fragment' so only the path is inspected.
+    path = url.split('?', 1)[0].split('#', 1)[0]
+    extension = os.path.splitext(path)[1]
+    if extension:
+        return extension[1:]
+    return url[-3:]
+
+def download_pic(name, url, date_epoch, outputlabel=None):
+    """Download ``url`` into the profile directory unless the file already exists.
+
+    The target is ``<name.lower()>/<timestamp>.<extension>`` where the
+    timestamp comes from ``epochToString(date_epoch)`` and the extension from
+    ``get_fileExtension(url)``. Progress is printed without a trailing
+    newline so several items can share one output line.
+
+    Args:
+        name: Profile name; its lower-cased form is the output directory.
+        url: URL of the file to download.
+        date_epoch: Unix timestamp used for the file name and as the file's
+            modification time.
+        outputlabel: Text printed as progress label; defaults to the
+            formatted timestamp.
+
+    Returns:
+        None in all cases.
+    """
+    timestamp = epochToString(date_epoch)
+    if outputlabel is None:
+        outputlabel = timestamp
+    directory = name.lower()
+    filename = directory + '/' + timestamp + '.' + get_fileExtension(url)
+    if os.path.isfile(filename):
+        print(outputlabel + ' exists', end='  ', flush=True)
+        return None
+    r = requests.get(url, stream=True)
+    try:
+        if r.status_code != 200:
+            print("ERROR: file \'" + url + "\' could not be downloaded")
+            return None
+        print(outputlabel, end='  ', flush=True)
+        os.makedirs(directory, exist_ok=True)
+        with open(filename, 'wb') as f:
+            # Let urllib3 undo gzip/deflate transfer encoding while copying.
+            r.raw.decode_content = True
+            shutil.copyfileobj(r.raw, f)
+    finally:
+        # A streamed response keeps its connection until it is closed;
+        # release it on every path, including the error branch.
+        r.close()
+    # utime takes (atime, mtime): access time is now, mtime is the post date.
+    os.utime(filename, (datetime.datetime.now().timestamp(), date_epoch))
+
+def saveCaption(name, date_epoch, caption):
+    """Write ``caption`` to ``<name.lower()>/<timestamp>.txt``, keeping old versions.
+
+    If the file already exists with identical content nothing is written.
+    If the content differs, existing versions are shifted one step
+    (``.txt`` -> ``_old_01.txt`` -> ``_old_02.txt`` ...) before the new
+    caption is written, so ``_old_01`` is always the most recent old version.
+    Files are read and written as UTF-8 so captions with emoji or other
+    non-ASCII text do not depend on the locale's default encoding.
+
+    Args:
+        name: Profile name; its lower-cased form is the output directory.
+        date_epoch: Unix timestamp used for the file name and as the file's
+            modification time.
+        caption: Caption text to store.
+
+    Returns:
+        None in all cases.
+    """
+    directory = name.lower()
+    filename = directory + '/' + epochToString(date_epoch) + '.txt'
+
+    def get_filename(index):
+        # Index 0 is the live file; index N > 0 is '<base>_old_NN.txt'.
+        if index == 0:
+            return filename
+        return filename[:-4] + '_old_' + ('%02d' % index) + filename[-4:]
+
+    if os.path.isfile(filename):
+        # errors='replace': a file written earlier in another encoding then
+        # simply compares unequal and gets rotated instead of crashing.
+        with open(filename, 'r', encoding='utf-8', errors='replace') as f:
+            fileCaption = f.read()
+        if fileCaption == caption:
+            print('txt unchanged', end=' ', flush=True)
+            return None
+        # Find the first unused version index, then shift every existing
+        # version up by one, highest first so nothing is overwritten.
+        i = 0
+        while os.path.isfile(get_filename(i)):
+            i = i + 1
+        for index in range(i, 0, -1):
+            os.rename(get_filename(index - 1), get_filename(index))
+        print('txt updated', end=' ', flush=True)
+    # Printed for new files and, after 'txt updated', for changed ones too.
+    print('txt', end=' ', flush=True)
+    os.makedirs(directory, exist_ok=True)
+    with open(filename, 'w', encoding='utf-8') as text_file:
+        text_file.write(caption)
+    # utime takes (atime, mtime): access time is now, mtime is the post date.
+    os.utime(filename, (datetime.datetime.now().timestamp(), date_epoch))
+
+def download_profilepic(name, url):
+    """Download the profile picture at ``url`` in 2048x2048 size if not yet saved.
+
+    The file is stored as
+    ``<name.lower()>/<Last-Modified in UTC>_UTC_profile_pic.<last 3 chars of url>``,
+    so a changed profile picture (new ``Last-Modified``) gets a new file.
+
+    Args:
+        name: Profile name; its lower-cased form is the output directory.
+        url: Profile picture URL as found in the profile data.
+
+    Returns:
+        None in all cases.
+
+    Raises:
+        KeyError: if the HEAD response carries no ``Last-Modified`` header.
+        ValueError: if that header does not match the expected GMT format.
+    """
+    date_object = datetime.datetime.strptime(requests.head(url).headers["Last-Modified"],
+        '%a, %d %b %Y %H:%M:%S GMT')
+    # The parsed value is naive but expressed in GMT. Calling timestamp() on
+    # the naive object would interpret it as local time, so attach UTC first.
+    modified_epoch = date_object.replace(tzinfo=datetime.timezone.utc).timestamp()
+    directory = name.lower()
+    filename = directory + '/' + date_object.strftime('%Y-%m-%d_%H-%M-%S') + \
+        '_UTC_profile_pic.' + url[-3:]
+    if os.path.isfile(filename):
+        print(filename + ' already exists')
+        return None
+    # Request the 2048x2048 rendition: put an 's2048x2048/' segment after the
+    # first path segment, replacing an existing 's<W>x<H>/' segment if present.
+    # If the URL has an unexpected shape it is downloaded unchanged.
+    m = re.search(r'http.*://.*instagram\.com/[^/]+/', url)
+    if m is not None:
+        head, tail = url[:m.end()], url[m.end():]
+        tail = re.sub(r'^s\d+x\d+/', '', tail)
+        url = head + 's2048x2048/' + tail
+    r = requests.get(url, stream=True)
+    try:
+        if r.status_code != 200:
+            print("ERROR: file \'" + url + "\' could not be downloaded")
+            return None
+        print(filename)
+        os.makedirs(directory, exist_ok=True)
+        with open(filename, 'wb') as f:
+            # Let urllib3 undo gzip/deflate transfer encoding while copying.
+            r.raw.decode_content = True
+            shutil.copyfileobj(r.raw, f)
+    finally:
+        # A streamed response keeps its connection until it is closed.
+        r.close()
+    # utime takes (atime, mtime): access time is now, mtime is Last-Modified.
+    os.utime(filename, (datetime.datetime.now().timestamp(), modified_epoch))
+
+def download(name, ProfilePicOnly = False, DownloadVideos = True):
+    """Download the profile picture and, optionally, all posts of a profile.
+
+    Pages through the profile's media via ``get_json(name, last_id)`` until
+    ``get_last_id`` reports no further nodes. For every node the display
+    image is downloaded, its caption (when present) is saved, and for video
+    nodes the video file is fetched from the post page.
+
+    Args:
+        name: Profile name to download.
+        ProfilePicOnly: When true, stop after the profile picture.
+        DownloadVideos: When false, video files are skipped (their display
+            image and caption are still saved).
+
+    Returns:
+        None in all cases; problems are reported on stdout.
+    """
+    data = get_json(name)
+    # Validate the page before indexing into it; otherwise a missing user
+    # raises KeyError/TypeError and the error message below is never shown.
+    entry_data = data.get("entry_data") if data else None
+    if not entry_data or "ProfilePage" not in entry_data:
+        print("ERROR: user does not exist")
+        return None
+    user = entry_data["ProfilePage"][0]["user"]
+    totalcount = user["media"]["count"]
+    download_profilepic(name, user["profile_pic_url"])
+    if ProfilePicOnly:
+        return None
+    if len(user["media"]["nodes"]) == 0:
+        print("ERROR: no pics found")
+        return None
+    count = 1
+    last_id = get_last_id(data)
+    while last_id is not None:
+        for node in data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
+            print("[%3i/%3i] " % (count, totalcount), end="", flush=True)
+            count = count + 1
+            download_pic(name, node["display_src"], node["date"])
+            if "caption" in node:
+                saveCaption(name, node["date"], node["caption"])
+            if node["is_video"] and DownloadVideos:
+                # The video URL is only available on the post's own page.
+                video_data = get_json('p/' + node["code"])
+                if video_data is None:
+                    print("ERROR: video \'" + node["code"] + "\' could not be downloaded")
+                else:
+                    download_pic(name,
+                            video_data['entry_data']['PostPage'][0]['media']['video_url'],
+                            node["date"], 'mp4')
+            print()
+        # Next page starts after the last node seen; a failed fetch (None)
+        # ends the loop instead of crashing.
+        data = get_json(name, last_id)
+        last_id = get_last_id(data) if data is not None else None
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options and hand them to download().
+    from argparse import ArgumentParser
+    cli = ArgumentParser(
+        description=('Simple downloader to fetch all Instagram pics and '
+                     'captions from a given public profile'))
+    cli.add_argument('name', help='Name of profile to download')
+    cli.add_argument('-P', '--profile-pic-only',
+            help='Only download profile picture', action='store_true')
+    cli.add_argument('-V', '--skip-videos',
+            help='Do not download videos', action='store_true')
+    options = cli.parse_args()
+    # --skip-videos is the inverse of download()'s DownloadVideos flag.
+    download(options.name,
+             ProfilePicOnly=options.profile_pic_only,
+             DownloadVideos=not options.skip_videos)