From 8dcf65c92ec899a34cf57a02809520698f1d7b66 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 05:08:04 +0000 Subject: [PATCH] [Instagram] Add login to playlist (#1488) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 108 ++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 8c935c251..6ed20d9c6 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import itertools @@ -25,9 +26,55 @@ ) -class InstagramIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P[^/?#&]+))' +class InstagramBaseIE(InfoExtractor): _NETRC_MACHINE = 'instagram' + _IS_LOGGED_IN = False + + def _login(self): + username, password = self._get_login_info() + if username is None or self._IS_LOGGED_IN: + return + + login_webpage = self._download_webpage( + 'https://www.instagram.com/accounts/login/', None, + note='Downloading login webpage', errnote='Failed to download login webpage') + + shared_data = self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + login_webpage, 'shared data', default='{}'), + None) + + login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={ + 'Accept': '*/*', + 'X-IG-App-ID': '936619743392459', + 'X-ASBD-ID': '198387', + 'X-IG-WWW-Claim': '0', + 'X-Requested-With': 'XMLHttpRequest', + 'X-CSRFToken': shared_data['config']['csrf_token'], + 'X-Instagram-AJAX': shared_data['rollout_hash'], + 'Referer': 'https://www.instagram.com/', + }, data=urlencode_postdata({ + 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}', + 'username': username, + 'queryParams': '{}', + 'optIntoOneTap': 'false', + 'stopDeletionNonce': '', + 'trustedDeviceRecords': '{}', + })) + + if not login.get('authenticated'): + if login.get('message'): + raise ExtractorError(f'Unable to login: {login["message"]}') + raise ExtractorError('Unable to login') + InstagramBaseIE._IS_LOGGED_IN = True + + def _real_initialize(self): + self._login() + + +class InstagramIE(InstagramBaseIE): + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -143,47 +190,6 @@ def _extract_embed_url(webpage): if mobj: return mobj.group('link') - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_webpage = self._download_webpage( - 'https://www.instagram.com/accounts/login/', None, - note='Downloading login webpage', errnote='Failed to download login webpage') - - shared_data = self._parse_json( - self._search_regex( - r'window\._sharedData\s*=\s*({.+?});', - login_webpage, 'shared data', default='{}'), - None) - - login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={ - 'Accept': '*/*', - 'X-IG-App-ID': '936619743392459', - 'X-ASBD-ID': '198387', - 'X-IG-WWW-Claim': '0', - 'X-Requested-With': 'XMLHttpRequest', - 'X-CSRFToken': shared_data['config']['csrf_token'], - 'X-Instagram-AJAX': shared_data['rollout_hash'], - 'Referer': 'https://www.instagram.com/', - }, data=urlencode_postdata({ - 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}', - 'username': username, - 'queryParams': '{}', - 'optIntoOneTap': 'false', - 'stopDeletionNonce': '', - 'trustedDeviceRecords': '{}', - })) - - if not login.get('authenticated'): - if login.get('message'): - raise ExtractorError(f'Unable to login: {login["message"]}') - raise ExtractorError('Unable to login') - - def _real_initialize(self): - self._login() - def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') @@ -333,9 +339,7 @@ def get_count(keys, kind): } -class InstagramPlaylistIE(InfoExtractor): - # A superclass for handling any kind of query based on GraphQL which - # results in a playlist. +class InstagramPlaylistBaseIE(InstagramBaseIE): _gis_tmpl = None # used to cache GIS request type @@ -462,11 +466,11 @@ def _real_extract(self, url): self._extract_graphql(data, url), user_or_tag, user_or_tag) -class InstagramUserIE(InstagramPlaylistIE): +class InstagramUserIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' - _TEST = { + _TESTS = [{ 'url': 'https://instagram.com/porsche', 'info_dict': { 'id': 'porsche', @@ -478,7 +482,7 @@ class InstagramUserIE(InstagramPlaylistIE): 'skip_download': True, 'playlistend': 5, } - } + }] _QUERY_HASH = '42323d64886122307be10013ad2dcc44', @@ -496,11 +500,11 @@ def _query_vars_for(data): } -class InstagramTagIE(InstagramPlaylistIE): +class InstagramTagIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P[^/]+)' IE_DESC = 'Instagram hashtag search' IE_NAME = 'instagram:tag' - _TEST = { + _TESTS = [{ 'url': 'https://instagram.com/explore/tags/lolcats', 'info_dict': { 'id': 'lolcats', @@ -512,7 +516,7 @@ class InstagramTagIE(InstagramPlaylistIE): 'skip_download': True, 'playlistend': 50, } - } + }] _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314',