From 987d2e079ad0fd45df19b6183d38f83bcd528e9d Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi Date: Fri, 6 Nov 2020 15:15:07 +0900 Subject: [PATCH] [instagram] Fix extractor --- youtube_dlc/extractor/instagram.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/youtube_dlc/extractor/instagram.py b/youtube_dlc/extractor/instagram.py index b061850a1..bbfe23c76 100644 --- a/youtube_dlc/extractor/instagram.py +++ b/youtube_dlc/extractor/instagram.py @@ -126,16 +126,23 @@ def _real_extract(self, url): uploader_id, like_count, comment_count, comments, height, width) = [None] * 11 - shared_data = self._parse_json( - self._search_regex( - r'window\._sharedData\s*=\s*({.+?});', - webpage, 'shared data', default='{}'), - video_id, fatal=False) + shared_data = try_get(webpage, + (lambda x: self._parse_json( + self._search_regex( + r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);', + x, 'additional data', default='{}'), + video_id, fatal=False), + lambda x: self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + x, 'shared data', default='{}'), + video_id, fatal=False)['entry_data']['PostPage'][0]), + None) if shared_data: media = try_get( shared_data, - (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'], - lambda x: x['entry_data']['PostPage'][0]['media']), + (lambda x: x['graphql']['shortcode_media'], + lambda x: x['media']), dict) if media: video_url = media.get('video_url')