From 3e376d183ede2d9d24a14e4d5afee7a64679cca0 Mon Sep 17 00:00:00 2001 From: Wes Date: Tue, 3 Aug 2021 23:19:44 -0500 Subject: [PATCH] [nbcolympics] Update extractor for 2020 olympics (#621) Fixes: https://github.com/yt-dlp/yt-dlp/issues/617#issuecomment-891834323 Authored by: wesnm --- yt_dlp/extractor/nbc.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index 2f25b9e7b..8c63cf818 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -12,6 +12,7 @@ int_or_none, parse_age_limit, parse_duration, + RegexNotFoundError, smuggle_url, try_get, unified_timestamp, @@ -460,7 +461,7 @@ def _real_extract(self, url): class NBCOlympicsIE(InfoExtractor): IE_NAME = 'nbcolympics' - _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)' + _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)' _TEST = { # Geo-restricted to US @@ -483,13 +484,18 @@ def _real_extract(self, url): webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json(self._search_regex( - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', - webpage, 'drupal settings'), display_id) + try: + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) - iframe_url = drupal_settings['vod']['iframe_url'] - theplatform_url = iframe_url.replace( - 'vplayer.nbcolympics.com', 'player.theplatform.com') + iframe_url = drupal_settings['vod']['iframe_url'] + theplatform_url = iframe_url.replace( + 'vplayer.nbcolympics.com', 'player.theplatform.com') + except RegexNotFoundError: + theplatform_url = self._search_regex( + r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2", + webpage, 'embedding URL', group="embedUrl") return { '_type': 'url_transparent',