diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e85cfecae..d92d63683 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2314,11 +2314,7 @@ ) from .vidlii import VidLiiIE from .vidly import VidlyIE -from .vidyard import ( - VidyardEmbedIE, - VidyardPlayIE, - VidyardWatchIE, -) +from .vidyard import VidyardIE from .viewlift import ( ViewLiftEmbedIE, ViewLiftIE, diff --git a/yt_dlp/extractor/vidyard.py b/yt_dlp/extractor/vidyard.py index 9664c9b8a..d68667ea0 100644 --- a/yt_dlp/extractor/vidyard.py +++ b/yt_dlp/extractor/vidyard.py @@ -1,8 +1,14 @@ +import re + from .common import InfoExtractor -from ..utils import int_or_none, traverse_obj +from ..utils import ( + extract_attributes, + int_or_none, + traverse_obj, +) -class VidyardBaseInfoExtractor(InfoExtractor): +class VidyardBaseIE(InfoExtractor): def _get_formats_and_subtitles(self, video_source, video_id): video_source = video_source or {} @@ -36,33 +42,13 @@ def _get_direct_subtitles(self, caption_json): def _webpage_url(self, url, video_id): return url - def _real_extract(self, url): - video_id = self._match_valid_url(url).group('id') - webpage = self._download_webpage(self._webpage_url(url, video_id), video_id) - json_data = self._download_json( - f'https://play.vidyard.com/player/{video_id}.json', video_id)['payload']['chapters'][0] - - formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], video_id) - self._merge_subtitles(self._get_direct_subtitles(json_data.get('captions')), target=subtitles) - - return { - 'id': str(json_data['videoId']), - 'title': json_data.get('name') or self._og_search_title(webpage, default=None) or self._html_extract_title(webpage), - 'description': json_data.get('description') or self._og_search_description(webpage, default=None), - 'duration': int_or_none(json_data.get('seconds')), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnails': [{'url': thumbnail_url} - for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))], - 'http_headers': { - 'referer': 'https://play.vidyard.com/', - }, - } - - -class VidyardWatchIE(VidyardBaseInfoExtractor): - _VALID_URL = r'https?://(?:[\w-]+\.hubs|share)\.vidyard\.com/watch/(?P[\w-]+)' +class VidyardIE(VidyardBaseIE): + _VALID_URL = [ + r'https?://(?:[\w-]+\.hubs|share)\.vidyard\.com/watch/(?P[\w-]+)', + r'https?://embed\.vidyard\.com/share/(?P[\w-]+)', + r'https?://play\.vidyard\.com/(?P[\w-]+)\.html', + ] _TESTS = [ { 'url': 'https://vyexample03.hubs.vidyard.com/watch/oTDMPlUv--51Th455G5u7Q', @@ -86,12 +72,6 @@ class VidyardWatchIE(VidyardBaseInfoExtractor): 'duration': 41, }, }, - ] - - -class VidyardEmbedIE(VidyardBaseInfoExtractor): - _VALID_URL = r'https?://embed\.vidyard\.com/share/(?P[\w-]+)' - _TESTS = [ { 'url': 'https://embed.vidyard.com/share/oTDMPlUv--51Th455G5u7Q', 'info_dict': { @@ -103,12 +83,6 @@ class VidyardEmbedIE(VidyardBaseInfoExtractor): 'duration': 99, }, }, - ] - - -class VidyardPlayIE(VidyardBaseInfoExtractor): - _VALID_URL = r'https?://play\.vidyard\.com/(?P[\w-]+)\.[\w]+' - _TESTS = [ { # URL of iframe embed src 'url': 'https://play.vidyard.com/iDqTwWGrd36vaLuaCY3nTs.html', @@ -121,8 +95,8 @@ class VidyardPlayIE(VidyardBaseInfoExtractor): }, }, { - # URL of inline/lightbox embed src - 'url': 'https://play.vidyard.com/iDqTwWGrd36vaLuaCY3nTs.jpg', + # URL of iframe embed src (protocol relative URL) + 'url': '//play.vidyard.com/iDqTwWGrd36vaLuaCY3nTs.html?', 'info_dict': { 'id': '9281009', 'ext': 'mp4', @@ -134,13 +108,11 @@ class VidyardPlayIE(VidyardBaseInfoExtractor): ] _EMBED_REGEX = [ # iframe embed - r']+?src=(["\'])(?P(?:https?:)?//play\.vidyard\.com/[\w-]+.html)\1', - # inline/lightbox embed - r']+?src=(["\'])(?P(?:https?:)?//play\.vidyard\.com/[\w-]+.jpg)\1', + r']+?src=(["\'])(?P(?:https?:)?//play\.vidyard\.com/[\w-]+.\w+)\1', ] _WEBPAGE_TESTS = [ { - # URL containing embedded video + # URL containing inline/lightbox embedded video 'url': 'https://resources.altium.com/p/2-the-extreme-importance-of-pc-board-stack-up', 'info_dict': { 'id': '3225198', @@ -152,5 +124,41 @@ class VidyardPlayIE(VidyardBaseInfoExtractor): }, ] - def _webpage_url(self, url, video_id): - return f'https://play.vidyard.com/{video_id}.html' + @classmethod + def _extract_embed_urls(cls, url, webpage): + # Handle protocol-less embed URLs + for embed_url in super()._extract_embed_urls(url, webpage): + if embed_url.startswith('//'): + embed_url = f'https:{embed_url}' + yield embed_url + + # Extract inline/lightbox embeds + for embed_elm in re.findall(r'(]+class=(["\'])(?:[^>"\']* )?vidyard-player-embed(?: [^>"\']*)?\2[^>]+[^>]*>)', webpage): + embed = extract_attributes(embed_elm[0]) or {} + uuid = embed.get('data-uuid') + if uuid: + yield f'https://play.vidyard.com/{uuid}.html' + + def _real_extract(self, url): + video_id = self._match_valid_url(url).group('id') + webpage = self._download_webpage(self._webpage_url(url, video_id), video_id) + + json_data = self._download_json( + f'https://play.vidyard.com/player/{video_id}.json', video_id)['payload']['chapters'][0] + + formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], video_id) + self._merge_subtitles(self._get_direct_subtitles(json_data.get('captions')), target=subtitles) + + return { + 'id': str(json_data['videoId']), + 'title': json_data.get('name') or self._og_search_title(webpage, default=None) or self._html_extract_title(webpage), + 'description': json_data.get('description') or self._og_search_description(webpage, default=None), + 'duration': int_or_none(json_data.get('seconds')), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': [{'url': thumbnail_url} + for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))], + 'http_headers': { + 'referer': 'https://play.vidyard.com/', + }, + }