From dab062fb6ecd48e0c243a6d030d89b44cd44bd84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Oct 2015 20:34:06 +0600 Subject: [PATCH] [bbc] Add support for videos in news articles embedded with data-playable --- youtube_dl/extractor/bbc.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index cc2f6fed2..b2b39ff21 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -11,6 +11,7 @@ int_or_none, parse_duration, parse_iso8601, + unescapeHTML, ) from ..compat import compat_HTTPError @@ -682,6 +683,21 @@ def _real_extract(self, url): [r'data-video-player-vpid="([\da-z]{8})"', r']+name="externalIdentifier"[^>]+value="([\da-z]{8})"'], webpage, 'vpid', default=None) + + duration = None + if not programme_id: + # single video in news article embedded with data-playable (e.g. + # http://www.bbc.com/news/world-us-canada-34473351) + data_playable = self._parse_json( + unescapeHTML(self._search_regex( + r'data-playable="({.+?})"', webpage, 'data playable', default='{}')), + programme_id, fatal=False) + if data_playable: + items = data_playable.get('settings', {}).get('playlistObject', {}).get('items') + if items and isinstance(items, list): + duration = int_or_none(items[0].get('duration')) + programme_id = items[0].get('vpid') + if programme_id: formats, subtitles = self._download_media_selector(programme_id) self._sort_formats(formats) @@ -699,6 +715,7 @@ def _real_extract(self, url): 'title': title, 'description': description, 'timestamp': timestamp, + 'duration': duration, 'formats': formats, 'subtitles': subtitles, }