From de66571371e2a9705ecd3aed903880c4846d04ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Oct 2015 20:40:56 +0600 Subject: [PATCH] [bbc] Support multiple videos in articles embedded with playlist.sxml --- youtube_dl/extractor/bbc.py | 38 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index b2b39ff21..930637cd7 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -660,23 +660,27 @@ def _real_extract(self, url): r'itemprop="datePublished"[^>]+datetime="([^"]+)"'], webpage, 'date', default=None)) - # single video with playlist.sxml URL (e.g. http://www.bbc.com/sport/0/football/3365340ng) - playlist = self._search_regex( - r']+name="playlist"[^>]+value="([^"]+)"', - webpage, 'playlist', default=None) - if playlist: - programme_id, title, description, duration, formats, subtitles = \ - self._process_legacy_playlist_url(playlist, playlist_id) - self._sort_formats(formats) - return { - 'id': programme_id, - 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': timestamp, - 'formats': formats, - 'subtitles': subtitles, - } + # article with multiple videos embedded with playlist.sxml (e.g. + # http://www.bbc.com/sport/0/football/34475836) + playlists = re.findall(r']+name="playlist"[^>]+value="([^"]+)"', webpage) + if playlists: + entries = [] + for playlist in playlists: + programme_id, title, description, duration, formats, subtitles = \ + self._process_legacy_playlist_url(playlist, playlist_id) + self._sort_formats(formats) + entries.append({ + 'id': programme_id, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + 'subtitles': subtitles, + }) + playlist_title = self._og_search_title(webpage) + playlist_description = self._og_search_description(webpage) + return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex(