From ecee5724110847b832a6074c66ca4a63758100f4 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Tue, 19 May 2015 00:50:24 +0800
Subject: [PATCH] [yahoo] Add support for closed captions (closes #5714)

---
 youtube_dl/extractor/yahoo.py | 18 ++++++++++++++++++
 youtube_dl/utils.py           |  1 +
 2 files changed, 19 insertions(+)

diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index bf4e659ac..f9afbdbab 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -15,6 +15,7 @@
     unescapeHTML,
     ExtractorError,
     int_or_none,
+    mimetype2ext,
 )
 
 from .nbc import NBCSportsVPlayerIE
@@ -236,6 +237,22 @@ def _get_info(self, video_id, display_id, webpage):
 
         self._sort_formats(formats)
 
+        closed_captions = self._html_search_regex(
+            r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
+            default='[]')
+
+        cc_json = self._parse_json(closed_captions, video_id, fatal=False)
+        subtitles = {}
+        if cc_json:
+            for closed_caption in cc_json:
+                lang = closed_caption['lang']
+                if lang not in subtitles:
+                    subtitles[lang] = []
+                subtitles[lang].append({
+                    'url': closed_caption['url'],
+                    'ext': mimetype2ext(closed_caption['content_type']),
+                })
+
         return {
             'id': video_id,
             'display_id': display_id,
@@ -244,6 +261,7 @@ def _get_info(self, video_id, display_id, webpage):
             'description': clean_html(meta['description']),
             'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
             'duration': int_or_none(meta.get('duration')),
+            'subtitles': subtitles,
         }
 
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 507f07383..52d198fa3 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1665,6 +1665,7 @@ def mimetype2ext(mt):
     return {
         'x-ms-wmv': 'wmv',
         'x-mp4-fragmented': 'mp4',
+        'ttml+xml': 'ttml',
     }.get(res, res)
 
 