From 4e3357717312ac56145ba166a1ae2806f6db8337 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 21 Jun 2015 19:16:59 +0800 Subject: [PATCH] [utils] Support ttaf1 namespace in TTML It's found in bbc.co.uk. See #6038 --- youtube_dl/utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 259a9d634..a2746b2d1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds): def dfxp2srt(dfxp_data): - _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'}) + _x = functools.partial(xpath_with_ns, ns_map={ + 'ttml': 'http://www.w3.org/ns/ttml', + 'ttaf1': 'http://www.w3.org/2006/10/ttaf1', + }) def parse_node(node): str_or_empty = functools.partial(str_or_none, default='') @@ -1849,9 +1852,9 @@ def parse_node(node): out = str_or_empty(node.text) for child in node: - if child.tag in (_x('ttml:br'), 'br'): + if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'): out += '\n' + str_or_empty(child.tail) - elif child.tag in (_x('ttml:span'), 'span'): + elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'): out += str_or_empty(parse_node(child)) else: out += str_or_empty(xml.etree.ElementTree.tostring(child)) @@ -1860,7 +1863,7 @@ def parse_node(node): dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) out = [] - paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') + paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p') if not paras: raise ValueError('Invalid dfxp/TTML subtitle')