Subtitle extraction from streaming media manifests #247

Authored by fstirlitz Modified from: https://github.com/ytdl-org/youtube-dl/pull/6144 Closes: #73 Fixes: https://github.com/ytdl-org/youtube-dl/issues/6106 https://github.com/ytdl-org/youtube-dl/issues/14977 https://github.com/ytdl-org/youtube-dl/issues/21438 https://github.com/ytdl-org/youtube-dl/issues/23609 https://github.com/ytdl-org/youtube-dl/issues/28132 Might also fix (untested): https://github.com/ytdl-org/youtube-dl/issues/15424 https://github.com/ytdl-org/youtube-dl/issues/18267 https://github.com/ytdl-org/youtube-dl/issues/23899 https://github.com/ytdl-org/youtube-dl/issues/24375 https://github.com/ytdl-org/youtube-dl/issues/24595 https://github.com/ytdl-org/youtube-dl/issues/27899 Related: https://github.com/ytdl-org/youtube-dl/issues/22379 https://github.com/ytdl-org/youtube-dl/pull/24517 https://github.com/ytdl-org/youtube-dl/pull/24886 https://github.com/ytdl-org/youtube-dl/pull/27215 Notes: * The functions `extractor.common._extract_..._formats` are still kept for compatibility * Only some extractors have currently been moved to using `_extract_..._formats_and_subtitles` * Direct subtitle manifests (without a master) are not supported and are wrongly identified as containing video formats * AES support is untested * The fragmented TTML subtitles extracted from DASH/ISM are valid, but are unsupported by `ffmpeg` and most video players * Their XML fragments can be dumped using `ffmpeg -i in.mp4 -f data -map 0 -c copy out.ttml`. Once the unnecessary headers are stripped out of this, it becomes a valid self-contained TTML file * The TTML subs downloaded from DASH manifests can also be directly opened with <https://github.com/SubtitleEdit> * Fragmented WebVTT files extracted from DASH/ISM are also unsupported by most tools * Unlike the TTML files, the XML fragments of these cannot be dumped using `ffmpeg` * The WebVTT subs extracted from DASH can be parsed by <https://github.com/gpac/gpac> * But the validity of those extracted from ISM is untested
2024-11-02 17:22:31 +01:00 · 2021-04-28 19:02:43 +05:30 · 2021-04-28 19:02:43 +05:30 · be6202f12b
commit be6202f12b
parent db9a564b6a e8f834cd8d
25 changed files with 2730 additions and 267 deletions
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@ -684,17 +684,186 @@ def test_parse_m3u8_formats(self):
                    'width': 1920,
                    'height': 1080,
                    'vcodec': 'avc1.64002a',
-                }]
+                }],
                {}
            ),
            (
                'bipbop_16x9',
                'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
                [{
                    "format_id": "bipbop_audio-BipBop Audio 2",
                    "format_index": None,
                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/alternate_audio_aac/prog_index.m3u8",
                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
                    "language": "eng",
                    "ext": "mp4",
                    "protocol": "m3u8",
                    "preference": None,
                    "quality": None,
                    "vcodec": "none",
                    "audio_ext": "mp4",
                    "video_ext": "none",
                }, {
                    "format_id": "41",
                    "format_index": None,
                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear0/prog_index.m3u8",
                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
                    "tbr": 41.457,
                    "ext": "mp4",
                    "fps": None,
                    "protocol": "m3u8",
                    "preference": None,
                    "quality": None,
                    "vcodec": "none",
                    "acodec": "mp4a.40.2",
                    "audio_ext": "mp4",
                    "video_ext": "none",
                    "abr": 41.457,
                }, {
                    "format_id": "263",
                    "format_index": None,
                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear1/prog_index.m3u8",
                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
                    "tbr": 263.851,
                    "ext": "mp4",
                    "fps": None,
                    "protocol": "m3u8",
                    "preference": None,
                    "quality": None,
                    "width": 416,
                    "height": 234,
                    "vcodec": "avc1.4d400d",
                    "acodec": "mp4a.40.2",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 263.851,
                    "abr": 0,
                }, {
                    "format_id": "577",
                    "format_index": None,
                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear2/prog_index.m3u8",
                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
                    "tbr": 577.61,
                    "ext": "mp4",
                    "fps": None,
                    "protocol": "m3u8",
                    "preference": None,
                    "quality": None,
                    "width": 640,
                    "height": 360,
                    "vcodec": "avc1.4d401e",
                    "acodec": "mp4a.40.2",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 577.61,
                    "abr": 0,
                }, {
                    "format_id": "915",
                    "format_index": None,
                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear3/prog_index.m3u8",
                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
                    "tbr": 915.905,
                    "ext": "mp4",
                    "fps": None,
                    "protocol": "m3u8",
                    "preference": None,
                    "quality": None,
                    "width": 960,
                    "height": 540,
                    "vcodec": "avc1.4d401f",
                    "acodec": "mp4a.40.2",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 915.905,
                    "abr": 0,
                }, {
                    "format_id": "1030",
                    "format_index": None,
                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear4/prog_index.m3u8",
                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
                    "tbr": 1030.138,
                    "ext": "mp4",
                    "fps": None,
                    "protocol": "m3u8",
                    "preference": None,
                    "quality": None,
                    "width": 1280,
                    "height": 720,
                    "vcodec": "avc1.4d401f",
                    "acodec": "mp4a.40.2",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 1030.138,
                    "abr": 0,
                }, {
                    "format_id": "1924",
                    "format_index": None,
                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear5/prog_index.m3u8",
                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
                    "tbr": 1924.009,
                    "ext": "mp4",
                    "fps": None,
                    "protocol": "m3u8",
                    "preference": None,
                    "quality": None,
                    "width": 1920,
                    "height": 1080,
                    "vcodec": "avc1.4d401f",
                    "acodec": "mp4a.40.2",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 1924.009,
                    "abr": 0,
                }],
                {
                    "en": [{
                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng/prog_index.m3u8",
                        "ext": "vtt",
                        "protocol": "m3u8_native"
                    }, {
                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng_forced/prog_index.m3u8",
                        "ext": "vtt",
                        "protocol": "m3u8_native"
                    }],
                    "fr": [{
                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra/prog_index.m3u8",
                        "ext": "vtt",
                        "protocol": "m3u8_native"
                    }, {
                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra_forced/prog_index.m3u8",
                        "ext": "vtt",
                        "protocol": "m3u8_native"
                    }],
                    "es": [{
                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa/prog_index.m3u8",
                        "ext": "vtt",
                        "protocol": "m3u8_native"
                    }, {
                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa_forced/prog_index.m3u8",
                        "ext": "vtt",
                        "protocol": "m3u8_native"
                    }],
                    "ja": [{
                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn/prog_index.m3u8",
                        "ext": "vtt",
                        "protocol": "m3u8_native"
                    }, {
                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn_forced/prog_index.m3u8",
                        "ext": "vtt",
                        "protocol": "m3u8_native"
                    }],
                }
            ),
        ]
-        for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
+        for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
            with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
                         mode='r', encoding='utf-8') as f:
-                formats = self.ie._parse_m3u8_formats(
+                formats, subs = self.ie._parse_m3u8_formats_and_subtitles(
                    f.read(), m3u8_url, ext='mp4')
                self.ie._sort_formats(formats)
                expect_value(self, formats, expected_formats, None)
                expect_value(self, subs, expected_subs, None)
    def test_parse_mpd_formats(self):
        _TEST_CASES = [
@ -780,7 +949,8 @@ def test_parse_mpd_formats(self):
                    'tbr': 5997.485,
                    'width': 1920,
                    'height': 1080,
-                }]
+                }],
                {},
            ), (
                # https://github.com/ytdl-org/youtube-dl/pull/14844
                'urls_only',
@ -863,7 +1033,8 @@ def test_parse_mpd_formats(self):
                    'tbr': 4400,
                    'width': 1920,
                    'height': 1080,
-                }]
+                }],
                {},
            ), (
                # https://github.com/ytdl-org/youtube-dl/issues/20346
                # Media considered unfragmented even though it contains
@ -909,18 +1080,328 @@ def test_parse_mpd_formats(self):
                    'width': 360,
                    'height': 360,
                    'fps': 30,
-                }]
+                }],
                {},
            ), (
                'subtitles',
                'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
                'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
                [{
                    "format_id": "audio=128001",
                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "ext": "m4a",
                    "tbr": 128.001,
                    "asr": 48000,
                    "format_note": "DASH audio",
                    "container": "m4a_dash",
                    "vcodec": "none",
                    "acodec": "mp4a.40.2",
                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
                    "protocol": "http_dash_segments",
                    "audio_ext": "m4a",
                    "video_ext": "none",
                    "abr": 128.001,
                }, {
                    "format_id": "video=100000",
                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "ext": "mp4",
                    "width": 336,
                    "height": 144,
                    "tbr": 100,
                    "format_note": "DASH video",
                    "container": "mp4_dash",
                    "vcodec": "avc1.4D401F",
                    "acodec": "none",
                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
                    "protocol": "http_dash_segments",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 100,
                }, {
                    "format_id": "video=326000",
                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "ext": "mp4",
                    "width": 562,
                    "height": 240,
                    "tbr": 326,
                    "format_note": "DASH video",
                    "container": "mp4_dash",
                    "vcodec": "avc1.4D401F",
                    "acodec": "none",
                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
                    "protocol": "http_dash_segments",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 326,
                }, {
                    "format_id": "video=698000",
                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "ext": "mp4",
                    "width": 844,
                    "height": 360,
                    "tbr": 698,
                    "format_note": "DASH video",
                    "container": "mp4_dash",
                    "vcodec": "avc1.4D401F",
                    "acodec": "none",
                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
                    "protocol": "http_dash_segments",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 698,
                }, {
                    "format_id": "video=1493000",
                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "ext": "mp4",
                    "width": 1126,
                    "height": 480,
                    "tbr": 1493,
                    "format_note": "DASH video",
                    "container": "mp4_dash",
                    "vcodec": "avc1.4D401F",
                    "acodec": "none",
                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
                    "protocol": "http_dash_segments",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 1493,
                }, {
                    "format_id": "video=4482000",
                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "ext": "mp4",
                    "width": 1688,
                    "height": 720,
                    "tbr": 4482,
                    "format_note": "DASH video",
                    "container": "mp4_dash",
                    "vcodec": "avc1.4D401F",
                    "acodec": "none",
                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
                    "protocol": "http_dash_segments",
                    "video_ext": "mp4",
                    "audio_ext": "none",
                    "vbr": 4482,
                }],
                {
                    "en": [
                        {
                            "ext": "mp4",
                            "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                            "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
                            "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
                            "protocol": "http_dash_segments",
                        }
                    ]
                },
            )
        ]
-        for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
+        for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
            with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
                         mode='r', encoding='utf-8') as f:
-                formats = self.ie._parse_mpd_formats(
+                formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
                    compat_etree_fromstring(f.read().encode('utf-8')),
                    mpd_base_url=mpd_base_url, mpd_url=mpd_url)
                self.ie._sort_formats(formats)
                expect_value(self, formats, expected_formats, None)
                expect_value(self, subtitles, expected_subtitles, None)
    def test_parse_ism_formats(self):
        """Check formats and subtitles parsed from the bundled ISM manifests.

        Every test case is a tuple of (fixture name under
        ``test/testdata/ism``, manifest URL, expected formats, expected
        subtitles).  The fixture is parsed, the formats are sorted, and both
        results are compared against the expectations.
        """
        sintel_url = 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest'
        # Values repeated in every expected ``_download_params`` of the case.
        duration = 8880746666
        timescale = 10000000

        def video_format(kbps, width, height, codec_private_data):
            # Expected entry for one video quality level; the levels differ
            # only in bitrate, frame size and codec private data.
            return {
                'format_id': 'video-%d' % kbps,
                'url': sintel_url,
                'manifest_url': sintel_url,
                'ext': 'ismv',
                'width': width,
                'height': height,
                'tbr': kbps,
                'vcodec': 'AVC1',
                'acodec': 'none',
                'protocol': 'ism',
                '_download_params': {
                    'stream_type': 'video',
                    'duration': duration,
                    'timescale': timescale,
                    'width': width,
                    'height': height,
                    'fourcc': 'AVC1',
                    'codec_private_data': codec_private_data,
                    'channels': 2,
                    'bits_per_sample': 16,
                    'nal_unit_length_field': 4,
                },
                'video_ext': 'ismv',
                'audio_ext': 'none',
                'vbr': kbps,
            }

        audio_format = {
            'format_id': 'audio-128',
            'url': sintel_url,
            'manifest_url': sintel_url,
            'ext': 'isma',
            'tbr': 128,
            'asr': 48000,
            'vcodec': 'none',
            'acodec': 'AACL',
            'protocol': 'ism',
            '_download_params': {
                'stream_type': 'audio',
                'duration': duration,
                'timescale': timescale,
                'width': 0,
                'height': 0,
                'fourcc': 'AACL',
                'codec_private_data': '1190',
                'sampling_rate': 48000,
                'channels': 2,
                'bits_per_sample': 16,
                'nal_unit_length_field': 4,
            },
            'audio_ext': 'isma',
            'video_ext': 'none',
            'abr': 128,
        }

        english_subtitle = {
            'ext': 'ismt',
            'protocol': 'ism',
            'url': sintel_url,
            'manifest_url': sintel_url,
            '_download_params': {
                'stream_type': 'text',
                'duration': duration,
                'timescale': timescale,
                'fourcc': 'TTML',
                'codec_private_data': '',
            },
        }

        _TEST_CASES = [
            (
                'sintel',
                sintel_url,
                [
                    audio_format,
                    video_format(
                        100, 336, 144,
                        '00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8'),
                    video_format(
                        326, 562, 240,
                        '00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8'),
                    video_format(
                        698, 844, 360,
                        '00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8'),
                    video_format(
                        1493, 1126, 480,
                        '00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8'),
                    video_format(
                        4482, 1688, 720,
                        '00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8'),
                ],
                {'eng': [english_subtitle]},
            ),
        ]

        for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES:
            fixture_path = './test/testdata/ism/%s.Manifest' % ism_file
            with io.open(fixture_path, mode='r', encoding='utf-8') as f:
                manifest_text = f.read()
            ism_doc = compat_etree_fromstring(manifest_text.encode('utf-8'))
            formats, subtitles = self.ie._parse_ism_formats_and_subtitles(
                ism_doc, ism_url=ism_url)
            self.ie._sort_formats(formats)
            expect_value(self, formats, expected_formats, None)
            expect_value(self, subtitles, expected_subtitles, None)
    def test_parse_f4m_formats(self):
        _TEST_CASES = [
--- a/test/testdata/ism/sintel.Manifest
+++ b/test/testdata/ism/sintel.Manifest
@ -0,0 +1,988 @@
 <?xml version="1.0" encoding="utf-8"?>
 <!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
 <SmoothStreamingMedia
  MajorVersion="2"
  MinorVersion="0"
  TimeScale="10000000"
  Duration="8880746666">
  <StreamIndex
    Type="audio"
    QualityLevels="1"
    TimeScale="10000000"
    Name="audio"
    Chunks="445"
    Url="QualityLevels({bitrate})/Fragments(audio={start time})">
    <QualityLevel
      Index="0"
      Bitrate="128001"
      CodecPrivateData="1190"
      SamplingRate="48000"
      Channels="2"
      BitsPerSample="16"
      PacketSize="4"
      AudioTag="255"
      FourCC="AACL" />
    <c t="0" d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="20053333" />
    <c d="20053333" />
    <c d="20053334" />
    <c d="19840000" />
    <c d="746666" />
  </StreamIndex>
  <StreamIndex
    Type="text"
    QualityLevels="1"
    TimeScale="10000000"
    Language="eng"
    Subtype="CAPT"
    Name="textstream_eng"
    Chunks="11"
    Url="QualityLevels({bitrate})/Fragments(textstream_eng={start time})">
    <QualityLevel
      Index="0"
      Bitrate="1000"
      CodecPrivateData=""
      FourCC="TTML" />
    <c t="0" d="600000000" />
    <c d="600000000" />
    <c d="600000000" />
    <c d="600000000" />
    <c d="600000000" />
    <c d="600000000" />
    <c d="600000000" />
    <c d="600000000" />
    <c d="600000000" />
    <c d="600000000" />
    <c d="240000000" />
  </StreamIndex>
  <StreamIndex
    Type="video"
    QualityLevels="5"
    TimeScale="10000000"
    Name="video"
    Chunks="444"
    Url="QualityLevels({bitrate})/Fragments(video={start time})"
    MaxWidth="1688"
    MaxHeight="720"
    DisplayWidth="1689"
    DisplayHeight="720">
    <QualityLevel
      Index="0"
      Bitrate="100000"
      CodecPrivateData="00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8"
      MaxWidth="336"
      MaxHeight="144"
      FourCC="AVC1" />
    <QualityLevel
      Index="1"
      Bitrate="326000"
      CodecPrivateData="00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8"
      MaxWidth="562"
      MaxHeight="240"
      FourCC="AVC1" />
    <QualityLevel
      Index="2"
      Bitrate="698000"
      CodecPrivateData="00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8"
      MaxWidth="844"
      MaxHeight="360"
      FourCC="AVC1" />
    <QualityLevel
      Index="3"
      Bitrate="1493000"
      CodecPrivateData="00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8"
      MaxWidth="1126"
      MaxHeight="480"
      FourCC="AVC1" />
    <QualityLevel
      Index="4"
      Bitrate="4482000"
      CodecPrivateData="00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8"
      MaxWidth="1688"
      MaxHeight="720"
      FourCC="AVC1" />
    <c t="0" d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
    <c d="20000000" />
  </StreamIndex>
 </SmoothStreamingMedia>
--- a/test/testdata/m3u8/bipbop_16x9.m3u8
+++ b/test/testdata/m3u8/bipbop_16x9.m3u8
@ -0,0 +1,38 @@
 #EXTM3U
 #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 1",AUTOSELECT=YES,DEFAULT=YES
 #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 2",AUTOSELECT=NO,DEFAULT=NO,URI="alternate_audio_aac/prog_index.m3u8"
 #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/eng/prog_index.m3u8"
 #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="en",URI="subtitles/eng_forced/prog_index.m3u8"
 #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="fr",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/fra/prog_index.m3u8"
 #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="fr",URI="subtitles/fra_forced/prog_index.m3u8"
 #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="es",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/spa/prog_index.m3u8"
 #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="es",URI="subtitles/spa_forced/prog_index.m3u8"
 #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="ja",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/jpn/prog_index.m3u8"
 #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語 (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="ja",URI="subtitles/jpn_forced/prog_index.m3u8"
 #EXT-X-STREAM-INF:BANDWIDTH=263851,CODECS="mp4a.40.2, avc1.4d400d",RESOLUTION=416x234,AUDIO="bipbop_audio",SUBTITLES="subs"
 gear1/prog_index.m3u8
 #EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=28451,CODECS="avc1.4d400d",URI="gear1/iframe_index.m3u8"
 #EXT-X-STREAM-INF:BANDWIDTH=577610,CODECS="mp4a.40.2, avc1.4d401e",RESOLUTION=640x360,AUDIO="bipbop_audio",SUBTITLES="subs"
 gear2/prog_index.m3u8
 #EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=181534,CODECS="avc1.4d401e",URI="gear2/iframe_index.m3u8"
 #EXT-X-STREAM-INF:BANDWIDTH=915905,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=960x540,AUDIO="bipbop_audio",SUBTITLES="subs"
 gear3/prog_index.m3u8
 #EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=297056,CODECS="avc1.4d401f",URI="gear3/iframe_index.m3u8"
 #EXT-X-STREAM-INF:BANDWIDTH=1030138,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1280x720,AUDIO="bipbop_audio",SUBTITLES="subs"
 gear4/prog_index.m3u8
 #EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=339492,CODECS="avc1.4d401f",URI="gear4/iframe_index.m3u8"
 #EXT-X-STREAM-INF:BANDWIDTH=1924009,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1920x1080,AUDIO="bipbop_audio",SUBTITLES="subs"
 gear5/prog_index.m3u8
 #EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=669554,CODECS="avc1.4d401f",URI="gear5/iframe_index.m3u8"
 #EXT-X-STREAM-INF:BANDWIDTH=41457,CODECS="mp4a.40.2",AUDIO="bipbop_audio",SUBTITLES="subs"
 gear0/prog_index.m3u8
--- a/test/testdata/mpd/subtitles.mpd
+++ b/test/testdata/mpd/subtitles.mpd
@ -0,0 +1,351 @@
 <?xml version="1.0" encoding="utf-8"?>
 <!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
 <MPD
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xmlns="urn:mpeg:dash:schema:mpd:2011"
  xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
  type="static"
  mediaPresentationDuration="PT14M48S"
  maxSegmentDuration="PT1M"
  minBufferTime="PT10S"
  profiles="urn:mpeg:dash:profile:isoff-live:2011">
  <Period
    id="1"
    duration="PT14M48S">
    <BaseURL>dash/</BaseURL>
    <AdaptationSet
      id="1"
      group="1"
      contentType="audio"
      segmentAlignment="true"
      audioSamplingRate="48000"
      mimeType="audio/mp4"
      codecs="mp4a.40.2"
      startWithSAP="1">
      <AudioChannelConfiguration
        schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
        value="2" />
      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
      <SegmentTemplate
        timescale="48000"
        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
        <SegmentTimeline>
          <S t="0" d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="96256" r="2" />
          <S d="95232" />
          <S d="3584" />
        </SegmentTimeline>
      </SegmentTemplate>
      <Representation
        id="audio=128001"
        bandwidth="128001">
      </Representation>
    </AdaptationSet>
    <AdaptationSet
      id="2"
      group="3"
      contentType="text"
      lang="en"
      mimeType="application/mp4"
      codecs="stpp"
      startWithSAP="1">
      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
      <SegmentTemplate
        timescale="1000"
        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
        <SegmentTimeline>
          <S t="0" d="60000" r="9" />
          <S d="24000" />
        </SegmentTimeline>
      </SegmentTemplate>
      <Representation
        id="textstream_eng=1000"
        bandwidth="1000">
      </Representation>
    </AdaptationSet>
    <AdaptationSet
      id="3"
      group="2"
      contentType="video"
      par="960:409"
      minBandwidth="100000"
      maxBandwidth="4482000"
      maxWidth="1689"
      maxHeight="720"
      segmentAlignment="true"
      mimeType="video/mp4"
      codecs="avc1.4D401F"
      startWithSAP="1">
      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
      <SegmentTemplate
        timescale="12288"
        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
        <SegmentTimeline>
          <S t="0" d="24576" r="443" />
        </SegmentTimeline>
      </SegmentTemplate>
      <Representation
        id="video=100000"
        bandwidth="100000"
        width="336"
        height="144"
        sar="2880:2863"
        scanType="progressive">
      </Representation>
      <Representation
        id="video=326000"
        bandwidth="326000"
        width="562"
        height="240"
        sar="115200:114929"
        scanType="progressive">
      </Representation>
      <Representation
        id="video=698000"
        bandwidth="698000"
        width="844"
        height="360"
        sar="86400:86299"
        scanType="progressive">
      </Representation>
      <Representation
        id="video=1493000"
        bandwidth="1493000"
        width="1126"
        height="480"
        sar="230400:230267"
        scanType="progressive">
      </Representation>
      <Representation
        id="video=4482000"
        bandwidth="4482000"
        width="1688"
        height="720"
        sar="86400:86299"
        scanType="progressive">
      </Representation>
    </AdaptationSet>
  </Period>
 </MPD>
--- a/yt_dlp/compat.py
+++ b/yt_dlp/compat.py
@ -3018,10 +3018,24 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
        return ctypes.WINFUNCTYPE(*args, **kwargs)
 try:
    compat_Pattern = re.Pattern
 except AttributeError:
    compat_Pattern = type(re.compile(''))
 try:
    compat_Match = re.Match
 except AttributeError:
    compat_Match = type(re.compile('').match(''))
 __all__ = [
    'compat_HTMLParseError',
    'compat_HTMLParser',
    'compat_HTTPError',
    'compat_Match',
    'compat_Pattern',
    'compat_Struct',
    'compat_b64decode',
    'compat_basestring',
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@ -77,7 +77,10 @@ def _read_ytdl_file(self, ctx):
        assert 'ytdl_corrupt' not in ctx
        stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
        try:
-            ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+            ytdl_data = json.loads(stream.read())
            ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index']
            if 'extra_state' in ytdl_data['downloader']:
                ctx['extra_state'] = ytdl_data['downloader']['extra_state']
        except Exception:
            ctx['ytdl_corrupt'] = True
        finally:
@ -90,6 +93,8 @@ def _write_ytdl_file(self, ctx):
                'index': ctx['fragment_index'],
            },
        }
        if 'extra_state' in ctx:
            downloader['extra_state'] = ctx['extra_state']
        if ctx.get('fragment_count') is not None:
            downloader['fragment_count'] = ctx['fragment_count']
        frag_index_stream.write(json.dumps({'downloader': downloader}))
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@ -2,6 +2,7 @@
 import errno
 import re
 import io
 import binascii
 try:
    from Crypto.Cipher import AES
@ -27,7 +28,9 @@
    parse_m3u8_attributes,
    sanitize_open,
    update_url_query,
    bug_reports_message,
 )
 from .. import webvtt
 class HlsFD(FragmentFD):
@ -78,6 +81,8 @@ def real_download(self, filename, info_dict):
        man_url = info_dict['url']
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
        is_webvtt = info_dict['ext'] == 'vtt'
        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        s = urlh.read().decode('utf-8', 'ignore')
@ -142,6 +147,8 @@ def is_ad_fragment_end(s):
        else:
            self._prepare_and_start_frag_download(ctx)
        extra_state = ctx.setdefault('extra_state', {})
        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
        test = self.params.get('test', False)
@ -308,6 +315,76 @@ def download_fragment(fragment):
                return frag_content, frag_index
            pack_fragment = lambda frag_content, _: frag_content
            if is_webvtt:
                def pack_fragment(frag_content, frag_index):
                    """Re-pack one downloaded WebVTT fragment before it is appended.

                    The fragment is parsed with webvtt.parse_fragment() and every
                    block that is not filtered out is re-serialised via
                    block.write_into(). State that has to be remembered between
                    calls (the cue dedup window and the timestamp bookkeeping)
                    lives in the enclosing extra_state dict.

                    frag_content -- fragment payload handed to webvtt.parse_fragment()
                    frag_index   -- index of the fragment; index 1 is treated as the
                                    reference fragment whose header blocks are kept

                    Returns the re-serialised fragment as UTF-8 encoded bytes.
                    """
                    output = io.StringIO()
                    # offset added to start/end of every cue seen after it is set;
                    # it only changes when a Magic block of a later fragment
                    # (frag_index != 1) provides both timestamps
                    adjust = 0
                    for block in webvtt.parse_fragment(frag_content):
                        if isinstance(block, webvtt.CueBlock):
                            block.start += adjust
                            block.end += adjust

                            # the window is stored in extra_state so that it
                            # carries over from one fragment to the next
                            dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
                            cue = block.as_json

                            # skip the cue if an identical one appears
                            # in the window of potential duplicates
                            # and prune the window of unviable candidates
                            i = 0
                            skip = True
                            while i < len(dedup_window):
                                window_cue = dedup_window[i]
                                if window_cue == cue:
                                    # identical cue already emitted: leave skip = True
                                    break
                                if window_cue['end'] >= cue['start']:
                                    # still ends at or after this cue's start: keep it
                                    i += 1
                                    continue
                                # ended before this cue starts: drop it from the
                                # window (i is not advanced, the next entry slides in)
                                del dedup_window[i]
                            else:
                                # loop finished without break => no duplicate found
                                skip = False

                            if skip:
                                continue

                            # add the cue to the window
                            dedup_window.append(cue)
                        elif isinstance(block, webvtt.Magic):
                            # take care of MPEG PES timestamp overflow
                            if block.mpegts is None:
                                block.mpegts = 0
                            # 'webvtt_mpegts_adjust' counts the wrap-arounds seen so
                            # far; each one contributes 1 << 33 to the timestamp
                            extra_state.setdefault('webvtt_mpegts_adjust', 0)
                            block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
                            if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
                                # value went backwards relative to the previous
                                # Magic block: treat it as one more wrap-around
                                extra_state['webvtt_mpegts_adjust'] += 1
                                block.mpegts += 1 << 33
                            extra_state['webvtt_mpegts_last'] = block.mpegts

                            if frag_index == 1:
                                # remember the first fragment's timestamps as the
                                # reference; its Magic block falls through and is
                                # written to the output
                                extra_state['webvtt_mpegts'] = block.mpegts or 0
                                extra_state['webvtt_local'] = block.local or 0
                                # XXX: block.local = block.mpegts = None ?
                            else:
                                # block.mpegts was normalised to a number above, so
                                # in effect only block.local decides this test
                                if block.mpegts is not None and block.local is not None:
                                    adjust = (
                                        (block.mpegts - extra_state.get('webvtt_mpegts', 0))
                                        - (block.local - extra_state.get('webvtt_local', 0))
                                    )
                                # Magic blocks of later fragments are never written
                                continue
                        elif isinstance(block, webvtt.HeaderBlock):
                            if frag_index != 1:
                                # XXX: this should probably be silent as well
                                # or verify that all segments contain the same data
                                self.report_warning(bug_reports_message(
                                    'Discarding a %s block found in the middle of the stream; '
                                    'if the subtitles display incorrectly,'
                                    % (type(block).__name__)))
                                continue
                        # everything that was not skipped above is re-serialised
                        block.write_into(output)

                    return output.getvalue().encode('utf-8')
            def append_fragment(frag_content, frag_index):
                if frag_content:
                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
@ -315,6 +392,7 @@ def append_fragment(frag_content, frag_index):
                        file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
                        ctx['fragment_filename_sanitized'] = frag_sanitized
                        file.close()
                        frag_content = pack_fragment(frag_content, frag_index)
                        self._append_fragment(ctx, frag_content)
                        return True
                    except EnvironmentError as ose:
--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@ -48,7 +48,7 @@ def write_piff_header(stream, params):
    language = params.get('language', 'und')
    height = params.get('height', 0)
    width = params.get('width', 0)
-    is_audio = width == 0 and height == 0
+    stream_type = params['stream_type']
    creation_time = modification_time = int(time.time())
    ftyp_payload = b'isml'  # major brand
@ -77,7 +77,7 @@ def write_piff_header(stream, params):
    tkhd_payload += u32.pack(0) * 2  # reserved
    tkhd_payload += s16.pack(0)  # layer
    tkhd_payload += s16.pack(0)  # alternate group
-    tkhd_payload += s88.pack(1 if is_audio else 0)  # volume
+    tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0)  # volume
    tkhd_payload += u16.pack(0)  # reserved
    tkhd_payload += unity_matrix
    tkhd_payload += u1616.pack(width)
@ -93,19 +93,34 @@ def write_piff_header(stream, params):
    mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload)  # Media Header Box
    hdlr_payload = u32.pack(0)  # pre defined
-    hdlr_payload += b'soun' if is_audio else b'vide'  # handler type
+    if stream_type == 'audio':  # handler type
-    hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'soun'
-    hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0'  # name
+        hdlr_payload += u32.pack(0) * 3  # reserved
        hdlr_payload += b'SoundHandler\0'  # name
    elif stream_type == 'video':
        hdlr_payload += b'vide'
        hdlr_payload += u32.pack(0) * 3  # reserved
        hdlr_payload += b'VideoHandler\0'  # name
    elif stream_type == 'text':
        hdlr_payload += b'subt'
        hdlr_payload += u32.pack(0) * 3  # reserved
        hdlr_payload += b'SubtitleHandler\0'  # name
    else:
        assert False
    mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload)  # Handler Reference Box
-    if is_audio:
+    if stream_type == 'audio':
        smhd_payload = s88.pack(0)  # balance
        smhd_payload += u16.pack(0)  # reserved
        media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
-    else:
+    elif stream_type == 'video':
        vmhd_payload = u16.pack(0)  # graphics mode
        vmhd_payload += u16.pack(0) * 3  # opcolor
        media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload)  # Video Media Header
    elif stream_type == 'text':
        media_header_box = full_box(b'sthd', 0, 0, b'')  # Subtitle Media Header
    else:
        assert False
    minf_payload = media_header_box
    dref_payload = u32.pack(1)  # entry count
@ -117,7 +132,7 @@ def write_piff_header(stream, params):
    sample_entry_payload = u8.pack(0) * 6  # reserved
    sample_entry_payload += u16.pack(1)  # data reference index
-    if is_audio:
+    if stream_type == 'audio':
        sample_entry_payload += u32.pack(0) * 2  # reserved
        sample_entry_payload += u16.pack(params.get('channels', 2))
        sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
@ -127,7 +142,7 @@ def write_piff_header(stream, params):
        if fourcc == 'AACL':
            sample_entry_box = box(b'mp4a', sample_entry_payload)
-    else:
+    elif stream_type == 'video':
        sample_entry_payload += u16.pack(0)  # pre defined
        sample_entry_payload += u16.pack(0)  # reserved
        sample_entry_payload += u32.pack(0) * 3  # pre defined
@ -155,6 +170,18 @@ def write_piff_header(stream, params):
            avcc_payload += pps
            sample_entry_payload += box(b'avcC', avcc_payload)  # AVC Decoder Configuration Record
            sample_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
        else:
            assert False
    elif stream_type == 'text':
        if fourcc == 'TTML':
            sample_entry_payload += b'http://www.w3.org/ns/ttml\0'  # namespace
            sample_entry_payload += b'\0'  # schema location
            sample_entry_payload += b'\0'  # auxilary mime types(??)
            sample_entry_box = box(b'stpp', sample_entry_payload)
        else:
            assert False
    else:
        assert False
    stsd_payload += sample_entry_box
    stbl_payload = full_box(b'stsd', 0, 0, stsd_payload)  # Sample Description Box
@ -221,10 +248,13 @@ def real_download(self, filename, info_dict):
        self._prepare_and_start_frag_download(ctx)
        extra_state = ctx.setdefault('extra_state', {
            'ism_track_written': False,
        })
        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
        track_written = False
        frag_index = 0
        for i, segment in enumerate(segments):
            frag_index += 1
@ -236,11 +266,11 @@ def real_download(self, filename, info_dict):
                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
                    if not success:
                        return False
-                    if not track_written:
+                    if not extra_state['ism_track_written']:
                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
-                        track_written = True
+                        extra_state['ism_track_written'] = True
                    self._append_fragment(ctx, frag_content)
                    break
                except compat_urllib_error.HTTPError as err:
--- a/yt_dlp/extractor/atresplayer.py
+++ b/yt_dlp/extractor/atresplayer.py
@ -86,18 +86,19 @@ def _real_extract(self, url):
        title = episode['titulo']
        formats = []
        subtitles = {}
        for source in episode.get('sources', []):
            src = source.get('src')
            if not src:
                continue
            src_type = source.get('type')
            if src_type == 'application/vnd.apple.mpegurl':
-                formats.extend(self._extract_m3u8_formats(
+                # _extract_m3u8_formats() returns only the format list, so the
+                # subtitle-aware variant is required to unpack two values;
+                # results are accumulated so that earlier sources are kept
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    src, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    m3u8_id='hls', fatal=False)
+                formats.extend(fmts)
+                subtitles = self._merge_subtitles(subtitles, subs)
            elif src_type == 'application/dash+xml':
-                formats.extend(self._extract_mpd_formats(
+                # same reasoning as for the HLS branch above
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
-                    src, video_id, mpd_id='dash', fatal=False))
+                    src, video_id, mpd_id='dash', fatal=False)
+                formats.extend(fmts)
+                subtitles = self._merge_subtitles(subtitles, subs)
        self._sort_formats(formats)
        heartbeat = episode.get('heartbeat') or {}
@ -115,4 +116,5 @@ def _real_extract(self, url):
            'channel': get_meta('channel'),
            'season': get_meta('season'),
            'episode_number': int_or_none(get_meta('episodeNumber')),
            'subtitles': subtitles,
        }
--- a/yt_dlp/extractor/byutv.py
+++ b/yt_dlp/extractor/byutv.py
@ -82,6 +82,7 @@ def _real_extract(self, url):
        info = {}
        formats = []
        subtitles = {}
        for format_id, ep in video.items():
            if not isinstance(ep, dict):
                continue
@ -90,12 +91,16 @@ def _real_extract(self, url):
                continue
            ext = determine_ext(video_url)
            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    m3u8_id='hls', fatal=False)
                formats.extend(m3u8_fmts)
                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
            elif ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
+                mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
-                    video_url, video_id, mpd_id='dash', fatal=False))
+                    video_url, video_id, mpd_id='dash', fatal=False)
                formats.extend(mpd_fmts)
                subtitles = self._merge_subtitles(subtitles, mpd_subs)
            else:
                formats.append({
                    'url': video_url,
@ -114,4 +119,5 @@ def _real_extract(self, url):
            'display_id': display_id,
            'title': display_id,
            'formats': formats,
            'subtitles': subtitles,
        })
--- a/yt_dlp/extractor/canvas.py
+++ b/yt_dlp/extractor/canvas.py
@ -83,24 +83,31 @@ def _real_extract(self, url):
        description = data.get('description')
        formats = []
        subtitles = {}
        for target in data['targetUrls']:
            format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
            if not format_url or not format_type:
                continue
            format_type = format_type.upper()
            if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
-                formats.extend(self._extract_m3u8_formats(
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
-                    m3u8_id=format_type, fatal=False))
+                    m3u8_id=format_type, fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif format_type == 'HDS':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_type, fatal=False))
            elif format_type == 'MPEG_DASH':
-                formats.extend(self._extract_mpd_formats(
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
-                    format_url, video_id, mpd_id=format_type, fatal=False))
+                    format_url, video_id, mpd_id=format_type, fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif format_type == 'HSS':
-                formats.extend(self._extract_ism_formats(
+                fmts, subs = self._extract_ism_formats_and_subtitles(
-                    format_url, video_id, ism_id='mss', fatal=False))
+                    format_url, video_id, ism_id='mss', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            else:
                formats.append({
                    'format_id': format_type,
@ -108,7 +115,6 @@ def _real_extract(self, url):
                })
        self._sort_formats(formats)
        subtitles = {}
        subtitle_urls = data.get('subtitleUrls')
        if isinstance(subtitle_urls, list):
            for subtitle in subtitle_urls:
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -1879,11 +1879,21 @@ def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, quality=None, m
            'format_note': 'Quality selection URL',
        }
-    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
+    def _extract_m3u8_formats(self, *args, **kwargs):
-                              entry_protocol='m3u8', preference=None, quality=None,
+        fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
-                              m3u8_id=None, note=None, errnote=None,
+        if subs:
-                              fatal=True, live=False, data=None, headers={},
+            self.report_warning(bug_reports_message(
-                              query={}):
+                "Ignoring subtitle tracks found in the HLS manifest; "
                "if any subtitle tracks are missing,"
            ))
        return fmts
    def _extract_m3u8_formats_and_subtitles(
            self, m3u8_url, video_id, ext=None, entry_protocol='m3u8',
            preference=None, quality=None, m3u8_id=None, note=None,
            errnote=None, fatal=True, live=False, data=None, headers={},
            query={}):
        res = self._download_webpage_handle(
            m3u8_url, video_id,
            note=note or 'Downloading m3u8 information',
@ -1891,30 +1901,34 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
            fatal=fatal, data=data, headers=headers, query=query)
        if res is False:
-            return []
+            return [], {}
        m3u8_doc, urlh = res
        m3u8_url = urlh.geturl()
-        return self._parse_m3u8_formats(
+        return self._parse_m3u8_formats_and_subtitles(
            m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
            preference=preference, quality=quality, m3u8_id=m3u8_id,
            note=note, errnote=errnote, fatal=fatal, live=live, data=data,
            headers=headers, query=query, video_id=video_id)
-    def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
+    def _parse_m3u8_formats_and_subtitles(
-                            entry_protocol='m3u8', preference=None, quality=None,
+            self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8',
-                            m3u8_id=None, live=False, note=None, errnote=None,
+            preference=None, quality=None, m3u8_id=None, live=False, note=None,
-                            fatal=True, data=None, headers={}, query={}, video_id=None):
+            errnote=None, fatal=True, data=None, headers={}, query={},
            video_id=None):
        if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
-            return []
+            return [], {}
        if (not self._downloader.params.get('allow_unplayable_formats')
                and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)):  # Apple FairPlay
-            return []
+            return [], {}
        formats = []
        subtitles = {}
        format_url = lambda u: (
            u
            if re.match(r'^https?://', u)
@ -2001,7 +2015,7 @@ def _extract_m3u8_playlist_formats(format_url=None, m3u8_doc=None, video_id=None
                }
                formats.append(f)
-            return formats
+            return formats, subtitles
        groups = {}
        last_stream_inf = {}
@ -2013,6 +2027,21 @@ def extract_media(x_media_line):
            if not (media_type and group_id and name):
                return
            groups.setdefault(group_id, []).append(media)
            # <https://tools.ietf.org/html/rfc8216#section-4.3.4.1>
            if media_type == 'SUBTITLES':
                # Read URI defensively: subscripting would raise KeyError and
                # abort parsing of the whole manifest if a malformed playlist
                # omits it. Such renditions are simply not listed.
                subtitle_uri = media.get('URI')
                if subtitle_uri:
                    url = format_url(subtitle_uri)
                    sub_info = {
                        'url': url,
                        'ext': determine_ext(url),
                    }
                    if sub_info['ext'] == 'm3u8':
                        # Per RFC 8216 §3.1, the only possible subtitle format m3u8
                        # files may contain is WebVTT:
                        # <https://tools.ietf.org/html/rfc8216#section-3.1>
                        sub_info['ext'] = 'vtt'
                        sub_info['protocol'] = 'm3u8_native'
                    # LANGUAGE is an optional attribute of EXT-X-MEDIA; tracks
                    # without it are filed under 'und' (undetermined)
                    lang = media.get('LANGUAGE') or 'und'  # XXX: normalise?
                    subtitles.setdefault(lang, []).append(sub_info)
            if media_type not in ('VIDEO', 'AUDIO'):
                return
            media_url = media.get('URI')
@ -2160,7 +2189,7 @@ def build_stream_name():
                        formats.append(http_f)
                last_stream_inf = {}
-        return formats
+        return formats, subtitles
    @staticmethod
    def _xpath_ns(path, namespace=None):
@ -2403,23 +2432,44 @@ def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
            })
        return entries
-    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+    def _extract_mpd_formats(self, *args, **kwargs):
        fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
        if subs:
            self.report_warning(bug_reports_message(
                "Ignoring subtitle tracks found in the DASH manifest; "
                "if any subtitle tracks are missing,"
            ))
        return fmts
    def _extract_mpd_formats_and_subtitles(
            self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
            fatal=True, data=None, headers={}, query={}):
        res = self._download_xml_handle(
            mpd_url, video_id,
            note=note or 'Downloading MPD manifest',
            errnote=errnote or 'Failed to download MPD manifest',
            fatal=fatal, data=data, headers=headers, query=query)
        if res is False:
-            return []
+            return [], {}
        mpd_doc, urlh = res
        if mpd_doc is None:
-            return []
+            return [], {}
        mpd_base_url = base_url(urlh.geturl())
-        return self._parse_mpd_formats(
+        return self._parse_mpd_formats_and_subtitles(
            mpd_doc, mpd_id, mpd_base_url, mpd_url)
-    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+    def _parse_mpd_formats(self, *args, **kwargs):
        fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
        if subs:
            self.report_warning(bug_reports_message(
                "Ignoring subtitle tracks found in the DASH manifest; "
                "if any subtitle tracks are missing,"
            ))
        return fmts
    def _parse_mpd_formats_and_subtitles(
            self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
        """
        Parse formats from MPD manifest.
        References:
@ -2429,7 +2479,7 @@ def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None
        """
        if not self._downloader.params.get('dynamic_mpd', True):
            if mpd_doc.get('type') == 'dynamic':
-                return []
+                return [], {}
        namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
@ -2501,6 +2551,7 @@ def extract_Initialization(source):
        mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
        formats = []
        subtitles = {}
        for period in mpd_doc.findall(_add_ns('Period')):
            period_duration = parse_duration(period.get('duration')) or mpd_duration
            period_ms_info = extract_multisegment_info(period, {
@ -2518,11 +2569,9 @@ def extract_Initialization(source):
                    representation_attrib.update(representation.attrib)
                    # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
                    mime_type = representation_attrib['mimeType']
-                    content_type = mime_type.split('/')[0]
+                    content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
-                    if content_type == 'text':
+
-                        # TODO implement WebVTT downloading
+                    if content_type in ('video', 'audio', 'text'):
                        pass
                    elif content_type in ('video', 'audio'):
                        base_url = ''
                        for element in (representation, adaptation_set, period, mpd_doc):
                            base_url_e = element.find(_add_ns('BaseURL'))
@ -2539,21 +2588,28 @@ def extract_Initialization(source):
                        url_el = representation.find(_add_ns('BaseURL'))
                        filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
                        bandwidth = int_or_none(representation_attrib.get('bandwidth'))
-                        f = {
+                        if content_type in ('video', 'audio'):
-                            'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
+                            f = {
-                            'manifest_url': mpd_url,
+                                'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
-                            'ext': mimetype2ext(mime_type),
+                                'manifest_url': mpd_url,
-                            'width': int_or_none(representation_attrib.get('width')),
+                                'ext': mimetype2ext(mime_type),
-                            'height': int_or_none(representation_attrib.get('height')),
+                                'width': int_or_none(representation_attrib.get('width')),
-                            'tbr': float_or_none(bandwidth, 1000),
+                                'height': int_or_none(representation_attrib.get('height')),
-                            'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
+                                'tbr': float_or_none(bandwidth, 1000),
-                            'fps': int_or_none(representation_attrib.get('frameRate')),
+                                'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
-                            'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
+                                'fps': int_or_none(representation_attrib.get('frameRate')),
-                            'format_note': 'DASH %s' % content_type,
+                                'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
-                            'filesize': filesize,
+                                'format_note': 'DASH %s' % content_type,
-                            'container': mimetype2ext(mime_type) + '_dash',
+                                'filesize': filesize,
-                        }
+                                'container': mimetype2ext(mime_type) + '_dash',
-                        f.update(parse_codecs(representation_attrib.get('codecs')))
+                            }
                            f.update(parse_codecs(representation_attrib.get('codecs')))
                        elif content_type == 'text':
                            f = {
                                'ext': mimetype2ext(mime_type),
                                'manifest_url': mpd_url,
                                'filesize': filesize,
                            }
                        representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
                        def prepare_template(template_name, identifiers):
@ -2700,26 +2756,38 @@ def add_segment_url():
                        else:
                            # Assuming direct URL to unfragmented media.
                            f['url'] = base_url
-                        formats.append(f)
+                        if content_type in ('video', 'audio'):
                            formats.append(f)
                        elif content_type == 'text':
                            subtitles.setdefault(lang or 'und', []).append(f)
                    else:
                        self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
-        return formats
+        return formats, subtitles
-    def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+    def _extract_ism_formats(self, *args, **kwargs):
        fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
        if subs:
            self.report_warning(bug_reports_message(
                "Ignoring subtitle tracks found in the ISM manifest; "
                "if any subtitle tracks are missing,"
            ))
        return fmts
    def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
        res = self._download_xml_handle(
            ism_url, video_id,
            note=note or 'Downloading ISM manifest',
            errnote=errnote or 'Failed to download ISM manifest',
            fatal=fatal, data=data, headers=headers, query=query)
        if res is False:
-            return []
+            return [], {}
        ism_doc, urlh = res
        if ism_doc is None:
-            return []
+            return [], {}
-        return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
+        return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
-    def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+    def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
        """
        Parse formats from ISM manifest.
        References:
@ -2727,26 +2795,28 @@ def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
            https://msdn.microsoft.com/en-us/library/ff469518.aspx
        """
        if ism_doc.get('IsLive') == 'TRUE':
-            return []
+            return [], {}
        if (not self._downloader.params.get('allow_unplayable_formats')
                and ism_doc.find('Protection') is not None):
-            return []
+            return [], {}
        duration = int(ism_doc.attrib['Duration'])
        timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
        formats = []
        subtitles = {}
        for stream in ism_doc.findall('StreamIndex'):
            stream_type = stream.get('Type')
-            if stream_type not in ('video', 'audio'):
+            if stream_type not in ('video', 'audio', 'text'):
                continue
            url_pattern = stream.attrib['Url']
            stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
            stream_name = stream.get('Name')
            stream_language = stream.get('Language', 'und')
            for track in stream.findall('QualityLevel'):
                fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
                # TODO: add support for WVC1 and WMAP
-                if fourcc not in ('H264', 'AVC1', 'AACL'):
+                if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML'):
                    self.report_warning('%s is not a supported codec' % fourcc)
                    continue
                tbr = int(track.attrib['Bitrate']) // 1000
@ -2789,33 +2859,52 @@ def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
                    format_id.append(stream_name)
                format_id.append(compat_str(tbr))
-                formats.append({
+                if stream_type == 'text':
-                    'format_id': '-'.join(format_id),
+                    subtitles.setdefault(stream_language, []).append({
-                    'url': ism_url,
+                        'ext': 'ismt',
-                    'manifest_url': ism_url,
+                        'protocol': 'ism',
-                    'ext': 'ismv' if stream_type == 'video' else 'isma',
+                        'url': ism_url,
-                    'width': width,
+                        'manifest_url': ism_url,
-                    'height': height,
+                        'fragments': fragments,
-                    'tbr': tbr,
+                        '_download_params': {
-                    'asr': sampling_rate,
+                            'stream_type': stream_type,
-                    'vcodec': 'none' if stream_type == 'audio' else fourcc,
+                            'duration': duration,
-                    'acodec': 'none' if stream_type == 'video' else fourcc,
+                            'timescale': stream_timescale,
-                    'protocol': 'ism',
+                            'fourcc': fourcc,
-                    'fragments': fragments,
+                            'language': stream_language,
-                    '_download_params': {
+                            'codec_private_data': track.get('CodecPrivateData'),
-                        'duration': duration,
+                        }
-                        'timescale': stream_timescale,
+                    })
-                        'width': width or 0,
+                elif stream_type in ('video', 'audio'):
-                        'height': height or 0,
+                    formats.append({
-                        'fourcc': fourcc,
+                        'format_id': '-'.join(format_id),
-                        'codec_private_data': track.get('CodecPrivateData'),
+                        'url': ism_url,
-                        'sampling_rate': sampling_rate,
+                        'manifest_url': ism_url,
-                        'channels': int_or_none(track.get('Channels', 2)),
+                        'ext': 'ismv' if stream_type == 'video' else 'isma',
-                        'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+                        'width': width,
-                        'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+                        'height': height,
-                    },
+                        'tbr': tbr,
-                })
+                        'asr': sampling_rate,
-        return formats
+                        'vcodec': 'none' if stream_type == 'audio' else fourcc,
                        'acodec': 'none' if stream_type == 'video' else fourcc,
                        'protocol': 'ism',
                        'fragments': fragments,
                        '_download_params': {
                            'stream_type': stream_type,
                            'duration': duration,
                            'timescale': stream_timescale,
                            'width': width or 0,
                            'height': height or 0,
                            'fourcc': fourcc,
                            'language': stream_language,
                            'codec_private_data': track.get('CodecPrivateData'),
                            'sampling_rate': sampling_rate,
                            'channels': int_or_none(track.get('Channels', 2)),
                            'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
                            'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
                        },
                    })
        return formats, subtitles
    def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
        def absolute_url(item_url):
@ -2940,7 +3029,16 @@ def _media_formats(src, cur_media_type, type_info={}):
                entries.append(media_info)
        return entries
-    def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
+    def _extract_akamai_formats(self, *args, **kwargs):
        fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
        if subs:
            self.report_warning(bug_reports_message(
                "Ignoring subtitle tracks found in the manifests; "
                "if any subtitle tracks are missing,"
            ))
        return fmts
    def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
        signed = 'hdnea=' in manifest_url
        if not signed:
            # https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
@ -2949,6 +3047,7 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
                '', manifest_url).strip('?')
        formats = []
        subtitles = {}
        hdcore_sign = 'hdcore=3.7.0'
        f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
@ -2967,10 +3066,11 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
        hls_host = hosts.get('hls')
        if hls_host:
            m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
-        m3u8_formats = self._extract_m3u8_formats(
+        m3u8_formats, m3u8_subtitles = self._extract_m3u8_formats_and_subtitles(
            m3u8_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False)
        formats.extend(m3u8_formats)
        subtitles = self._merge_subtitles(subtitles, m3u8_subtitles)
        http_host = hosts.get('http')
        if http_host and m3u8_formats and not signed:
@ -2994,7 +3094,7 @@ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
                            formats.append(http_f)
                        i += 1
-        return formats
+        return formats, subtitles
    def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
        query = compat_urlparse.urlparse(url).query
@ -3319,12 +3419,22 @@ def _merge_subtitle_items(subtitle_list1, subtitle_list2):
        return ret
    @classmethod
-    def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
+    def _merge_subtitles(cls, *dicts, **kwargs):
-        """ Merge two subtitle dictionaries, language by language. """
+        """ Merge subtitle dictionaries, language by language. """
-        ret = dict(subtitle_dict1)
+
-        for lang in subtitle_dict2:
+        target = (lambda target=None: target)(**kwargs)
-            ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+        # The above lambda extracts the keyword argument 'target' from kwargs
-        return ret
+        # while ensuring there are no stray ones. When Python 2 support
        # is dropped, remove it and change the function signature to:
        #
        #     def _merge_subtitles(cls, *dicts, target=None):
        if target is None:
            target = {}
        for d in dicts:
            for lang, subs in d.items():
                target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
        return target
    def extract_automatic_captions(self, *args, **kwargs):
        if (self._downloader.params.get('writeautomaticsub', False)
--- a/yt_dlp/extractor/elonet.py
+++ b/yt_dlp/extractor/elonet.py
@ -1,9 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import os
 import re
 import tempfile
 from .common import InfoExtractor
 from ..utils import (
@ -12,12 +10,12 @@
    try_get,
 )
 from ..compat import compat_str
 from ..downloader.hls import HlsFD
 class ElonetIE(InfoExtractor):
    _VALID_URL = r'https?://elonet\.finna\.fi/Record/kavi\.elonet_elokuva_(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
        # m3u8 with subtitles
        'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_107867',
        'md5': '8efc954b96c543711707f87de757caea',
        'info_dict': {
@ -27,62 +25,17 @@ class ElonetIE(InfoExtractor):
            'description': 'Valkoinen peura (1952) on Erik Blombergin ohjaama ja yhdessä Mirjami Kuosmasen kanssa käsikirjoittama tarunomainen kertomus valkoisen peuran hahmossa lii...',
            'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_107867&index=0&size=large',
        },
-    }
+    }, {
-
+        # DASH with subtitles
-    def _download_m3u8_chunked_subtitle(self, chunklist_url):
+        'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_116539',
-        """
+        'info_dict': {
-        Download VTT subtitles from pieces in manifest URL.
+            'id': '116539',
-        Return a string containing joined chunks with extra headers removed.
+            'ext': 'mp4',
-        """
+            'title': 'Minulla on tiikeri',
-        with tempfile.NamedTemporaryFile(delete=True) as outfile:
+            'description': 'Pienellä pojalla, joka asuu kerrostalossa, on kotieläimenä tiikeri. Se on kuitenkin salaisuus. Kerrostalon räpätäti on Kotilaisen täti, joka on aina vali...',
-            fname = outfile.name
+            'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_116539&index=0&size=large&source=Solr',
-        hlsdl = HlsFD(self._downloader, {})
+        }
-        hlsdl.download(compat_str(fname), {"url": chunklist_url})
+    }]
        with open(fname, 'r') as fin:
            # Remove (some) headers
            fdata = re.sub(r'X-TIMESTAMP-MAP.*\n+|WEBVTT\n+', '', fin.read())
        os.remove(fname)
        return "WEBVTT\n\n" + fdata
    def _parse_m3u8_subtitles(self, m3u8_doc, m3u8_url):
        """
        Parse subtitles from HLS / m3u8 manifest.

        Scans m3u8_doc line by line for EXT-X-MEDIA:TYPE=SUBTITLES entries
        that carry both a LANGUAGE and a URI attribute, fetches every
        referenced chunklist through _download_m3u8_chunked_subtitle and
        returns a dict mapping the language to a one-element list of
        {'ext': 'vtt', 'data': <joined subtitle text>}.

        A URI that already has a scheme (e.g. "https://...") is used as-is;
        any other URI is appended to the directory part of m3u8_url.
        If several entries share a language, the last one wins.
        """
        subtitles = {}
        # Everything up to and including the last '/' of the manifest URL
        baseurl = m3u8_url[:m3u8_url.rindex('/') + 1]
        for line in m3u8_doc.split('\n'):
            if 'EXT-X-MEDIA:TYPE=SUBTITLES' not in line:
                continue
            lang = self._search_regex(
                r'LANGUAGE="(.+?)"', line, 'lang', default=False)
            uri = self._search_regex(
                r'URI="(.+?)"', line, 'uri', default=False)
            if not (lang and uri):
                continue
            # Playlist URIs may be absolute; blindly prefixing the manifest
            # directory to such a URI would produce a malformed URL.
            if re.match(r'[a-zA-Z][a-zA-Z0-9+.-]*://', uri):
                chunklist_url = uri
            else:
                chunklist_url = baseurl + uri
            data = self._download_m3u8_chunked_subtitle(chunklist_url)
            subtitles[lang] = [{'ext': 'vtt', 'data': data}]
        return subtitles
    def _parse_mpd_subtitles(self, mpd_doc):
        """
        Parse subtitles from MPD manifest.
        """
        ns = '{urn:mpeg:dash:schema:mpd:2011}'
        subtitles = {}
        for aset in mpd_doc.findall(".//%sAdaptationSet[@mimeType='text/vtt']" % (ns)):
            lang = aset.attrib.get('lang', 'unk')
            url = aset.find("./%sRepresentation/%sBaseURL" % (ns, ns)).text
            subtitles[lang] = [{'ext': 'vtt', 'url': url}]
        return subtitles
    def _get_subtitles(self, fmt, doc, url):
        """
        Dispatch subtitle parsing according to the manifest type.

        fmt 'm3u8' hands doc and url to _parse_m3u8_subtitles, fmt 'mpd' hands
        doc to _parse_mpd_subtitles. Any other fmt triggers a warning and
        yields an empty dict.
        """
        if fmt == 'mpd':
            return self._parse_mpd_subtitles(doc)
        if fmt == 'm3u8':
            return self._parse_m3u8_subtitles(doc, url)
        # Unknown manifest type: warn, but do not abort the extraction
        self.report_warning(
            "Cannot download subtitles from '%s' streams." % (fmt))
        return {}
    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -101,8 +54,8 @@ def _real_extract(self, url):
            self._parse_json(json_s, video_id),
            lambda x: x[0]["src"], compat_str)
        formats = []
        subtitles = {}
        if re.search(r'\.m3u8\??', src):
            fmt = 'm3u8'
            res = self._download_webpage_handle(
                # elonet servers have certificate problems
                src.replace('https:', 'http:'), video_id,
@ -111,11 +64,10 @@ def _real_extract(self, url):
            if res:
                doc, urlh = res
                url = urlh.geturl()
-                formats = self._parse_m3u8_formats(doc, url)
+                formats, subtitles = self._parse_m3u8_formats_and_subtitles(doc, url)
                for f in formats:
                    f['ext'] = 'mp4'
        elif re.search(r'\.mpd\??', src):
            fmt = 'mpd'
            res = self._download_xml_handle(
                src, video_id,
                note='Downloading MPD manifest',
@ -123,7 +75,7 @@ def _real_extract(self, url):
            if res:
                doc, urlh = res
                url = base_url(urlh.geturl())
-                formats = self._parse_mpd_formats(doc, mpd_base_url=url)
+                formats, subtitles = self._parse_mpd_formats_and_subtitles(doc, mpd_base_url=url)
        else:
            raise ExtractorError("Unknown streaming format")
@ -133,5 +85,5 @@ def _real_extract(self, url):
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
-            'subtitles': self.extract_subtitles(fmt, doc, url),
+            'subtitles': subtitles,
        }
--- a/yt_dlp/extractor/francetv.py
+++ b/yt_dlp/extractor/francetv.py
@ -151,6 +151,7 @@ def sign(manifest_url, manifest_id):
                    videos.append(fallback_info['video'])
        formats = []
        subtitles = {}
        for video in videos:
            video_url = video.get('url')
            if not video_url:
@ -171,10 +172,12 @@ def sign(manifest_url, manifest_id):
                    sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
                    video_id, f4m_id=format_id, fatal=False))
            elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    sign(video_url, format_id), video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id=format_id,
-                    fatal=False))
+                    fatal=False)
                formats.extend(m3u8_fmts)
                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
@ -199,13 +202,12 @@ def sign(manifest_url, manifest_id):
            title += ' - %s' % subtitle
        title = title.strip()
-        subtitles = {}
+        subtitles.setdefault('fr', []).extend(
-        subtitles_list = [{
+            [{
-            'url': subformat['url'],
+                'url': subformat['url'],
-            'ext': subformat.get('format'),
+                'ext': subformat.get('format'),
-        } for subformat in info.get('subtitles', []) if subformat.get('url')]
+            } for subformat in info.get('subtitles', []) if subformat.get('url')]
-        if subtitles_list:
+        )
            subtitles['fr'] = subtitles_list
        return {
            'id': video_id,
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@ -2444,8 +2444,9 @@ def _real_extract(self, url):
        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
        if m:
            format_id = compat_str(m.group('format_id'))
            subtitles = {}
            if format_id.endswith('mpegurl'):
-                formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+                formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
            elif format_id == 'f4m':
                formats = self._extract_f4m_formats(url, video_id)
            else:
@ -2457,6 +2458,7 @@ def _real_extract(self, url):
                info_dict['direct'] = True
            self._sort_formats(formats)
            info_dict['formats'] = formats
            info_dict['subtitles'] = subtitles
            return info_dict
        if not self._downloader.params.get('test', False) and not is_intentional:
@ -2510,7 +2512,7 @@ def _real_extract(self, url):
            if doc.tag == 'rss':
                return self._extract_rss(url, video_id, doc)
            elif doc.tag == 'SmoothStreamingMedia':
-                info_dict['formats'] = self._parse_ism_formats(doc, url)
+                info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
                self._sort_formats(info_dict['formats'])
                return info_dict
            elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
@ -2524,7 +2526,7 @@ def _real_extract(self, url):
                        xspf_base_url=full_response.geturl()),
                    video_id)
            elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
-                info_dict['formats'] = self._parse_mpd_formats(
+                info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
                    doc,
                    mpd_base_url=full_response.geturl().rpartition('/')[0],
                    mpd_url=url)
--- a/yt_dlp/extractor/nytimes.py
+++ b/yt_dlp/extractor/nytimes.py
@ -46,6 +46,7 @@ def get_file_size(file_size):
        urls = []
        formats = []
        subtitles = {}
        for video in video_data.get('renditions', []):
            video_url = video.get('url')
            format_id = video.get('type')
@ -54,9 +55,11 @@ def get_file_size(file_size):
            urls.append(video_url)
            ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    video_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=format_id or 'hls', fatal=False))
+                    m3u8_id=format_id or 'hls', fatal=False)
                formats.extend(m3u8_fmts)
                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
            elif ext == 'mpd':
                continue
            #     formats.extend(self._extract_mpd_formats(
@ -96,6 +99,7 @@ def get_file_size(file_size):
            'uploader': video_data.get('byline'),
            'duration': float_or_none(video_data.get('duration'), 1000),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@ -103,7 +103,7 @@ def _real_extract(self, url):
                api_episode_url + '/videos', display_id,
                'Downloading video JSON metadata')['data'][0]
            m3u8_url = video_data['attributes']['url']
-            subtitle_m3u8_url = video_data['links']['download']
+            # XXX: additional URL at video_data['links']['download']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
@ -111,7 +111,7 @@ def _real_extract(self, url):
                        '%s is only available for FIRST members' % display_id)
            raise
-        formats = self._extract_m3u8_formats(
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
        self._sort_formats(formats)
@ -134,33 +134,6 @@ def _real_extract(self, url):
                            'url': img_url,
                        })
        subtitles = {}
        res = self._download_webpage_handle(
            subtitle_m3u8_url, display_id,
            'Downloading m3u8 information',
            'Failed to download m3u8 information',
            fatal=True, data=None, headers={}, query={})
        if res is not False:
            subtitle_m3u8_doc, _ = res
            for line in subtitle_m3u8_doc.split('\n'):
                if 'EXT-X-MEDIA:TYPE=SUBTITLES' in line:
                    parts = line.split(',')
                    for part in parts:
                        if 'LANGUAGE' in part:
                            lang = part[part.index('=') + 2:-1]
                        elif 'URI' in part:
                            uri = part[part.index('=') + 2:-1]
                    res = self._download_webpage_handle(
                        uri, display_id,
                        'Downloading m3u8 information',
                        'Failed to download m3u8 information',
                        fatal=True, data=None, headers={}, query={})
                    doc, _ = res
                    for l in doc.split('\n'):
                        if not l.startswith('#'):
                            subtitles[lang] = [{'url': uri[:-uri[::-1].index('/')] + l}]
                            break
        return {
            'id': video_id,
            'display_id': display_id,
--- a/yt_dlp/extractor/srgssr.py
+++ b/yt_dlp/extractor/srgssr.py
@ -87,6 +87,7 @@ def _real_extract(self, url):
        title = media_data['title']
        formats = []
        subtitles = {}
        q = qualities(['SD', 'HD'])
        for source in (media_data.get('resourceList') or []):
            format_url = source.get('url')
@ -104,12 +105,16 @@ def _real_extract(self, url):
                if source.get('tokenType') == 'AKAMAI':
                    format_url = self._get_tokenized_src(
                        format_url, media_id, format_id)
-                    formats.extend(self._extract_akamai_formats(
+                    fmts, subs = self._extract_akamai_formats_and_subtitles(
-                        format_url, media_id))
+                        format_url, media_id)
                    formats.extend(fmts)
                    subtitles = self._merge_subtitles(subtitles, subs)
                elif protocol == 'HLS':
-                    formats.extend(self._extract_m3u8_formats(
+                    m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                        format_url, media_id, 'mp4', 'm3u8_native',
-                        m3u8_id=format_id, fatal=False))
+                        m3u8_id=format_id, fatal=False)
                    formats.extend(m3u8_fmts)
                    subtitles = self._merge_subtitles(subtitles, m3u8_subs)
            elif protocol in ('HTTP', 'HTTPS'):
                formats.append({
                    'format_id': format_id,
@ -133,7 +138,6 @@ def _real_extract(self, url):
                })
        self._sort_formats(formats)
        subtitles = {}
        if media_type == 'video':
            for sub in (media_data.get('subtitleList') or []):
                sub_url = sub.get('url')
--- a/yt_dlp/extractor/threeqsdn.py
+++ b/yt_dlp/extractor/threeqsdn.py
@ -99,16 +99,21 @@ def _real_extract(self, url):
        aspect = float_or_none(config.get('aspect'))
        formats = []
        subtitles = {}
        for source_type, source in (config.get('sources') or {}).items():
            if not source:
                continue
            if source_type == 'dash':
-                formats.extend(self._extract_mpd_formats(
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
-                    source, video_id, mpd_id='mpd', fatal=False))
+                    source, video_id, mpd_id='mpd', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif source_type == 'hls':
-                formats.extend(self._extract_m3u8_formats(
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    m3u8_id='hls', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif source_type == 'progressive':
                for s in source:
                    src = s.get('src')
@ -138,7 +143,6 @@ def _real_extract(self, url):
        # behaviour is being kept as-is
        self._sort_formats(formats, ('res', 'source_preference'))
        subtitles = {}
        for subtitle in (config.get('subtitles') or []):
            src = subtitle.get('src')
            if not src:
--- a/yt_dlp/extractor/tv4.py
+++ b/yt_dlp/extractor/tv4.py
@ -93,18 +93,31 @@ def _real_extract(self, url):
                'device': 'browser',
                'protocol': 'hls',
            })['playbackItem']['manifestUrl']
-        formats = self._extract_m3u8_formats(
+        formats = []
        subtitles = {}
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            manifest_url, video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)
-        formats.extend(self._extract_mpd_formats(
+        formats.extend(fmts)
        subtitles = self._merge_subtitles(subtitles, subs)
        fmts, subs = self._extract_mpd_formats_and_subtitles(
            manifest_url.replace('.m3u8', '.mpd'),
-            video_id, mpd_id='dash', fatal=False))
+            video_id, mpd_id='dash', fatal=False)
-        formats.extend(self._extract_f4m_formats(
+        formats.extend(fmts)
        subtitles = self._merge_subtitles(subtitles, subs)
        fmts = self._extract_f4m_formats(
            manifest_url.replace('.m3u8', '.f4m'),
-            video_id, f4m_id='hds', fatal=False))
+            video_id, f4m_id='hds', fatal=False)
-        formats.extend(self._extract_ism_formats(
+        formats.extend(fmts)
        fmts, subs = self._extract_ism_formats_and_subtitles(
            re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
-            video_id, ism_id='mss', fatal=False))
+            video_id, ism_id='mss', fatal=False)
        formats.extend(fmts)
        subtitles = self._merge_subtitles(subtitles, subs)
        if not formats and info.get('is_geo_restricted'):
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
@ -115,7 +128,7 @@ def _real_extract(self, url):
            'id': video_id,
            'title': title,
            'formats': formats,
-            # 'subtitles': subtitles,
+            'subtitles': subtitles,
            'description': info.get('description'),
            'timestamp': parse_iso8601(info.get('broadcast_date_time')),
            'duration': int_or_none(info.get('duration')),
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@ -36,9 +36,9 @@ class TwitterBaseIE(InfoExtractor):
    def _extract_variant_formats(self, variant, video_id):
        variant_url = variant.get('url')
        if not variant_url:
-            return []
+            return [], {}
        elif '.m3u8' in variant_url:
-            return self._extract_m3u8_formats(
+            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
@ -49,22 +49,27 @@ def _extract_variant_formats(self, variant, video_id):
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
-            return [f]
+            return [f], {}
    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
-            formats.extend(self._extract_variant_formats(
+            fmts, subs = self._extract_variant_formats(
-                video_variant.attrib, video_id))
+                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
-            formats.extend(self._extract_variant_formats({'url': video_url}, video_id))
+            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
-        return formats
+            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles
    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
@ -471,8 +476,11 @@ def extract_from_video_info(media):
            video_info = media.get('video_info') or {}
            formats = []
            subtitles = {}
            for variant in video_info.get('variants', []):
-                formats.extend(self._extract_variant_formats(variant, twid))
+                fmts, subs = self._extract_variant_formats(variant, twid)
                subtitles = self._merge_subtitles(subtitles, subs)
                formats.extend(fmts)
            self._sort_formats(formats)
            thumbnails = []
@ -491,6 +499,7 @@ def add_thumbnail(name, size):
            info.update({
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': float_or_none(video_info.get('duration_millis'), 1000),
            })
@ -540,7 +549,7 @@ def get_binding_value(k):
                    is_amplify = card_name == 'amplify'
                    vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
                    content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
-                    formats = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
+                    formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
                    self._sort_formats(formats)
                    thumbnails = []
@ -558,6 +567,7 @@ def get_binding_value(k):
                    info.update({
                        'formats': formats,
                        'subtitles': subtitles,
                        'thumbnails': thumbnails,
                        'duration': int_or_none(get_binding_value(
                            'content_duration_seconds')),
--- a/yt_dlp/extractor/uplynk.py
+++ b/yt_dlp/extractor/uplynk.py
@ -30,7 +30,7 @@ class UplynkIE(InfoExtractor):
    def _extract_uplynk_info(self, uplynk_content_url):
        path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
        display_id = video_id or external_id
-        formats = self._extract_m3u8_formats(
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            'http://content.uplynk.com/%s.m3u8' % path,
            display_id, 'mp4', 'm3u8_native')
        if session_id:
@ -48,6 +48,7 @@ def _extract_uplynk_info(self, uplynk_content_url):
            'duration': float_or_none(asset.get('duration')),
            'uploader_id': asset.get('owner'),
            'formats': formats,
            'subtitles': subtitles,
        }
    def _real_extract(self, url):
--- a/yt_dlp/extractor/wat.py
+++ b/yt_dlp/extractor/wat.py
@ -69,19 +69,24 @@ def _real_extract(self, url):
        title = video_info['title']
        formats = []
        subtitles = {}
        def extract_formats(manifest_urls):
            for f, f_url in manifest_urls.items():
                if not f_url:
                    continue
                if f in ('dash', 'mpd'):
-                    formats.extend(self._extract_mpd_formats(
+                    fmts, subs = self._extract_mpd_formats_and_subtitles(
                        f_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
-                        video_id, mpd_id='dash', fatal=False))
+                        video_id, mpd_id='dash', fatal=False)
                elif f == 'hls':
-                    formats.extend(self._extract_m3u8_formats(
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                        f_url, video_id, 'mp4',
-                        'm3u8_native', m3u8_id='hls', fatal=False))
+                        'm3u8_native', m3u8_id='hls', fatal=False)
                else:
                    continue
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
        delivery = video_data.get('delivery') or {}
        extract_formats({delivery.get('format'): delivery.get('url')})
@ -103,4 +108,5 @@ def extract_formats(manifest_urls):
                video_data, lambda x: x['mediametrie']['chapters'][0]['estatS4'])),
            'duration': int_or_none(video_info.get('duration')),
            'formats': formats,
            'subtitles': subtitles,
        }
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@ -2340,15 +2340,20 @@ def make_HTTPS_handler(params, **kwargs):
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
-def bug_reports_message():
+def bug_reports_message(before=';'):
    if ytdl_is_updateable():
        update_cmd = 'type  yt-dlp -U  to update'
    else:
        update_cmd = 'see  https://github.com/yt-dlp/yt-dlp  on how to update'
-    msg = '; please report this issue on https://github.com/yt-dlp/yt-dlp .'
+    msg = 'please report this issue on  https://github.com/yt-dlp/yt-dlp .'
    msg += ' Make sure you are using the latest version; %s.' % update_cmd
    msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
-    return msg
+
    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]
    return (before + ' ' if before else '') + msg
 class YoutubeDLError(Exception):
--- a/yt_dlp/webvtt.py
+++ b/yt_dlp/webvtt.py
@ -0,0 +1,378 @@
 # coding: utf-8
 from __future__ import unicode_literals, print_function, division
 """
 A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
 to be able to assemble a single stand-alone subtitle file, suitably adjusting
 timestamps on the way, while everything else is passed through unmodified.
 Regular expressions based on the W3C WebVTT specification
 <https://www.w3.org/TR/webvtt1/>. The X-TIMESTAMP-MAP extension is described
 in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>.
 """
 import re
 import io
 from .utils import int_or_none
 from .compat import (
    compat_str as str,
    compat_Pattern,
    compat_Match,
 )
 class _MatchParser(object):
    """
    A cursor over the text being parsed.

    Keeps the full input together with the current offset into it, and
    offers helpers to test for a syntax element at that offset and to step
    over it once it has been recognised.
    """

    def __init__(self, string):
        self._pos = 0
        self._data = string

    def match(self, r):
        """
        Test r against the data at the current position, without moving.

        For a compiled pattern the result of its match() is returned (a
        match object or None). For a string, the length of the string is
        returned if the data continues with it, None otherwise. Any other
        argument type raises ValueError.
        """
        if isinstance(r, compat_Pattern):
            return r.match(self._data, self._pos)
        if not isinstance(r, str):
            raise ValueError(r)
        return len(r) if self._data.startswith(r, self._pos) else None

    def advance(self, by):
        """
        Move the position forward and return the argument unchanged.

        by may be None (no movement), a match object (its whole match is
        skipped), a string (its length is skipped) or an integer count.
        Anything else raises ValueError.
        """
        if by is None:
            return by
        if isinstance(by, compat_Match):
            step = len(by.group(0))
        elif isinstance(by, str):
            step = len(by)
        elif isinstance(by, int):
            step = by
        else:
            raise ValueError(by)
        self._pos += step
        return by

    def consume(self, r):
        """
        Match r at the current position and, on success, step over it.
        Returns whatever match() returned.
        """
        found = self.match(r)
        return self.advance(found)

    def child(self):
        """Return a child parser starting at the current position."""
        return _MatchChildParser(self)
 class _MatchChildParser(_MatchParser):
    """
    A parser forked from another one for speculative parsing.

    It reads the same data as its parent but tracks its own position, so a
    failed attempt can simply be dropped, leaving the parent where it was.
    """

    def __init__(self, parent):
        super(_MatchChildParser, self).__init__(parent._data)
        # Start where the parent currently is.
        self._pos = parent._pos
        self.__parent = parent

    def commit(self):
        """
        Move the parent to this child's current position and return the
        parent.
        """
        owner = self.__parent
        owner._pos = self._pos
        return owner
 class ParseError(Exception):
    """
    Raised when the input cannot be parsed at the parser's position.

    The message contains the offset and up to 20 characters of the data
    found there.
    """

    def __init__(self, parser):
        offset = parser._pos
        excerpt = parser._data[offset:offset + 20]
        message = "Parse error at position %u (near %r)" % (offset, excerpt)
        super(ParseError, self).__init__(message)
 # A timestamp: optional hours (two or more digits) followed by ':',
 # two-digit minutes, ':', two-digit seconds, a mandatory '.', and an
 # optional three-digit fraction.
 # Groups: 1 = hours, 2 = minutes, 3 = seconds, 4 = milliseconds.
 # Groups 1 and 4 are None when that part is absent.
 _REGEX_TS = re.compile(r'''(?x)
    (?:([0-9]{2,}):)?
    ([0-9]{2}):
    ([0-9]{2})\.
    ([0-9]{3})?
 ''')
 # Matches (with zero width) only at the very end of the input.
 _REGEX_EOF = re.compile(r'\Z')
 # Exactly one line terminator: CRLF, a lone CR or a lone LF.
 _REGEX_NL = re.compile(r'(?:\r\n|[\r\n])')
 # A run of one or more consecutive line terminators.
 _REGEX_BLANK = re.compile(r'(?:\r\n|[\r\n])+')
 def _parse_ts(ts):
    """
    Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS)
    into an MPEG PES timestamp: a tick counter at 90 kHz resolution.

    The match must expose four groups: hours, minutes, seconds and
    milliseconds. A group that did not participate in the match (None)
    counts as zero; this covers the optional hours as well as the optional
    millisecond fraction.
    """
    hours, minutes, seconds, millis = ts.groups()
    # Sum everything in milliseconds first; one millisecond is 90 ticks.
    return 90 * (
        int(hours or 0) * 3600000
        + int(minutes or 0) * 60000
        + int(seconds or 0) * 1000
        + int(millis or 0)
    )
 def _format_ts(ts):
    """
    Convert an MPEG PES timestamp into a WebVTT timestamp.
    This will lose sub-millisecond precision.

    ts is a tick count at 90 kHz. The result has the form HH:MM:SS.mmm,
    with the hours field growing beyond two digits when necessary.
    """
    # 90 ticks make one millisecond; adding 45 first rounds to the nearest.
    total_ms = int((ts + 45) // 90)
    # divmod() returns (quotient, remainder): the quotient is carried on to
    # the next larger unit, the remainder is the value of the current field.
    total_s, ms = divmod(total_ms, 1000)
    total_min, s = divmod(total_s, 60)
    h, m = divmod(total_min, 60)
    return '%02u:%02u:%02u.%03u' % (h, m, s, ms)
 class Block(object):
    """
    Common base of all WebVTT block types.

    Every keyword argument passed to the constructor is stored as an
    instance attribute of the same name. Subclasses that rely on the
    default parse() have to define a compiled pattern as _REGEX.
    """

    def __init__(self, **kwargs):
        for name in kwargs:
            setattr(self, name, kwargs[name])

    @classmethod
    def parse(cls, parser):
        """
        Try to read one block of this type at the parser's position.

        On success the parser is moved past the matched text and a new
        instance holding that text in its raw attribute is returned. If the
        pattern does not match, the parser is left alone and None is
        returned.
        """
        found = parser.match(cls._REGEX)
        if found:
            parser.advance(found)
            return cls(raw=found.group(0))
        return None

    def write_into(self, stream):
        """Write the block's text (self.raw) to stream unchanged."""
        stream.write(self.raw)
 class HeaderBlock(Block):
    """
    Marker base class for the blocks that belong to the header part of a
    WebVTT file, i.e. the part that precedes the first cue block.
    """
 class Magic(HeaderBlock):
    """
    The "WEBVTT" signature line that opens a file or segment, together with
    the optional X-TIMESTAMP-MAP header line that may follow it.

    Instances created by parse() carry three attributes:
      extra  -- the text after "WEBVTT" on the signature line, including
                its leading space or tab, or None if there is none
      local  -- the LOCAL component of the timestamp map, converted with
                _parse_ts, or None if absent
      mpegts -- the MPEGTS component of the timestamp map, converted with
                int_or_none, or None if absent
    """
    _REGEX = re.compile(r'\ufeff?WEBVTT([ \t][^\r\n]*)?(?:\r\n|[\r\n])')
    # XXX: The X-TIMESTAMP-MAP extension is described in RFC 8216 §3.5
    # <https://tools.ietf.org/html/rfc8216#section-3.5>, but the RFC
    # doesn’t specify the exact grammar nor where in the WebVTT
    # syntax it should be placed; the below has been devised based
    # on usage in the wild
    #
    # And strictly speaking, the presence of this extension violates
    # the W3C WebVTT spec. Oh well.
    _REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=')
    _REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:')
    _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')

    @classmethod
    def __parse_tsmap(cls, parser):
        """
        Parse the rest of an X-TIMESTAMP-MAP line, i.e. what follows the
        "X-TIMESTAMP-MAP=" prefix, up to and including its line terminator.

        The line is a comma-separated list of "LOCAL:<timestamp>" and
        "MPEGTS:<digits>" components in any order. Returns the tuple
        (local, mpegts); a component that does not occur is returned as
        None. Raises ParseError on any other content. The given parser is
        only advanced when the whole line has been parsed.
        """
        parser = parser.child()
        # Either component may be missing from the line; start from None so
        # that the return below never touches an unassigned name.
        local, mpegts = None, None
        while True:
            m = parser.consume(cls._REGEX_TSMAP_LOCAL)
            if m:
                m = parser.consume(_REGEX_TS)
                if m is None:
                    raise ParseError(parser)
                local = _parse_ts(m)
                if local is None:
                    raise ParseError(parser)
            else:
                m = parser.consume(cls._REGEX_TSMAP_MPEGTS)
                if m:
                    mpegts = int_or_none(m.group(1))
                    if mpegts is None:
                        raise ParseError(parser)
                else:
                    raise ParseError(parser)
            if parser.consume(','):
                continue
            if parser.consume(_REGEX_NL):
                break
            raise ParseError(parser)
        parser.commit()
        return local, mpegts

    @classmethod
    def parse(cls, parser):
        """
        Parse the signature line, an optional X-TIMESTAMP-MAP line and the
        line terminator that must follow them.

        Returns a Magic instance and advances the given parser past the
        header. Raises ParseError (leaving the given parser untouched) if
        the signature is missing or the header is malformed.
        """
        parser = parser.child()
        m = parser.consume(cls._REGEX)
        if not m:
            raise ParseError(parser)
        extra = m.group(1)
        local, mpegts = None, None
        if parser.consume(cls._REGEX_TSMAP):
            local, mpegts = cls.__parse_tsmap(parser)
        if not parser.consume(_REGEX_NL):
            raise ParseError(parser)
        parser.commit()
        return cls(extra=extra, mpegts=mpegts, local=local)

    def write_into(self, stream):
        """
        Write the header to stream, using '\\n' as the line terminator.

        The X-TIMESTAMP-MAP line is only written when local or mpegts is
        set to a non-zero value; a component that is None is written as
        zero. The header always ends with an empty line.
        """
        stream.write('WEBVTT')
        if self.extra is not None:
            stream.write(self.extra)
        stream.write('\n')
        if self.local or self.mpegts:
            stream.write('X-TIMESTAMP-MAP=LOCAL:')
            stream.write(_format_ts(self.local if self.local is not None else 0))
            stream.write(',MPEGTS:')
            stream.write(str(self.mpegts if self.mpegts is not None else 0))
            stream.write('\n')
        stream.write('\n')
 class StyleBlock(HeaderBlock):
    """
    A STYLE block: a line consisting of "STYLE" (optionally followed by
    spaces or tabs), then any number of non-empty lines that do not contain
    "-->", then one more line terminator.

    No parse() is defined here, so the inherited one is used; it stores the
    matched text unmodified.
    """
    _REGEX = re.compile(r'''(?x)
        STYLE[\ \t]*(?:\r\n|[\r\n])
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    ''')
 class RegionBlock(HeaderBlock):
    """
    A REGION block: "REGION" (optionally followed by spaces or tabs), a
    line terminator, any number of non-empty settings lines that do not
    contain "-->", and one more line terminator.

    The line terminator after the "REGION" line is optional in the pattern
    so that input with settings on the same line as "REGION" is still
    accepted.
    """
    _REGEX = re.compile(r'''(?x)
        REGION[\ \t]*(?:\r\n|[\r\n])?
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    ''')
 class CommentBlock(Block):
    """
    A NOTE (comment) block: "NOTE" followed by a space, a tab or a line
    terminator, then any number of non-empty lines that do not contain
    "-->", then one more line terminator.

    No parse() is defined here, so the inherited one is used; it stores the
    matched text unmodified.
    """
    _REGEX = re.compile(r'''(?x)
        NOTE(?:\r\n|[\ \t\r\n])
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    ''')
 class CueBlock(Block):
    """
    A single cue: optional identifier, timing line with optional settings,
    and the payload lines. The payload text is kept as-is, without being
    interpreted.
    """
    _REGEX_ID = re.compile(r'((?:(?!-->)[^\r\n])+)(?:\r\n|[\r\n])')
    _REGEX_ARROW = re.compile(r'[ \t]+-->[ \t]+')
    _REGEX_SETTINGS = re.compile(r'[ \t]+((?:(?!-->)[^\r\n])+)')
    _REGEX_PAYLOAD = re.compile(r'[^\r\n]+(?:\r\n|[\r\n])?')

    @classmethod
    def parse(cls, parser):
        """
        Try to read a cue at the parser's position.

        Returns a CueBlock with the attributes id, start, end (both
        converted with _parse_ts), settings and text, and moves the parser
        past the cue. If the text at this position does not have the shape
        of a cue, returns None and leaves the parser where it was.
        """
        attempt = parser.child()

        # An identifier line is any line without "-->" preceding the timings.
        id_match = attempt.consume(cls._REGEX_ID)
        cue_id = id_match.group(1) if id_match else None

        start_match = attempt.consume(_REGEX_TS)
        if not start_match:
            return None
        if not attempt.consume(cls._REGEX_ARROW):
            return None
        end_match = attempt.consume(_REGEX_TS)
        if not end_match:
            return None
        settings_match = attempt.consume(cls._REGEX_SETTINGS)
        if not attempt.consume(_REGEX_NL):
            return None

        start = _parse_ts(start_match)
        end = _parse_ts(end_match)
        if settings_match is not None:
            settings = settings_match.group(1)
        else:
            settings = None

        # Collect payload lines, terminators included, up to the first
        # empty line or the end of the input.
        payload = []
        line = attempt.consume(cls._REGEX_PAYLOAD)
        while line:
            payload.append(line.group(0))
            line = attempt.consume(cls._REGEX_PAYLOAD)

        attempt.commit()
        return cls(
            id=cue_id,
            start=start, end=end, settings=settings,
            text=''.join(payload)
        )

    def write_into(self, stream):
        """
        Write the cue to stream: identifier line (if any), timing line with
        timestamps produced by _format_ts and the settings (if any), the
        payload text, and a final '\\n'.
        """
        def pieces():
            # Produced lazily so that each piece is computed right before
            # it is written.
            if self.id is not None:
                yield self.id
                yield '\n'
            yield _format_ts(self.start)
            yield ' --> '
            yield _format_ts(self.end)
            if self.settings is not None:
                yield ' '
                yield self.settings
            yield '\n'
            yield self.text
            yield '\n'

        for piece in pieces():
            stream.write(piece)

    @property
    def as_json(self):
        """The cue's fields as a plain dictionary."""
        return {
            field: getattr(self, field)
            for field in ('id', 'start', 'end', 'text', 'settings')
        }
 def parse_fragment(frag_content):
    """
    Parse the raw bytes of a WebVTT file, yielding its blocks one by one.

    frag_content is decoded as UTF-8. The first item yielded is always the
    Magic header; it is followed by the header blocks (regions, styles,
    comments) and then by the cues and comments of the body. Runs of empty
    lines between blocks are skipped. ParseError is raised if the header is
    invalid or if the body contains something that is neither a comment nor
    a cue.
    """
    parser = _MatchParser(frag_content.decode('utf-8'))
    yield Magic.parse(parser)

    # Header section: ends at the first thing that is not a header block.
    in_header = True
    while in_header and not parser.match(_REGEX_EOF):
        if parser.consume(_REGEX_BLANK):
            continue
        for block_type in (RegionBlock, StyleBlock, CommentBlock):
            block = block_type.parse(parser)
            if block:
                yield block  # XXX: comment blocks could be skipped instead
                break
        else:
            in_header = False

    # Body section: only comments and cues are allowed from here on.
    while not parser.match(_REGEX_EOF):
        if parser.consume(_REGEX_BLANK):
            continue
        for block_type in (CommentBlock, CueBlock):
            block = block_type.parse(parser)
            if block:
                yield block  # XXX: comment blocks could be skipped instead
                break
        else:
            raise ParseError(parser)