Merge branch 'yt-dlp:master' into biliSearchPageIE

2024-10-06 17:37:08 +02:00 · 2024-06-28 00:52:00 +12:00 · 2024-06-28 00:52:00 +12:00 · 31be8d3dbd
commit 31be8d3dbd
parent af0eb72a8b 0953209a85
3 changed files with 44 additions and 34 deletions
--- a/yt_dlp/extractor/cloudycdn.py
+++ b/yt_dlp/extractor/cloudycdn.py
@@ -1,3 +1,5 @@
+import re
+
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
@@ -35,6 +37,20 @@ class CloudyCDNIE(InfoExtractor):
            'duration': 1205,
            'upload_date': '20221130',
        },
+    }, {
+        # Video-only m3u8 formats need manual fixup
+        'url': 'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074',
+        'md5': 'fc472e40f6e6238446509be411c920e2',
+        'info_dict': {
+            'id': '08j_d24-6000-074',
+            'ext': 'mp4',
+            'upload_date': '20240620',
+            'duration': 1673,
+            'title': 'D24-6000-074-cetstud',
+            'timestamp': 1718902233,
+            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
+        },
+        'params': {'format': 'bv'},
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
@@ -63,6 +79,9 @@ def _real_extract(self, url):
        formats, subtitles = [], {}
        for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
+            for fmt in fmts:
+                if re.search(r'chunklist_b\d+_vo_', fmt['url']):
+                    fmt['acodec'] = 'none'
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

--- a/yt_dlp/extractor/matchtv.py
+++ b/yt_dlp/extractor/matchtv.py
@@ -1,51 +1,35 @@
-import random
-
 from .common import InfoExtractor
-from ..utils import xpath_text


 class MatchTVIE(InfoExtractor):
-    _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
+    _VALID_URL = [
+        r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
+        r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
+    ]
    _TESTS = [{
-        'url': 'http://matchtv.ru/#live-player',
+        'url': 'http://matchtv.ru/on-air/',
        'info_dict': {
            'id': 'matchtv-live',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
-            'is_live': True,
+            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
-        'url': 'http://matchtv.ru/on-air/',
+        'url': 'https://video.matchtv.ru/iframe/channel/106',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = 'matchtv-live'
-        video_url = self._download_json(
-            'http://player.matchtv.ntvplus.tv/player/smil', video_id,
-            query={
-                'ts': '',
-                'quality': 'SD',
-                'contentId': '561d2c0df7159b37178b4567',
-                'sign': '',
-                'includeHighlights': '0',
-                'userId': '',
-                'sessionId': random.randint(1, 1000000000),
-                'contentType': 'channel',
-                'timeShift': '0',
-                'platform': 'portal',
-            },
-            headers={
-                'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
-            })['data']['videoUrl']
-        f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
-        formats = self._extract_f4m_formats(f4m_url, video_id)
+        webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
+        video_url = self._html_search_regex(
+            r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8'
        return {
            'id': video_id,
            'title': 'Матч ТВ - Прямой эфир',
            'is_live': True,
-            'formats': formats,
+            'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
        }
--- a/yt_dlp/extractor/mediasite.py
+++ b/yt_dlp/extractor/mediasite.py
@@ -15,6 +15,7 @@
    url_or_none,
    urljoin,
 )
+from ..utils.traversal import traverse_obj

 _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'

@@ -212,13 +213,14 @@ def _real_extract(self, url):
                stream_type, 'type%u' % stream_type)

            stream_formats = []
-            for unum, video_url in enumerate(video_urls):
-                video_url = url_or_none(video_url.get('Location'))
+            for unum, video in enumerate(video_urls):
+                video_url = url_or_none(video.get('Location'))
                if not video_url:
                    continue
                # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS

-                media_type = video_url.get('MediaType')
+                media_type = video.get('MediaType')
+                ext = mimetype2ext(video.get('MimeType'))
                if media_type == 'SS':
                    stream_formats.extend(self._extract_ism_formats(
                        video_url, resource_id,
@@ -229,15 +231,20 @@ def _real_extract(self, url):
                        video_url, resource_id,
                        mpd_id=f'{stream_id}-{snum}.{unum}',
                        fatal=False))
+                elif ext in ('m3u', 'm3u8'):
+                    stream_formats.extend(self._extract_m3u8_formats(
+                        video_url, resource_id,
+                        m3u8_id=f'{stream_id}-{snum}.{unum}',
+                        fatal=False))
                else:
                    stream_formats.append({
                        'format_id': f'{stream_id}-{snum}.{unum}',
                        'url': video_url,
-                        'ext': mimetype2ext(video_url.get('MimeType')),
+                        'ext': ext,
                    })

-            if stream.get('HasSlideContent', False):
-                images = player_options['PlayerLayoutOptions']['Images']
+            images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict}))
+            if stream.get('HasSlideContent') and images:
                stream_formats.append(self.__extract_slides(
                    stream_id=stream_id,
                    snum=snum,