[NovaEmbed] Fix extractor

Closes #1570
2024-11-02 17:22:31 +01:00 · 2021-11-12 03:12:53 +05:30 · 2021-11-12 03:12:53 +05:30 · c1dc0ee56e
commit c1dc0ee56e
parent bf5f605e76
1 changed file with 27 additions and 6 deletions
--- a/yt_dlp/extractor/nova.py
+++ b/yt_dlp/extractor/nova.py
@@ -10,6 +10,7 @@
    int_or_none,
    js_to_json,
    qualities,
+    traverse_obj,
    unified_strdate,
    url_or_none,
 )
@@ -17,30 +18,44 @@

 class NovaEmbedIE(InfoExtractor):
    _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
-        'md5': 'ee009bafcc794541570edd44b71cbea3',
        'info_dict': {
            'id': '8o0n0r',
-            'ext': 'mp4',
            'title': '2180. díl',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2578,
        },
-    }
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+        'expected_warnings': ['DRM protected', 'Requested format is not available'],
+    }, {
+        'url': 'https://media.cms.nova.cz/embed/KybpWYvcgOa',
+        'info_dict': {
+            'id': 'KybpWYvcgOa',
+            'ext': 'mp4',
+            'title': 'Borhyová oslavila 60? Soutěžící z pořadu odboural moderátora Ondřeje Sokola',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 114,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

+        has_drm = False
        duration = None
        formats = []

        player = self._parse_json(
            self._search_regex(
-                r'Player\.init\s*\([^,]+,\s*(?:\w+\s*\?\s*{.+?}\s*:\s*)?({.+})\s*,\s*{.+?}\s*\)\s*;',
-                webpage, 'player', default='{}'), video_id, fatal=False)
+                r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)',
+                webpage, 'player', default='{}', group='json'), video_id, fatal=False)
        if player:
            for format_id, format_list in player['tracks'].items():
                if not isinstance(format_list, list):
@@ -48,6 +63,10 @@ def _real_extract(self, url):
                for format_dict in format_list:
                    if not isinstance(format_dict, dict):
                        continue
+                    if (not self.get_param('allow_unplayable_formats')
+                            and traverse_obj(format_dict, ('drm', 'keySystem'))):
+                        has_drm = True
+                        continue
                    format_url = url_or_none(format_dict.get('src'))
                    format_type = format_dict.get('type')
                    ext = determine_ext(format_url)
@@ -104,6 +123,8 @@ def _real_extract(self, url):
                    f['format_id'] = f_id
                    formats.append(f)

+        if not formats and has_drm:
+            self.report_drm(video_id)
        self._sort_formats(formats)

        title = self._og_search_title(