[svtplay:series] Improve extraction (closes #16059)

2024-11-02 17:22:31 +01:00 · 2018-04-04 23:52:00 +07:00 · 2018-04-04 23:52:00 +07:00 · b71bb3ba8b
commit b71bb3ba8b
parent fd97fa7bfc
2 changed files with 18 additions and 20 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1031,7 +1031,7 @@
 from .svt import (
    SVTIE,
    SVTPlayIE,
-    SVTPlaylistIE,
+    SVTSeriesIE,
 )
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -193,10 +193,8 @@ def _real_extract(self, url):
            return info_dict


-class SVTPlaylistIE(InfoExtractor):
-    IE_DESC = 'SVT Play serie'
+class SVTSeriesIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
-    IE_NAME = 'svtplay:serie'
    _TESTS = [{
        'url': 'https://www.svtplay.se/rederiet',
        'info_dict': {
@@ -209,33 +207,28 @@ class SVTPlaylistIE(InfoExtractor):

    @classmethod
    def suitable(cls, url):
-        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPlaylistIE, cls).suitable(url)
+        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)

    def _real_extract(self, url):
        video_id = self._match_id(url)

-        page = self._download_webpage(
-            url, video_id,
-            note='Downloading serie page',
-            errnote='unable to fetch serie page')
+        webpage = self._download_webpage(
+            url, video_id, 'Downloading serie page')

-        root_json = self._search_regex(
-            r'root\[\'__svtplay\'\]\s*=(.+);\n',
-            page, 'root')
-        root = self._parse_json(root_json, video_id)
-
-        metadata = root.get('metaData', {})
-        related_videos_accordion = root['relatedVideoContent']['relatedVideosAccordion']
+        root = self._parse_json(
+            self._search_regex(
+                r'root\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n',
+                webpage, 'content', group='json'),
+            video_id)

        entries = []
-        for season in related_videos_accordion:
+        for season in root['relatedVideoContent']['relatedVideosAccordion']:
            videos = season.get('videos')
            if not isinstance(videos, list):
                continue
-
            for video in videos:
                content_url = video.get('contentUrl')
-                if not isinstance(content_url, compat_str):
+                if not content_url or not isinstance(content_url, compat_str):
                    continue
                entries.append(
                    self.url_result(
@@ -244,5 +237,10 @@ def _real_extract(self, url):
                        video_title=video.get('title')
                    ))

+        metadata = root.get('metaData')
+        if not isinstance(metadata, dict):
+            metadata = {}
+
        return self.playlist_result(
-            entries, video_id, metadata.get('title'), metadata.get('description'))
+            entries, video_id, metadata.get('title'),
+            metadata.get('description'))