yt-dlp/yt_dlp/extractor/spiegel.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from .jwplatform import JWPlatformIE


class SpiegelIE(InfoExtractor):
    # Extractor for pages on spiegel.de and manager-magazin.de. The page
    # identifier is the trailing numeric id or UUID of the URL path; the
    # media itself is referenced by a "mediaId" value found in the page
    # markup and is handed on as a ``jwplatform:`` URL.

    # Lower-case hexadecimal UUID in 8-4-4-4-12 layout.
    _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
    # The id group accepts either a run of digits or a UUID; optional
    # "-embed"/"-iframe" suffix, ".html" extension and "#fragment" may follow.
    _VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE
    # Matches a `mediaId: value` pair whose key and value are each wrapped
    # in a single quote, a double quote, or the literal entity `&#34;`.
    _MEDIA_ID_RE = r'(&#34;|["\'])mediaId\1\s*:\s*(&#34;|["\'])(?P<id>(?:(?!\2).)+)\2'
    _TESTS = [{
        'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
        'md5': '50c7948883ec85a3e431a0a44b7ad1d6',
        'info_dict': {
            'id': 'II0BUyxY',
            'display_id': '1259285',
            'ext': 'mp4',
            'title': 'Vulkan Tungurahua in Ecuador ist wieder aktiv - DER SPIEGEL - Wissenschaft',
            'description': 'md5:8029d8310232196eb235d27575a8b9f4',
            'duration': 48.0,
            'upload_date': '20130311',
            'timestamp': 1362997920,
        },
    }, {
        # The remaining entries only check that the URL pattern matches.
        'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
        'only_matching': True,
    }, {
        'url': 'https://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html',
        'only_matching': True,
    }, {
        # UUID-style identifier without a ".html" extension.
        'url': 'https://www.spiegel.de/panorama/urteile-im-goldmuenzenprozess-haftstrafen-fuer-clanmitglieder-a-aae8df48-43c1-4c61-867d-23f0a2d254b7',
        'only_matching': True,
    }, {
        'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
        'only_matching': True,
    }, {
        'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a ``url_transparent`` result for the page at *url*.

        The page is downloaded, its ``mediaId`` value is located with
        ``_MEDIA_ID_RE``, and the result points at ``jwplatform:<mediaId>``
        with ``JWPlatformIE`` named as the extractor key. The identifier
        matched from the URL is used for both ``id`` and ``display_id``;
        the title is the page's Open Graph title, or None when absent.
        """
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)
        jw_media_id = self._html_search_regex(
            self._MEDIA_ID_RE, html, 'media id', group='id')

        result = {
            '_type': 'url_transparent',
            'id': page_id,
            'display_id': page_id,
        }
        result['url'] = 'jwplatform:%s' % jw_media_id
        result['title'] = self._og_search_title(html, default=None)
        result['ie_key'] = JWPlatformIE.ie_key()
        return result
Unify coding cookie 2016-10-02 13:39:18 +02:00			`# coding: utf-8`
[spiegel] Simplify and use unicode_literals 2014-01-31 14:00:55 +01:00			`from __future__ import unicode_literals`

[Spiegel] move into own file 2013-06-23 22:22:08 +02:00			`from .common import InfoExtractor`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`from .jwplatform import JWPlatformIE`
[Spiegel] move into own file 2013-06-23 22:22:08 +02:00

			`class SpiegelIE(InfoExtractor):`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'`
			`_VALID_URL = r'https?://(?:www\.)?(?:spiegel\|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+\|%s)(?:-embed\|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE`
[spiegel] Implement format selection 2013-11-16 01:33:12 +01:00			`_TESTS = [{`
[spiegel] Simplify and use unicode_literals 2014-01-31 14:00:55 +01:00			`'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`'md5': '50c7948883ec85a3e431a0a44b7ad1d6',`
[spiegel] Simplify and use unicode_literals 2014-01-31 14:00:55 +01:00			`'info_dict': {`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`'id': 'II0BUyxY',`
			`'display_id': '1259285',`
[spiegel] Add description and modernize 2014-06-21 11:31:18 +02:00			`'ext': 'mp4',`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`'title': 'Vulkan Tungurahua in Ecuador ist wieder aktiv - DER SPIEGEL - Wissenschaft',`
[spiegel] Add description and modernize 2014-06-21 11:31:18 +02:00			`'description': 'md5:8029d8310232196eb235d27575a8b9f4',`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`'duration': 48.0,`
[spiegel] improve info extraction 2016-07-05 12:46:25 +02:00			`'upload_date': '20130311',`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`'timestamp': 1362997920,`
[spiegel] Simplify and use unicode_literals 2014-01-31 14:00:55 +01:00			`},`
[spiegel] Add description and modernize 2014-06-21 11:31:18 +02:00			`}, {`
[spiegel] Simplify and use unicode_literals 2014-01-31 14:00:55 +01:00			`'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`'only_matching': True,`
[spiegel] Add support for embeds 2014-11-13 15:02:31 +01:00			`}, {`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`'url': 'https://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html',`
			`'only_matching': True,`
[spiegel] Accept iframe urls Closes #6370. 2015-07-26 12:57:06 +02:00			`}, {`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`'url': 'https://www.spiegel.de/panorama/urteile-im-goldmuenzenprozess-haftstrafen-fuer-clanmitglieder-a-aae8df48-43c1-4c61-867d-23f0a2d254b7',`
[spiegel] Accept iframe urls Closes #6370. 2015-07-26 12:57:06 +02:00			`'only_matching': True,`
[spiegel] Add support for nexx videos (closes #15285) 2018-01-17 16:03:56 +01:00			`}, {`
			`'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',`
			`'only_matching': True,`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`}, {`
			`'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',`
			`'only_matching': True,`
[spiegel] Implement format selection 2013-11-16 01:33:12 +01:00			`}]`
[Spiegel] move into own file 2013-06-23 22:22:08 +02:00
			`def _real_extract(self, url):`
[spiegel] Modernize 2014-11-13 14:45:17 +01:00			`video_id = self._match_id(url)`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`webpage = self._download_webpage(url, video_id)`
			`media_id = self._html_search_regex(`
			`r'(&#34;\|["\'])mediaId\1\s*:\s*(&#34;\|["\'])(?P<id>(?:(?!\2).)+)\2',`
			`webpage, 'media id', group='id')`
[spiegel] Simplify and use unicode_literals 2014-01-31 14:00:55 +01:00			`return {`
[spiegel] fix info extraction(#16538) 2018-05-28 01:10:01 +02:00			`'_type': 'url_transparent',`
[Spiegel] move into own file 2013-06-23 22:22:08 +02:00			`'id': video_id,`
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19 Old Extractors left behind: VLivePlaylistIE YoutubeSearchURLIE YoutubeShowIE YoutubeFavouritesIE If removing old extractors, make corresponding changes in docs/supportedsites.md youtube_dlc/extractor/extractors.py Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md test/test_all_urls.py youtube_dlc/version.py Changelog 2020-11-19 20:22:59 +01:00			`'display_id': video_id,`
			`'url': 'jwplatform:%s' % media_id,`
			`'title': self._og_search_title(webpage, default=None),`
			`'ie_key': JWPlatformIE.ie_key(),`
[Spiegel] move into own file 2013-06-23 22:22:08 +02:00			`}`