1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-07 11:42:43 +01:00

[mediaset] Add playlist support (#1463)

Closes #1372
Authored by: nixxo
This commit is contained in:
nixxo 2021-10-29 21:39:55 +02:00 committed by GitHub
parent aeaf3b2b92
commit e6ff66efc0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 86 additions and 1 deletions

View File

@ -744,7 +744,10 @@
from .medaltv import MedalTVIE from .medaltv import MedalTVIE
from .mediaite import MediaiteIE from .mediaite import MediaiteIE
from .mediaklikk import MediaKlikkIE from .mediaklikk import MediaKlikkIE
from .mediaset import MediasetIE from .mediaset import (
MediasetIE,
MediasetShowIE,
)
from .mediasite import ( from .mediasite import (
MediasiteIE, MediasiteIE,
MediasiteCatalogIE, MediasiteCatalogIE,

View File

@ -1,13 +1,17 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import functools
import re import re
from .theplatform import ThePlatformBaseIE from .theplatform import ThePlatformBaseIE
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
OnDemandPagedList,
parse_qs, parse_qs,
try_get,
urljoin,
update_url_query, update_url_query,
) )
@ -212,3 +216,81 @@ def _real_extract(self, url):
'subtitles': subtitles, 'subtitles': subtitles,
}) })
return info return info
class MediasetShowIE(MediasetIE):
_VALID_URL = r'''(?x)
(?:
https?://
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
(?:
(?:fiction|programmi-tv|serie-tv)/(?:.+?/)?
(?:[a-z]+)_SE(?P<id>\d{12})
(?:,ST(?P<st>\d{12}))?
(?:,sb(?P<sb>\d{9}))?$
)
)
'''
_TESTS = [{
# TV Show webpage (with a single playlist)
'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556',
'info_dict': {
'id': '000000001556',
'title': 'Fire Force',
},
'playlist_count': 1,
}, {
# TV Show webpage (with multiple playlists)
'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
'info_dict': {
'id': '000000002763',
'title': 'Le Iene',
},
'playlist_count': 7,
}, {
# TV Show specific playlist (single page)
'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556,ST000000002738,sb100013107',
'info_dict': {
'id': '100013107',
'title': 'Episodi',
},
'playlist_count': 4,
}, {
# TV Show specific playlist (with multiple pages)
'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
'info_dict': {
'id': '100013375',
'title': 'I servizi',
},
'playlist_count': 53,
}]
_BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished|desc,tvSeasonEpisodeNumber|desc&range=%d-%d'
_PAGE_SIZE = 25
def _fetch_page(self, sb, page):
lower_limit = page * self._PAGE_SIZE + 1
upper_limit = lower_limit + self._PAGE_SIZE - 1
content = self._download_json(
self._BY_SUBBRAND % (sb, lower_limit, upper_limit), sb)
for entry in content.get('entries') or []:
yield self.url_result(
'mediaset:' + entry['guid'],
playlist_title=entry['mediasetprogram$subBrandDescription'])
def _real_extract(self, url):
playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
if not sb:
page = self._download_webpage(url, playlist_id)
entries = [self.url_result(urljoin('https://www.mediasetplay.mediaset.it', url))
for url in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)]
title = (self._html_search_regex(r'(?s)<h1[^>]*>(.+?)</h1>', page, 'title', default=None)
or self._og_search_title(page))
return self.playlist_result(entries, st or playlist_id, title)
entries = OnDemandPagedList(
functools.partial(self._fetch_page, sb),
self._PAGE_SIZE)
title = try_get(entries, lambda x: x[0]['playlist_title'])
return self.playlist_result(entries, sb, title)