From c3c18d7b8a035c0099499147be5fcfe5f603e072 Mon Sep 17 00:00:00 2001 From: nixxo Date: Wed, 28 Oct 2020 16:55:58 +0100 Subject: [PATCH 1/4] [skyitalia] Add new extractor --- youtube_dlc/extractor/extractors.py | 4 + youtube_dlc/extractor/skyitalia.py | 119 ++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 youtube_dlc/extractor/skyitalia.py diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index d31edd7c8..a0c7d0f42 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -1037,6 +1037,10 @@ SkyNewsIE, SkySportsIE, ) +from .skyitalia import ( + SkyArteItaliaIE, + SkyItaliaIE, +) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py new file mode 100644 index 000000000..d9c35c3a1 --- /dev/null +++ b/youtube_dlc/extractor/skyitalia.py @@ -0,0 +1,119 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class SkyItaliaBaseIE(InfoExtractor): + _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}' + _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' + _RES = { + 'low': [426, 240], + 'med': [640, 360], + 'high': [854, 480], + 'hd': [1280, 720] + } + + def _extract_video_id(self, url): + webpage = self._download_webpage(url, 'skyitalia') + video_id = self._html_search_regex( + [r'data-videoid=\"(\d+)\"', + r'http://player\.sky\.it/social\?id=(\d+)\&'], + webpage, 'video_id') + if video_id: + return video_id + raise ExtractorError('Video ID not found.') + + def _get_formats(self, video_id, token=_TOKEN): + data_url = self._GET_VIDEO_DATA.replace('{id}', video_id) + data_url = data_url.replace('{token}', token) + video_data = self._parse_json( + self._download_webpage(data_url, video_id), + video_id) + + formats = [] + for q, r in self._RES.items(): + key = 'web_' + q + '_url' + if key not in video_data: + continue + formats.append({ + 'url': video_data[key], + 'format_id': q, + 'width': r[0], + 'height': r[1] + }) + + self._sort_formats(formats) + title = video_data.get('title') + thumb = video_data.get('thumb') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumb, + 'formats': formats + } + + def _real_extract(self, url): + video_id = self._match_id(url) + if video_id == 'None': + video_id = self._extract_video_id(url) + return self._get_formats(video_id, self._TOKEN) + + +class SkyItaliaIE(SkyItaliaBaseIE): + IE_NAME = 'sky.it' + _VALID_URL = r'''(?x)https?:// + (?Psport|tg24|video) + \.sky\.it/(?:.+?) + (?P[0-9]{6})? + (?:$|\?)''' + + _TESTS = [{ + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + }, { + 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + }, { + 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi', + 'md5': 'caa25e62dadb529bc5e0b078da99f854', + 'info_dict': { + 'id': '615904', + 'ext': 'mp4', + 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg', + } + }, { + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', + 'only_matching': True, + }] + + +class SkyArteItaliaIE(SkyItaliaBaseIE): + IE_NAME = 'arte.sky.it' + _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P[0-9]{6})?$' + _TEST = { + 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/', + 'md5': '2f22513a89f45142f2746f878d690647', + 'info_dict': { + 'id': '612888', + 'ext': 'mp4', + 'title': 'I maestri del cinema Federico Felini', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg', + } + } + _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd' From 0b72c2bc31a3400134182154f23fdd494c838a5a Mon Sep 17 00:00:00 2001 From: nixxo Date: Wed, 28 Oct 2020 17:04:36 +0100 Subject: [PATCH 2/4] [skyitalia] removed arbitrary parameter --- youtube_dlc/extractor/skyitalia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py index d9c35c3a1..262701c6b 100644 --- a/youtube_dlc/extractor/skyitalia.py +++ b/youtube_dlc/extractor/skyitalia.py @@ -25,7 +25,7 @@ def _extract_video_id(self, url): return video_id raise ExtractorError('Video ID not found.') - def _get_formats(self, video_id, token=_TOKEN): + def _get_formats(self, video_id, token): data_url = self._GET_VIDEO_DATA.replace('{id}', video_id) data_url = data_url.replace('{token}', token) video_data = self._parse_json( From 81a20463a44d1039729ed5611d39d0bcb4abeb73 Mon Sep 17 00:00:00 2001 From: nixxo Date: Wed, 28 Oct 2020 17:06:49 +0100 Subject: [PATCH 3/4] [skyitalia] moved token --- youtube_dlc/extractor/skyitalia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py index 262701c6b..a4e894044 100644 --- a/youtube_dlc/extractor/skyitalia.py +++ b/youtube_dlc/extractor/skyitalia.py @@ -7,7 +7,6 @@ class SkyItaliaBaseIE(InfoExtractor): _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}' - _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' _RES = { 'low': [426, 240], 'med': [640, 360], @@ -101,6 +100,7 @@ class SkyItaliaIE(SkyItaliaBaseIE): 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', 'only_matching': True, }] + _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' class SkyArteItaliaIE(SkyItaliaBaseIE): From 920ad13673b4f60274fe132bf17d8019011dfc9b Mon Sep 17 00:00:00 2001 From: nixxo Date: Thu, 29 Oct 2020 13:37:07 +0100 Subject: [PATCH 4/4] [skyitalia] fixed coding conventions --- youtube_dlc/extractor/skyitalia.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py index a4e894044..3c7bd465d 100644 --- a/youtube_dlc/extractor/skyitalia.py +++ b/youtube_dlc/extractor/skyitalia.py @@ -33,11 +33,11 @@ def _get_formats(self, video_id, token): formats = [] for q, r in self._RES.items(): - key = 'web_' + q + '_url' + key = 'web_%s_url' % q if key not in video_data: continue formats.append({ - 'url': video_data[key], + 'url': video_data.get(key), 'format_id': q, 'width': r[0], 'height': r[1]