From 2c4bba96acb64e23470ccae804c659b56ebb93b5 Mon Sep 17 00:00:00 2001
From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com>
Date: Mon, 11 Oct 2021 03:36:27 +0530
Subject: [PATCH] [EUScreen] Add Extractor (#1219)

Closes #1207
Authored by: Ashish0804
---
 yt_dlp/extractor/euscreen.py   | 89 ++++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |  1 +
 2 files changed, 90 insertions(+)
 create mode 100644 yt_dlp/extractor/euscreen.py

diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py
new file mode 100644
index 000000000..3980c2349
--- /dev/null
+++ b/yt_dlp/extractor/euscreen.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    parse_duration,
+    js_to_json,
+)
+
+
+class EUScreenIE(InfoExtractor):
+    """Extractor for item pages on euscreen.eu (``item.html?id=...``)."""
+
+    # ``_match_id`` reads the item identifier from the group named ``id``.
+    _VALID_URL = r'https?://(?:www\.)?euscreen\.eu/item\.html\?id=(?P<id>[^&?$/]+)'
+
+    _TESTS = [{
+        'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C',
+        'info_dict': {
+            'id': 'EUS_0EBCBF356BFC4E12A014023BA41BD98C',
+            'ext': 'mp4',
+            'title': "L'effondrement du stade du Heysel",
+            'alt_title': 'Collapse of the Heysel Stadium',
+            'duration': 318.0,
+            'description': 'md5:f0ffffdfce6821139357a1b8359d6152',
+            'series': 'JA2 DERNIERE',
+            'episode': '-',
+            'uploader': 'INA / France',
+            'thumbnail': 'http://images3.noterik.com/domain/euscreenxl/user/eu_ina/video/EUS_0EBCBF356BFC4E12A014023BA41BD98C/image.jpg'
+        },
+        'params': {'skip_download': True}
+    }]
+
+    # Both requests issued by ``_real_extract`` go to this endpoint.
+    _API_URL = 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem'
+
+    # Body of the first request.
+    # NOTE(review): this literal reads like a markup document whose tags were
+    # lost in transit -- compare it with the upstream file before applying.
+    _payload = b'-1Win32MozillaNetscape5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36true784758undefinedSat, 07 Oct 2021 08:56:50 GMT1633769810758'
+
+    def _real_extract(self, url):
+        """Return the info dict for the euscreen.eu item page at ``url``.
+
+        The first request sends ``_payload``; its response text, with
+        'screenid' rewritten to 'screenId', is the body of the second.  The
+        second response is searched for a ``setVideo(...)`` call (sources,
+        fallback screenshot) and a ``setData(...)`` call (metadata).
+        """
+        video_id = self._match_id(url)
+        args_for_js_request = self._download_webpage(
+            self._API_URL, video_id, data=self._payload,
+            query={'actionlist': 'itempage', 'id': video_id})
+        info_js = self._download_webpage(
+            self._API_URL, video_id,
+            data=args_for_js_request.replace('screenid', 'screenId').encode('utf-8'))
+        video_json = self._parse_json(
+            self._search_regex(r'setVideo\(({.+})\)\(\$end\$\)put', info_js, 'Video JSON'),
+            video_id, transform_source=js_to_json)
+        meta_json = self._parse_json(
+            self._search_regex(r'setData\(({.+})\)\(\$end\$\)', info_js, 'Metadata JSON'),
+            video_id, transform_source=js_to_json)
+
+        # Skip source entries that carry no URL instead of raising KeyError.
+        formats = [
+            {'url': source['src']}
+            for source in video_json.get('sources') or []
+            if source.get('src')]
+        self._sort_formats(formats)
+
+        # Treat missing and null summaries alike so that neither the text
+        # 'None' nor a lone newline ends up as the description.
+        summaries = (
+            meta_json.get('summaryOriginal') or '',
+            meta_json.get('summaryEnglish') or '')
+
+        return {
+            'id': video_id,
+            'title': meta_json.get('originalTitle'),
+            'alt_title': meta_json.get('title'),
+            'duration': parse_duration(meta_json.get('duration')),
+            'description': ('%s\n%s' % summaries) if any(summaries) else None,
+            'series': meta_json.get('series') or meta_json.get('seriesEnglish'),
+            'episode': meta_json.get('episodeNumber'),
+            'uploader': meta_json.get('provider'),
+            'thumbnail': meta_json.get('screenshot') or video_json.get('screenshot'),
+            'formats': formats,
+        }
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 0a761135e..adf54ca7e 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -420,6 +420,7 @@
 )
 from .esri import EsriVideoIE
 from .europa import EuropaIE
+from .euscreen import EUScreenIE
 from .expotv import ExpoTVIE
 from .expressen import ExpressenIE
 from .extremetube import ExtremeTubeIE