From bccdbd22d559cc22b23bbd2ff96075ea5d88c944 Mon Sep 17 00:00:00 2001
From: coletdjnz
Date: Mon, 6 Sep 2021 06:52:38 +0000
Subject: [PATCH] [Mediaklikk] Add Extractor (#867)

Original PR: https://github.com/ytdl-org/youtube-dl/pull/17453, https://github.com/ytdl-org/youtube-dl/pull/25098
Fixes: https://github.com/ytdl-org/youtube-dl/issues/21431
Authored-by: tmarki, mrx23dot, coletdjnz
---
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/mediaklikk.py | 113 +++++++++++++++++++++++++++++++++
 yt_dlp/utils.py                |   1 +
 3 files changed, 115 insertions(+)
 create mode 100644 yt_dlp/extractor/mediaklikk.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 4ef581b07..c745fd079 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -725,6 +725,7 @@
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
+from .mediaklikk import MediaKlikkIE
 from .mediaset import MediasetIE
 from .mediasite import (
     MediasiteIE,
diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py
new file mode 100644
index 000000000..b9b6d739f
--- /dev/null
+++ b/yt_dlp/extractor/mediaklikk.py
@@ -0,0 +1,113 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ..utils import (
+    unified_strdate
+)
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_str
+)
+
+
+class MediaKlikkIE(InfoExtractor):
+    # The /YYYY/MM/DD/ path segment is optional; when it is absent the
+    # 'year', 'month' and 'day' groups are None and only 'id' is set.
+    _VALID_URL = r'''(?x)^https?:\/\/(?:www\.)?
+                        (?:mediaklikk|m4sport|hirado|petofilive)\.hu\/.*?videok?\/
+                        (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
+                        (?P<id>[^/#?_]+)'''
+
+    _TESTS = [{
+        # mediaklikk. date in html.
+        'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
+        'info_dict': {
+            'id': '4754129',
+            'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
+            'ext': 'mp4',
+            'upload_date': '20210901',
+            'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
+        }
+    }, {
+        # m4sport
+        'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
+        'info_dict': {
+            'id': '4754999',
+            'title': 'Gyémánt Liga, Párizs',
+            'ext': 'mp4',
+            'upload_date': '20210830',
+            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
+        }
+    }, {
+        # m4sport with *video/ url and no date
+        'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
+        'info_dict': {
+            'id': '4492099',
+            'title': 'Real Madrid - Chelsea 1-1',
+            'ext': 'mp4',
+            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
+        }
+    }, {
+        # hirado
+        'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
+        'info_dict': {
+            'id': '4760120',
+            'title': 'Feltételeket szabott a főváros',
+            'ext': 'mp4',
+            'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
+        }
+    }, {
+        # petofilive
+        'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
+        'info_dict': {
+            'id': '4571948',
+            'title': 'Tha Shudras az Akusztikban',
+            'ext': 'mp4',
+            'upload_date': '20210607',
+            'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict (id, title, formats, ...) for one video page URL."""
+        mobj = self._match_valid_url(url)
+        display_id = mobj.group('id')
+        webpage = self._download_webpage(url, display_id)
+
+        # The player configuration is the object literal passed as the second
+        # argument of mtva_player_manager.player(...) in the page source.
+        player_data_str = self._html_search_regex(
+            r'mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);', webpage, 'player data')
+        player_data = self._parse_json(player_data_str, display_id, compat_urllib_parse_unquote)
+        video_id = compat_str(player_data['contentId'])
+        title = player_data.get('title') or self._og_search_title(webpage, fatal=False) or \
+            self._html_search_regex(r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title')
+
+        # Prefer the date from the URL; otherwise fall back to the article date in the page.
+        upload_date = None
+        if mobj.group('year'):
+            upload_date = unified_strdate(
+                '%s-%s-%s' % (mobj.group('year'), mobj.group('month'), mobj.group('day')))
+        if not upload_date:
+            upload_date = unified_strdate(self._html_search_regex(
+                r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
+
+        # The player page is queried with the same parameters, with 'token' renamed to 'video'.
+        player_data['video'] = player_data.pop('token')
+        player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
+        playlist_url = self._proto_relative_url(compat_urllib_parse_unquote(
+            self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/'))
+
+        formats = self._extract_wowza_formats(
+            playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'display_id': display_id,
+            'formats': formats,
+            'upload_date': upload_date,
+            'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage)
+        }
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index ce84f7416..983ca6ced 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -1740,6 +1740,7 @@ def random_user_agent():
     '%b %dth %Y %I:%M',
     '%Y %m %d',
     '%Y-%m-%d',
+    '%Y.%m.%d.',
     '%Y/%m/%d',
     '%Y/%m/%d %H:%M',
     '%Y/%m/%d %H:%M:%S',