From 4e7f375c949cb152ae953aa834098351f8e5a872 Mon Sep 17 00:00:00 2001
From: Dosychev Peter
Date: Mon, 11 Jul 2022 02:18:12 +0300
Subject: [PATCH] [extractor/theholetv] Add extractor (#4325)

Authored by: dosy4ev
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/theholetv.py   | 36 +++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 yt_dlp/extractor/theholetv.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index a7a915fa5..24d066fbe 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1718,6 +1718,7 @@
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .tfo import TFOIE
+from .theholetv import TheHoleTvIE
 from .theintercept import TheInterceptIE
 from .theplatform import (
     ThePlatformIE,
diff --git a/yt_dlp/extractor/theholetv.py b/yt_dlp/extractor/theholetv.py
new file mode 100644
index 000000000..f0a096d41
--- /dev/null
+++ b/yt_dlp/extractor/theholetv.py
@@ -0,0 +1,36 @@
+from .common import InfoExtractor
+from ..utils import extract_attributes, remove_end
+
+
+class TheHoleTvIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?the-hole\.tv/episodes/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://the-hole.tv/episodes/gromkii-vopros-sergey-orlov',
+        'md5': 'fea6682f47786f3ae5a6cbd635ec4bf9',
+        'info_dict': {
+            'id': 'gromkii-vopros-sergey-orlov',
+            'ext': 'mp4',
+            'title': 'Сергей Орлов — Громкий вопрос',
+            'thumbnail': 'https://assets-cdn.the-hole.tv/images/t8gan4n6zn627e7wni11b2uemqts',
+            'description': 'md5:45741a9202331f995d9fb76996759379'
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        player_attrs = extract_attributes(self._search_regex(
+            r'(<div[^>]*\bdata-controller="player"[^>]*>)', webpage, 'video player'))
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            player_attrs['data-player-source-value'], video_id, 'mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': remove_end(self._html_extract_title(webpage), ' — The Hole'),
+            'description': self._og_search_description(webpage),
+            'thumbnail': player_attrs.get('data-player-poster-value'),
+            'formats': formats,
+            'subtitles': subtitles
+        }