From 58c920f79aeb9a2c16bb27bf6d2b0780405da097 Mon Sep 17 00:00:00 2001
From: subrat-lima
Date: Sun, 22 Sep 2024 01:09:26 +0530
Subject: [PATCH] [ie/subsplash] added SubsplashVideoIE & SubsplashPlaylistIE

---
Review notes (not part of the commit message):
* Restored the "id" named group in both _VALID_URL patterns.
* Token lookup is shared by both extractors and fails with ExtractorError.
* The subsplash.py blob id on its "index" line was not recomputed after these changes.

 yt_dlp/extractor/_extractors.py |   1 +
 yt_dlp/extractor/subsplash.py   | 148 ++++++++++++++++++++++++++++++++
 2 files changed, 149 insertions(+)
 create mode 100644 yt_dlp/extractor/subsplash.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index d8abf0b5d..6e5816d5f 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1977,6 +1977,7 @@
 from .stretchinternet import StretchInternetIE
 from .stripchat import StripchatIE
 from .stv import STVPlayerIE
+from .subsplash import SubsplashPlaylistIE, SubsplashVideoIE
 from .substack import SubstackIE
 from .sunporno import SunPornoIE
 from .sverigesradio import (
diff --git a/yt_dlp/extractor/subsplash.py b/yt_dlp/extractor/subsplash.py
new file mode 100644
index 000000000..e1d1266fa
--- /dev/null
+++ b/yt_dlp/extractor/subsplash.py
@@ -0,0 +1,148 @@
+import functools
+import math
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    InAdvancePagedList,
+    int_or_none,
+    str_or_none,
+    traverse_obj,
+    unified_strdate,
+    url_or_none,
+)
+
+
+class SubsplashVideoIE(InfoExtractor):
+    # Handles single media pages: subsplash.com/u/CHANNEL/media/d/SHORTCODE-slug
+    _VALID_URL = r'https?://(?:www\.)?subsplash\.com/u/[^/]+/media/d/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://subsplash.com/u/skywatchtv/media/d/5whnx5s-the-grand-delusion-taking-place-right-now',
+        'md5': '2d67c50deac3c6c49c6e25c4a5b25afe',
+        'info_dict': {
+            'id': '33f8d305-68ab-414c-acf2-f2317a0abe21',
+            'ext': 'mp4',
+            'title': 'THE GRAND DELUSION TAKING PLACE RIGHT NOW!',
+            'description': 'md5:220a630865c3697b0ec9dcb3a70cbc33',
+            'upload_date': '20240901',
+            'duration': 1710,
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+        },
+    }, {
+        'url': 'https://subsplash.com/u/prophecywatchers/media/d/n4dr8b2-the-transhumanist-plan-for-humanity-billy-crone',
+        'md5': 'f7b4109ba7f012dff953391d6b400730',
+        'info_dict': {
+            'id': 'e16348f1-040e-4596-b922-77b45fa8d253',
+            'ext': 'mp4',
+            'title': 'The Transhumanist Plan for Humanity | Billy Crone',
+            'description': None,
+            'upload_date': '20240903',
+            'duration': 1709,
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+        },
+    }]
+
+    def _extract_token(self, urlh, display_id, prefer_cookie=False):
+        """Return the API token advertised by a page response.
+
+        The token is looked up in the ``x-api-token`` response header and in the value
+        of the first ``set-cookie`` entry; ``prefer_cookie`` selects which of the two is
+        tried first. Raises ExtractorError if neither source yields a value.
+        """
+        header_token = urlh.get_header('x-api-token')
+        # Keep only the first cookie's name=value pair; attributes after ';' are dropped
+        cookie_pair = (urlh.get_header('set-cookie') or '').partition(';')[0]
+        cookie_token = cookie_pair.partition('=')[2].strip()
+        candidates = (cookie_token, header_token) if prefer_cookie else (header_token, cookie_token)
+        token = next(filter(None, candidates), None)
+        if not token:
+            raise ExtractorError('Unable to extract API token', video_id=display_id)
+        return token
+
+    def _fetch_json(self, url, display_id, token):
+        """Download ``url`` as JSON, sending ``token`` as a Bearer authorization header."""
+        return self._download_json(url, display_id, headers={'Authorization': f'Bearer {token}'})
+
+    def _extract_metadata(self, data, display_id):
+        """Build an info dict from one ``media-items`` API object.
+
+        Fields that are missing from ``data`` are left out of the result. ``display_id``
+        is passed as the video id when downloading the HLS manifest.
+        """
+        return traverse_obj(data, {
+            'id': ('id', {str_or_none}),
+            'title': ('title', {str_or_none}),
+            'description': ('summary_text', {str_or_none}),
+            'thumbnail': ('_embedded', 'images', 0, '_links', 'related', 'href', {url_or_none}),
+            # Scaled down by 1000 (presumably milliseconds to seconds)
+            'duration': ('_embedded', 'video', 'duration', {lambda x: int_or_none(x, 1000)}),
+            'upload_date': ('published_at', {unified_strdate}),
+            'formats': ('_embedded', 'video', '_embedded', 'playlists', 0, '_links', 'related', 'href',
+                        {lambda url: self._extract_m3u8_formats(url, display_id)}),
+        })
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        # Only the response headers are used; the page body is not parsed
+        _, urlh = self._download_webpage_handle(url, display_id)
+        token = self._extract_token(urlh, display_id, prefer_cookie=True)
+        metadata_url = f'https://core.subsplash.com/media/v1/media-items?filter[short_code]={display_id}&include=images,audio.audio-outputs,audio.video,video.video-outputs,video.playlists,document,broadcast'
+        metadata = self._fetch_json(metadata_url, display_id, token)
+        media_item = traverse_obj(metadata, ('_embedded', 'media-items', 0, {dict}))
+        if not media_item:
+            raise ExtractorError('Unable to find media item', video_id=display_id)
+        return self._extract_metadata(media_item, display_id)
+
+
+class SubsplashPlaylistIE(SubsplashVideoIE):
+    # Handles media series pages: subsplash.com/CHANNEL/(our-videos|media)/ms/+SHORTCODE
+    IE_NAME = 'subsplash:playlist'
+    _VALID_URL = r'https?://(?:www\.)?subsplash\.com/[^/]+/(?:our-videos|media)/ms/\+(?P<id>\w+)'
+    _PAGE_SIZE = 15
+    _TESTS = [{
+        'url': 'https://subsplash.com/skywatchtv/our-videos/ms/+dbyjzp8',
+        'info_dict': {
+            'id': 'dbyjzp8',
+            'title': 'Five in Ten',
+        },
+        'playlist_mincount': 24,
+    }, {
+        'url': 'https://subsplash.com/prophecywatchers/media/ms/+n42mr48',
+        'info_dict': {
+            'id': 'n42mr48',
+            'title': 'Road to Zion Series',
+        },
+        'playlist_mincount': 13,
+    }, {
+        'url': 'https://subsplash.com/prophecywatchers/media/ms/+918b9f6',
+        'only_matching': True,
+    }]
+
+    def _get_entries(self, token, series_id, page):
+        """Yield the info dicts for one page of a series; the API page number is ``page + 1``."""
+        url = f'https://core.subsplash.com/media/v1/media-items?filter[broadcast.status|broadcast.status]=null|on-demand&filter[media_series]={series_id}&filter[status]=published&include=images,audio.audio-outputs,audio.video,video.video-outputs,video.playlists,document&page[number]={page + 1}&page[size]={self._PAGE_SIZE}&sort=-position'
+        data = self._fetch_json(url, f'{series_id}_{page}', token)
+        # A response without a "media-items" list yields no entries instead of failing
+        for entry in traverse_obj(data, ('_embedded', 'media-items', {list})) or []:
+            yield self._extract_metadata(entry, series_id)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        # Only the response headers are used; the page body is not parsed
+        _, urlh = self._download_webpage_handle(url, display_id)
+        token = self._extract_token(urlh, display_id)
+
+        series_url = f'https://core.subsplash.com/media/v1/media-series?filter[short_code]={display_id}'
+        json_data = self._fetch_json(series_url, display_id, token)
+        series_data = traverse_obj(json_data, ('_embedded', 'media-series', 0, {
+            'id': ('id', {str}),
+            'title': ('title', {str}),
+            'count': ('media_items_count', {int}),
+        })) or {}
+        if not series_data.get('id') or series_data.get('count') is None:
+            raise ExtractorError('Unable to extract series metadata', video_id=display_id)
+        total_pages = math.ceil(series_data['count'] / self._PAGE_SIZE)
+
+        entries = InAdvancePagedList(
+            functools.partial(self._get_entries, token, series_data['id']), total_pages, self._PAGE_SIZE)
+        return self.playlist_result(entries, display_id, series_data.get('title'))