From 16f7e6be3a9d38352c630544b91c1e86b8cf2332 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 11 Sep 2021 18:59:48 +0530 Subject: [PATCH] [bilibili]Add BiliIntlIE and BiliIntlSeriesIE (#907) Closes #611 Authored by: Ashish0804 --- yt_dlp/extractor/bilibili.py | 140 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 2 + 2 files changed, 142 insertions(+) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 8aab6a01b..0a81452c3 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -23,6 +23,7 @@ try_get, smuggle_url, str_or_none, + str_to_int, strip_jsonp, unified_timestamp, unsmuggle_url, @@ -774,3 +775,142 @@ def _real_extract(self, url): return self.url_result( 'http://www.bilibili.tv/video/av%s/' % video_id, ie=BiliBiliIE.ie_key(), video_id=video_id) + + +class BiliIntlBaseIE(InfoExtractor): + _API_URL = 'https://api.bili{}/intl/gateway{}' + + def _call_api(self, type, endpoint, id): + return self._download_json(self._API_URL.format(type, endpoint), id)['data'] + + def _get_subtitles(self, type, ep_id): + sub_json = self._call_api(type, f'/m/subtitle?ep_id={ep_id}&platform=web', ep_id) + subtitles = {} + for sub in sub_json.get('subtitles', []): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles.setdefault(sub.get('key', 'en'), []).append({ + 'url': sub_url, + }) + return subtitles + + def _get_formats(self, type, ep_id): + video_json = self._call_api(type, f'/web/playurl?ep_id={ep_id}&platform=web', ep_id) + if not video_json: + self.raise_login_required(method='cookies') + video_json = video_json['playurl'] + formats = [] + for vid in video_json.get('video', []): + video_res = vid.get('video_resource') or {} + video_info = vid.get('stream_info') or {} + if not video_res.get('url'): + continue + formats.append({ + 'url': video_res['url'], + 'ext': 'mp4', + 'format_note': video_info.get('desc_words'), + 'width': video_res.get('width'), + 'height': video_res.get('height'), + 'vbr': video_res.get('bandwidth'), + 'acodec': 'none', + 'vcodec': video_res.get('codecs'), + 'filesize': video_res.get('size'), + }) + for aud in video_json.get('audio_resource', []): + if not aud.get('url'): + continue + formats.append({ + 'url': aud['url'], + 'ext': 'mp4', + 'abr': aud.get('bandwidth'), + 'acodec': aud.get('codecs'), + 'vcodec': 'none', + 'filesize': aud.get('size'), + }) + + self._sort_formats(formats) + return formats + + def _extract_ep_info(self, type, episode_data, ep_id): + return { + 'id': ep_id, + 'title': episode_data.get('long_title') or episode_data['title'], + 'thumbnail': episode_data.get('cover'), + 'episode_number': str_to_int(episode_data.get('title')), + 'formats': self._get_formats(type, ep_id), + 'subtitles': self._get_subtitles(type, ep_id), + 'extractor_key': BiliIntlIE.ie_key(), + } + + +class BiliIntlIE(BiliIntlBaseIE): + _VALID_URL = r'https?://(?:www\.)?bili(?Pbili\.tv|intl.com)/(?:[a-z]{2}/)?play/(?P\d+)/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.bilibili.tv/en/play/34613/341736', + 'info_dict': { + 'id': '341736', + 'ext': 'mp4', + 'title': 'The First Night', + 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png', + 'episode_number': 2, + }, + 'params': { + 'format': 'bv', + }, + }, { + 'url': 'https://www.biliintl.com/en/play/34613/341736', + 'info_dict': { + 'id': '341736', + 'ext': 'mp4', + 'title': 'The First Night', + 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png', + 'episode_number': 2, + }, + 'params': { + 'format': 'bv', + }, + }] + + def _real_extract(self, url): + type, season_id, id = self._match_valid_url(url).groups() + data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={season_id}', id) + episode_data = next( + episode for episode in data_json.get('episodes', []) + if str(episode.get('ep_id')) == id) + return self._extract_ep_info(type, episode_data, id) + + +class BiliIntlSeriesIE(BiliIntlBaseIE): + _VALID_URL = r'https?://(?:www\.)?bili(?Pbili\.tv|intl.com)/(?:[a-z]{2}/)?play/(?P\d+)$' + _TESTS = [{ + 'url': 'https://www.bilibili.tv/en/play/34613', + 'playlist_mincount': 15, + 'info_dict': { + 'id': '34613', + }, + 'params': { + 'skip_download': True, + 'format': 'bv', + }, + }, { + 'url': 'https://www.biliintl.com/en/play/34613', + 'playlist_mincount': 15, + 'info_dict': { + 'id': '34613', + }, + 'params': { + 'skip_download': True, + 'format': 'bv', + }, + }] + + def _entries(self, id, type): + data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={id}', id) + for episode in data_json.get('episodes', []): + episode_id = str(episode.get('ep_id')) + yield self._extract_ep_info(type, episode, episode_id) + + def _real_extract(self, url): + type, id = self._match_valid_url(url).groups() + return self.playlist_result(self._entries(id, type), playlist_id=id) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index c56bf5b2b..16bc78ffc 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -147,6 +147,8 @@ BilibiliAudioAlbumIE, BiliBiliPlayerIE, BilibiliChannelIE, + BiliIntlIE, + BiliIntlSeriesIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import (