1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-10-04 16:37:09 +02:00

modified: yt_dlp/extractor/bilibili.py

This commit is contained in:
grqx_wsl 2024-06-30 20:53:08 +12:00
parent 72fac58401
commit ddca238423

View File

@ -1530,38 +1530,44 @@ class BiliBiliSearchAllIE(SearchInfoExtractor):
def _search_results(self, query): def _search_results(self, query):
headers = self.geo_verification_headers() headers = self.geo_verification_headers()
page_size = 50
live_room_prefix = 'https://live.bilibili.com/' live_room_prefix = 'https://live.bilibili.com/'
bili_user_prefix = 'https://space.bilibili.com/' bili_user_prefix = 'https://space.bilibili.com/'
if not self._get_cookies('https://api.bilibili.com').get('buvid3'): if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc') self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
for page_num in itertools.count(1): for page_num in itertools.count(1):
query_params = {
'keyword': query,
'page': page_num,
'dynamic_offset': (page_num - 1) * page_size,
'platform': 'pc',
}
api_url = r'https://api.bilibili.com/x/web-interface/search/all/v2'
try: try:
search_all_result = self._download_json( search_all_result = self._download_json(
r'https://api.bilibili.com/x/web-interface/search/all/v2', api_url, video_id=query, query=query_params, headers=headers)
video_id=query, query={
'keyword': query,
'page': page_num,
}, headers=headers)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 412: if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError('Request is blocked by server (-412).', expected=True) raise ExtractorError('Request is blocked by server (-412).', expected=True)
raise
status_code = search_all_result['code'] status_code = search_all_result['code']
if status_code == -400: if status_code == -400:
raise ExtractorError('Invalid request (-400).', expected=True) raise ExtractorError('Invalid request (-400).', expected=True)
result_list = search_all_result['data'].get('result') result_list = search_all_result['data'].get('result')
if result_list is None: if not result_list:
self.write_debug(f'Response: {search_all_result}') self.write_debug(f'Response: {search_all_result}')
raise ExtractorError(f'Result not found in the response ({status_code}).', raise ExtractorError(f'Result not found in the response ({status_code}).',
expected=True) expected=True)
for result_type_dict in result_list: for result_type_dict in result_list:
for result_data in result_type_dict['data']: for result_data in result_type_dict['data']:
if result_data['type'] == 'video': result_type = result_data.get('type')
if result_type == 'video':
yield self.url_result(result_data['arcurl']) yield self.url_result(result_data['arcurl'])
elif result_data['type'] == 'live_room': elif result_type == 'live_room':
yield self.url_result(live_room_prefix + str(result_data['roomid'])) yield self.url_result(live_room_prefix + str(result_data['roomid']))
elif result_data['type'] in ['media_ft', 'media_bangumi']: elif result_type in ['media_ft', 'media_bangumi']:
yield self.url_result(result_data['url']) yield self.url_result(result_data['url'])
elif result_data['type'] == 'bili_user': elif result_type == 'bili_user':
yield self.url_result(bili_user_prefix + str(result_data['mid'])) yield self.url_result(bili_user_prefix + str(result_data['mid']))
@ -2315,7 +2321,7 @@ class BiliBiliSearchPageIE(BilibiliBaseIE):
_VALID_URL = r'https?://search\.bilibili\.com/(?P<type>all|video|bangumi|pgc|live|upuser).*' _VALID_URL = r'https?://search\.bilibili\.com/(?P<type>all|video|bangumi|pgc|live|upuser).*'
_TESTS = [{ _TESTS = [{
'url': r'https://search.bilibili.com/all?keyword=yt+-+dlp+%E4%B8%8B%E8%BD%BD%E5%99%A8', 'url': r'https://search.bilibili.com/all?keyword=yt+-+dlp+%E4%B8%8B%E8%BD%BD%E5%99%A8',
'playlist_count': 20, 'playlist_count': 36,
'info_dict': { 'info_dict': {
'id': 'yt - dlp 下载器', 'id': 'yt - dlp 下载器',
'title': 'yt - dlp 下载器', 'title': 'yt - dlp 下载器',
@ -2330,7 +2336,7 @@ class BiliBiliSearchPageIE(BilibiliBaseIE):
'skip': 'geo-restricted', 'skip': 'geo-restricted',
}, { }, {
'url': r'https://search.bilibili.com/video?keyword=%E8%AE%A9%E5%AD%90%E5%BC%B9%E9%A3%9E&from_source=webtop_search&spm_id_from=333.1007&search_source=5&order=dm&duration=4&tids=181&page=3&o=72', 'url': r'https://search.bilibili.com/video?keyword=%E8%AE%A9%E5%AD%90%E5%BC%B9%E9%A3%9E&from_source=webtop_search&spm_id_from=333.1007&search_source=5&order=dm&duration=4&tids=181&page=3&o=72',
'playlist_mincount': 5, 'playlist_count': 4,
'info_dict': { 'info_dict': {
'id': '让子弹飞', 'id': '让子弹飞',
'title': '让子弹飞', 'title': '让子弹飞',
@ -2338,15 +2344,20 @@ class BiliBiliSearchPageIE(BilibiliBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
live_room_prefix = 'https://live.bilibili.com/'
bili_user_prefix = 'https://space.bilibili.com/'
headers = self.geo_verification_headers() headers = self.geo_verification_headers()
entries = [] entries = []
params = parse_qs(url) params = parse_qs(url)
query = {} query = {
'platform': 'pc',
'page_size': 36,
}
if not self._get_cookies('https://api.bilibili.com').get('buvid3'): if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc') self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
search_type = self._match_valid_url(url).group('type') search_type = self._match_valid_url(url).group('type')
raw_playlist_id = traverse_obj(params, ('keyword', 0)) raw_playlist_id = traverse_obj(params, ('keyword', 0))
if raw_playlist_id is None: if not raw_playlist_id:
raise ExtractorError('Please specify the keyword to search for!', expected=True) raise ExtractorError('Please specify the keyword to search for!', expected=True)
playlist_id = urllib.parse.unquote_plus(raw_playlist_id) playlist_id = urllib.parse.unquote_plus(raw_playlist_id)
search_type_mapping = { search_type_mapping = {
@ -2355,6 +2366,7 @@ def _real_extract(self, url):
'pgc': 'media_ft', 'pgc': 'media_ft',
'live': 'live_room', 'live': 'live_room',
'upuser': 'bili_user', 'upuser': 'bili_user',
'all': 'video', # 'all' search calls video search after page 1
} }
valid_params = [ valid_params = [
'keyword', 'keyword',
@ -2362,7 +2374,7 @@ def _real_extract(self, url):
'order', 'order',
'duration', 'duration',
'tids', 'tids',
'search_type', 'search_type', # Only when searching for live_room or live_user
'order_sort', 'order_sort',
'user_type', 'user_type',
] ]
@ -2370,11 +2382,17 @@ def _real_extract(self, url):
param_value = traverse_obj(params, (valid_param, 0)) param_value = traverse_obj(params, (valid_param, 0))
if param_value is not None: if param_value is not None:
query[valid_param] = param_value query[valid_param] = param_value
live_room_prefix = 'https://live.bilibili.com/' page_num = int(query.get('page', 1))
bili_user_prefix = 'https://space.bilibili.com/' param_offset = int_or_none(traverse_obj(params, ('o', 0)))
if page_num == 1:
query['dynamic_offset'] = 0
elif param_offset is not None:
query['dynamic_offset'] = param_offset
else:
query['dynamic_offset'] = query['page_size'] * (page_num - 1)
if search_type == 'live' and traverse_obj(params, ('search_type', 0)) == 'live_user': if search_type == 'live' and traverse_obj(params, ('search_type', 0)) == 'live_user':
raise ExtractorError('Live users are not downloadable!', expected=True) raise ExtractorError('Live users are not downloadable!', expected=True)
if search_type == 'all': if search_type == 'all' and page_num == 1:
try: try:
search_all_result = self._download_json( search_all_result = self._download_json(
r'https://api.bilibili.com/x/web-interface/search/all/v2', r'https://api.bilibili.com/x/web-interface/search/all/v2',
@ -2387,18 +2405,20 @@ def _real_extract(self, url):
if status_code == -400: if status_code == -400:
raise ExtractorError('Invalid request (-400).', expected=True) raise ExtractorError('Invalid request (-400).', expected=True)
result_list = search_all_result['data'].get('result') result_list = search_all_result['data'].get('result')
if result_list is None: if not result_list:
self.write_debug(f'Response: {search_all_result}')
raise ExtractorError(f'Result not found in the response ({status_code}).', raise ExtractorError(f'Result not found in the response ({status_code}).',
expected=True) expected=True)
for result_type_dict in result_list: for result_type_dict in result_list:
for result_data in result_type_dict['data']: for result_data in result_type_dict['data']:
if result_data['type'] == 'video': result_type = result_data.get('type')
if result_type == 'video':
entries.append(self.url_result(result_data['arcurl'])) entries.append(self.url_result(result_data['arcurl']))
elif result_data['type'] == 'live_room': elif result_type == 'live_room':
entries.append(self.url_result(live_room_prefix + str(result_data['roomid']))) entries.append(self.url_result(live_room_prefix + str(result_data['roomid'])))
elif result_data['type'] in ['media_ft', 'media_bangumi']: elif result_type in ['media_ft', 'media_bangumi']:
entries.append(self.url_result(result_data['url'])) entries.append(self.url_result(result_data['url']))
elif result_data['type'] == 'bili_user': elif result_type == 'bili_user':
entries.append(self.url_result(bili_user_prefix + str(result_data['mid']))) entries.append(self.url_result(bili_user_prefix + str(result_data['mid'])))
else: else:
try: try:
@ -2416,18 +2436,20 @@ def _real_extract(self, url):
if status_code == -400: if status_code == -400:
raise ExtractorError('Invalid request (-400).') raise ExtractorError('Invalid request (-400).')
result_list = search_type_result['data'].get('result') result_list = search_type_result['data'].get('result')
if result_list is None: if not result_list:
self.write_debug(f'Response: {search_type_result}')
raise ExtractorError( raise ExtractorError(
f'Result not found in the response ({status_code}). ' f'Result not found in the response ({status_code}). '
'You might want to try a VPN or a proxy server (with --proxy)', expected=True) 'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
for result_data in result_list: for result_data in result_list:
if result_data['type'] == 'video': result_type = result_data.get('type')
if result_type == 'video':
entries.append(self.url_result(result_data['arcurl'])) entries.append(self.url_result(result_data['arcurl']))
elif result_data['type'] == 'live_room': elif result_type == 'live_room':
entries.append(self.url_result(live_room_prefix + str(result_data['roomid']))) entries.append(self.url_result(live_room_prefix + str(result_data['roomid'])))
elif result_data['type'] in ['media_ft', 'media_bangumi']: elif result_type in ['media_ft', 'media_bangumi']:
entries.append(self.url_result(result_data['url'])) entries.append(self.url_result(result_data['url']))
elif result_data['type'] == 'bili_user': elif result_type == 'bili_user':
entries.append(self.url_result(bili_user_prefix + str(result_data['mid']))) entries.append(self.url_result(bili_user_prefix + str(result_data['mid'])))
return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=playlist_id) return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=playlist_id)