From df1b9ab6887ea07170290bf118e10658f803cf53 Mon Sep 17 00:00:00 2001
From: c-basalt <117849907+c-basalt@users.noreply.github.com>
Date: Mon, 26 Feb 2024 00:09:06 -0500
Subject: [PATCH] bilibili: rename _get_play_url to _download_playinfo and fix test cases

---
 yt_dlp/extractor/bilibili.py | 169 +++++++++++++++++++----------------
 1 file changed, 91 insertions(+), 78 deletions(-)

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index a3f1cf8d6..767eb7ff8 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -123,6 +123,49 @@ def extract_formats(self, play_info):
             })
         return formats
 
+    def _get_wbi_key(self, video_id):
+        if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
+            return self._wbi_key_cache['key']
+
+        session_data = self._download_json(
+            'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
+
+        lookup = ''.join(traverse_obj(session_data, (
+            'data', 'wbi_img', ('img_url', 'sub_url'),
+            {lambda x: x.rpartition('/')[2].partition('.')[0]})))
+
+        mixin_key_enc_tab = [
+            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
+            33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
+            61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
+            36, 20, 34, 44, 52
+        ]
+
+        self._wbi_key_cache.update({
+            'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
+            'ts': time.time(),
+        })
+        return self._wbi_key_cache['key']
+
+    def _sign_wbi(self, params, video_id):
+        params['wts'] = round(time.time())
+        params = {
+            k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
+            for k, v in sorted(params.items())
+        }
+        query = urllib.parse.urlencode(params)
+        params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
+        return params
+
+    def _download_playinfo(self, bvid, cid, headers={}, qn=None):
+        params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
+        if qn:
+            params['qn'] = qn
+        return self._download_json(
+            'https://api.bilibili.com/x/player/wbi/playurl', bvid,
+            query=self._sign_wbi(params, bvid), headers=headers,
+            note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
+
     def json2srt(self, json_data):
         srt_data = ''
         for idx, line in enumerate(json_data.get('body') or []):
@@ -199,49 +242,6 @@ def _get_episodes_from_season(self, ss_id, url):
                 lambda _, v: url_or_none(v['share_url']) and v['id'])):
             yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
 
-    def _get_wbi_key(self, video_id):
-        if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
-            return self._wbi_key_cache['key']
-
-        session_data = self._download_json(
-            'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
-
-        lookup = ''.join(traverse_obj(session_data, (
-            'data', 'wbi_img', ('img_url', 'sub_url'),
-            {lambda x: x.rpartition('/')[2].partition('.')[0]})))
-
-        mixin_key_enc_tab = [
-            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
-            33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
-            61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
-            36, 20, 34, 44, 52
-        ]
-
-        self._wbi_key_cache.update({
-            'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
-            'ts': time.time(),
-        })
-        return self._wbi_key_cache['key']
-
-    def _sign_wbi(self, params, video_id):
-        params['wts'] = round(time.time())
-        params = {
-            k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
-            for k, v in sorted(params.items())
-        }
-        query = urllib.parse.urlencode(params)
-        params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
-        return params
-
-    def _get_play_url(self, bvid, cid, headers={}, qn=None):
-        params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
-        if qn:
-            params['qn'] = qn
-        return self._download_json(
-            'https://api.bilibili.com/x/player/wbi/playurl', bvid,
-            query=self._sign_wbi(params, bvid), headers=headers,
-            note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
-
     def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
         cid_edges = cid_edges or {}
         division_data = self._download_json(
@@ -278,7 +278,7 @@ def _get_interactive_entries(self, video_id, cid, metainfo):
             ('data', 'interaction', 'graph_version', {int_or_none}))
         cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
         for cid, edges in cid_edges.items():
-            play_info = self._get_play_url(video_id, cid, metainfo.get('http_headers', {}))
+            play_info = self._download_playinfo(video_id, cid, metainfo.get('http_headers', {}))
             yield {
                 **metainfo,
                 'id': f'{video_id}_{cid}',
@@ -380,28 +380,6 @@ class BiliBiliIE(BilibiliBaseIE):
             'duration': 90.314,
             '_old_archive_ids': ['bilibili 498159642_part1'],
         }
-    }, {
-        'note': 'video has subtitles',
-        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
-        'info_dict': {
-            'id': 'BV12N4y1M7rh',
-            'ext': 'mp4',
-            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
-            'tags': list,
-            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
-            'duration': 313.557,
-            'upload_date': '20220709',
-            'uploader': '小夫太渴',
-            'timestamp': 1657347907,
-            'uploader_id': '1326814124',
-            'comment_count': int,
-            'view_count': int,
-            'like_count': int,
-            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
-            'subtitles': 'count:2',  # login required for CC subtitle
-            '_old_archive_ids': ['bilibili 898179753_part1'],
-        },
-        'params': {'listsubtitles': True},
     }, {
         'url': 'https://www.bilibili.com/video/av8903802/',
         'info_dict': {
@@ -487,12 +465,11 @@ class BiliBiliIE(BilibiliBaseIE):
         'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
         'info_dict': {
             'id': 'BV1ms411Q7vw_p4',
-            'ext': 'mp4',
             'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
             'timestamp': 1458222815,
             'upload_date': '20160317',
             'description': '云南方言快乐生产线出品',
-            'duration': 6839.289,
+            'duration': float,
             'uploader': '一笑颠天',
             'uploader_id': '3916081',
             'view_count': int,
@@ -505,9 +482,12 @@ class BiliBiliIE(BilibiliBaseIE):
         'params': {'extractor_args': {'bilibili': {'_prefer_multi_flv': ['32']}}},
         'playlist_count': 19,
         'playlist': [{
-            'id': 'BV1ms411Q7vw_p4_0',
-            'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
-            'duration': 399.102,
+            'info_dict': {
+                'id': 'BV1ms411Q7vw_p4_0',
+                'ext': 'flv',
+                'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
+                'duration': 399.102,
+            },
         }],
     }, {
         'note': 'legacy mp4-only video',
@@ -588,6 +568,29 @@ class BiliBiliIE(BilibiliBaseIE):
             'upload_date': '20191021',
             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
         },
+    }, {
+        'note': 'video has subtitles, which requires login',
+        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
+        'info_dict': {
+            'id': 'BV12N4y1M7rh',
+            'ext': 'mp4',
+            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
+            'tags': list,
+            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
+            'duration': 313.557,
+            'upload_date': '20220709',
+            'uploader': '小夫太渴',
+            'timestamp': 1657347907,
+            'uploader_id': '1326814124',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            'subtitles': 'count:2',  # login required for CC subtitle
+            '_old_archive_ids': ['bilibili 898179753_part1'],
+        },
+        'params': {'listsubtitles': True},
+        'skip': 'login required for subtitle',
     }, {
         'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
         'info_dict': {
@@ -675,7 +678,7 @@ def _real_extract(self, url):
 
         festival_info = {}
         if is_festival:
-            play_info = self._get_play_url(video_id, cid, headers)
+            play_info = self._download_playinfo(video_id, cid, headers)
 
             festival_info = traverse_obj(initial_state, {
                 'uploader': ('videoInfo', 'upName'),
@@ -719,12 +722,12 @@ def _real_extract(self, url):
                 has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
                 for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
                     formats.extend(traverse_obj(
-                        self.extract_formats(self._get_play_url(video_id, cid, headers=headers, qn=qn)),
+                        self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
                         (lambda _, v: not has_qn(v.get('quality')))))
                 self.check_missing_formats(play_info, formats)
                 if traverse_obj(formats, lambda _, v: v['fragments']):
                     if not self._configuration_arg('_prefer_multi_flv'):
-                        # `_prefer_multi_flv` is mainly for writing test case since user can hardly need this
+                        # `_prefer_multi_flv` is mainly for writing test cases; users should rarely need it
                         dropping = ', '.join(traverse_obj(formats, (
                             lambda _, v: v['fragments'], {lambda x: f'{x["format"]} ({x["format_id"]})'})))
                         formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
@@ -1150,12 +1153,14 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
             'id': '3985676',
         },
         'playlist_mincount': 178,
+        'skip': 'login required',
     }, {
         'url': 'https://space.bilibili.com/313580179/video',
         'info_dict': {
             'id': '313580179',
         },
         'playlist_mincount': 92,
+        'skip': 'login required',
     }]
 
     def _real_extract(self, url):
@@ -1419,7 +1424,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
     _TESTS = [{
         'url': 'https://www.bilibili.com/watchlater/#/list',
-        'info_dict': {'id': 'watchlater'},
+        'info_dict': {
+            'id': r're:\d+',
+            'title': '稍后再看',
+        },
         'playlist_mincount': 0,
         'skip': 'login required',
     }]
@@ -1495,14 +1503,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
         'skip': 'redirect url',
     }, {
         'url': 'https://www.bilibili.com/list/watchlater',
-        'info_dict': {'id': 'watchlater'},
+        'info_dict': {
+            'id': r're:2_\d+',
+            'title': '稍后再看',
+            'uploader': str,
+            'uploader_id': str,
+        },
         'playlist_mincount': 0,
         'skip': 'login required',
     }, {
         'url': 'https://www.bilibili.com/medialist/play/watchlater',
         'info_dict': {'id': 'watchlater'},
         'playlist_mincount': 0,
-        'skip': 'login required',
+        'skip': 'redirect url & login required',
     }]
 
     def _extract_medialist(self, query, list_id):
@@ -1553,7 +1566,7 @@ def _real_extract(self, url):
                 'title': ('title', {str}),
                 'uploader': ('upper', 'name', {str}),
                 'uploader_id': ('upper', 'mid', {str_or_none}),
-                'timestamp': ('ctime', {int_or_none}),
+                'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
                 'thumbnail': ('cover', {url_or_none}),
             })),
         }