From a0d9967f6822fc279e86bce33464194985148727 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 13 Jun 2024 18:22:30 -0500 Subject: [PATCH] [ie/youtube:tab] Fix channel metadata extraction (#10071) Closes #9893, Closes #10090 Authored by: bashonly, shoxie007 Co-authored-by: shoxie007 <74592022+shoxie007@users.noreply.github.com> --- yt_dlp/extractor/youtube.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a227f2425..a89744eb1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -885,14 +885,14 @@ def _get_count(self, data, *path_list): return count @staticmethod - def _extract_thumbnails(data, *path_list): + def _extract_thumbnails(data, *path_list, final_key='thumbnails'): """ Extract thumbnails from thumbnails dict @param path_list: path list to level that contains 'thumbnails' key """ thumbnails = [] for path in path_list or [()]: - for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)): + for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)): thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue @@ -5124,6 +5124,10 @@ def _extract_metadata_from_tabs(self, item_id, data): else: metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict) + # pageHeaderViewModel slow rollout began April 2024 + page_header_view_model = traverse_obj(data, ( + 'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict})) + # We can get the uncropped banner/avatar by replacing the crop params with '=s0' # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714 def _get_uncropped(url): @@ -5139,8 +5143,10 @@ def _get_uncropped(url): 'preference': 1, }) - channel_banners = self._extract_thumbnails( - data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) + channel_banners = ( + self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) + or self._extract_thumbnails( + page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources')) for banner in channel_banners: banner['preference'] = -10 @@ -5167,7 +5173,11 @@ def _get_uncropped(url): or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or info['id']), 'availability': self._extract_availability(data), - 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), + 'channel_follower_count': ( + self._get_count(data, ('header', ..., 'subscriberCountText')) + or traverse_obj(page_header_view_model, ( + 'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts', + lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))), 'description': try_get(metadata_renderer, lambda x: x.get('description', '')), 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str})) or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),