From e820fbaa6ff41625b6f4d8453253883b86bf9ca4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 18 Oct 2021 15:23:42 +0530 Subject: [PATCH] Do not verify thumbnail URLs by default Partially reverts cca80fe6110653582e8c8a8d06490b4028ffd755 and 0ba692acc8feffd46b6e1085fb4a2849b685945c Unless `--check-formats` is specified, this causes yt-dlp to return incorrect thumbnail urls. See https://github.com/yt-dlp/yt-dlp/issues/340#issuecomment-877909966, #402 But the overhead in general use is not worth it Closes #694, #725 --- yt_dlp/YoutubeDL.py | 17 +++-------------- yt_dlp/extractor/common.py | 1 - yt_dlp/extractor/youtube.py | 7 ++----- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4a7712cb6..cf97ff21c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2095,25 +2095,14 @@ def _sanitize_thumbnails(self, info_dict): t.get('url'))) def thumbnail_tester(): - if self.params.get('check_formats'): - test_all = True - to_screen = lambda msg: self.to_screen(f'[info] {msg}') - else: - test_all = False - to_screen = self.write_debug - def test_thumbnail(t): - if not test_all and not t.get('_test_url'): - return True - to_screen('Testing thumbnail %s' % t['id']) + self.to_screen(f'[info] Testing thumbnail {t["id"]}') try: self.urlopen(HEADRequest(t['url'])) except network_exceptions as err: - to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % ( - t['id'], t['url'], error_to_compat_str(err))) + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') return False return True - return test_thumbnail for i, t in enumerate(thumbnails): @@ -2123,7 +2112,7 @@ def test_thumbnail(t): t['resolution'] = '%dx%d' % (t['width'], t['height']) t['url'] = sanitize_url(t['url']) - if self.params.get('check_formats') is not False: + if self.params.get('check_formats'): info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() else: info_dict['thumbnails'] = thumbnails diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index dbe7dfcbf..0a14f7c0d 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -233,7 +233,6 @@ class InfoExtractor(object): * "resolution" (optional, string "{width}x{height}", deprecated) * "filesize" (optional, int) - * "_test_url" (optional, bool) - If true, test the URL thumbnail: Full URL to a video thumbnail image. description: Full video description. uploader: Full name of the video uploader. diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index b71cd4292..b9566a0a7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2699,21 +2699,18 @@ def feed_entry(name): # The best resolution thumbnails sometimes does not appear in the webpage # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 # List of possible thumbnails - Ref: - hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3'] - # TODO: Test them also? - For some videos, even these don't exist - guaranteed_thumbnail_names = [ + thumbnail_names = [ + 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3', 'hqdefault', 'hq1', 'hq2', 'hq3', '0', 'mqdefault', 'mq1', 'mq2', 'mq3', 'default', '1', '2', '3' ] - thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names n_thumbnail_names = len(thumbnail_names) thumbnails.extend({ 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format( video_id=video_id, name=name, ext=ext, webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''), - '_test_url': name in hq_thumbnail_names, } for name in thumbnail_names for ext in ('webp', 'jpg')) for thumb in thumbnails: i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)