From 3ad0b7f422d547204df687b6d0b2d9110fff3990 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 24 Sep 2024 17:10:42 -0500 Subject: [PATCH] [ie/tiktok] Fix web formats extraction (#11074) Closes #11034 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 9d823a315..f7e103fe9 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -542,16 +542,12 @@ def _extract_web_formats(self, aweme_detail): **COMMON_FORMAT_INFO, 'format_id': 'download', 'url': self._proto_relative_url(download_url), + 'format_note': 'watermarked', + 'preference': -2, }) self._remove_duplicate_formats(formats) - for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']): - f.update({ - 'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '), - 'preference': f.get('preference') or -2, - }) - # Is it a slideshow with only audio for download? if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})): audio_url = aweme_detail['music']['playUrl'] @@ -565,7 +561,8 @@ def _extract_web_formats(self, aweme_detail): 'vcodec': 'none', }) - return formats + # Filter out broken formats, see https://github.com/yt-dlp/yt-dlp/issues/11034 + return [f for f in formats if urllib.parse.urlparse(f['url']).hostname != 'www.tiktok.com'] def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False): author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {