From 7f51861b1820c37b157a239b1fe30628d907c034 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Wed, 1 Mar 2023 07:56:53 +0000 Subject: [PATCH] [extractor/youtube] Detect and break on looping comments (#6301) Fixes https://github.com/yt-dlp/yt-dlp/issues/6290 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 44e932293..b02e0153a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3341,6 +3341,13 @@ def extract_thread(contents): comment = self._extract_comment(comment_renderer, parent) if not comment: continue + # Sometimes YouTube may break and give us infinite looping comments. + # See: https://github.com/yt-dlp/yt-dlp/issues/6290 + if comment['id'] in tracker['seen_comment_ids']: + self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.') + yield + else: + tracker['seen_comment_ids'].add(comment['id']) tracker['running_total'] += 1 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1 @@ -3365,7 +3372,8 @@ def extract_thread(contents): est_total=0, current_page_thread=0, total_parent_comments=0, - total_reply_comments=0) + total_reply_comments=0, + seen_comment_ids=set()) # TODO: Deprecated # YouTube comments have a max depth of 2