1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-20 01:42:50 +01:00

[extractor/youtube] Detect and break on looping comments (#6301)

Fixes https://github.com/yt-dlp/yt-dlp/issues/6290

Authored by: coletdjnz
This commit is contained in:
coletdjnz 2023-03-01 07:56:53 +00:00 committed by GitHub
parent 5b28cef72d
commit 7f51861b18
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -3341,6 +3341,13 @@ def extract_thread(contents):
 comment = self._extract_comment(comment_renderer, parent)
 if not comment:
     continue
+# Sometimes YouTube may break and give us infinite looping comments.
+# See: https://github.com/yt-dlp/yt-dlp/issues/6290
+if comment['id'] in tracker['seen_comment_ids']:
+    self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.')
+    yield
+else:
+    tracker['seen_comment_ids'].add(comment['id'])
 tracker['running_total'] += 1
 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
@@ -3365,7 +3372,8 @@ def extract_thread(contents):
 est_total=0,
 current_page_thread=0,
 total_parent_comments=0,
-total_reply_comments=0)
+total_reply_comments=0,
+seen_comment_ids=set())
 # TODO: Deprecated
 # YouTube comments have a max depth of 2