mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-17 00:22:34 +01:00
[youtube_live_chat] Fix parse_yt_initial_data
and add fragment_retries
:ci skip dl
This commit is contained in:
parent
af819c216f
commit
82e3f6ebda
@ -4,6 +4,9 @@
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
|
from ..compat import compat_urllib_error
|
||||||
|
from ..utils import try_get
|
||||||
|
from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
|
||||||
|
|
||||||
|
|
||||||
class YoutubeLiveChatReplayFD(FragmentFD):
|
class YoutubeLiveChatReplayFD(FragmentFD):
|
||||||
@ -15,6 +18,7 @@ def real_download(self, filename, info_dict):
|
|||||||
video_id = info_dict['video_id']
|
video_id = info_dict['video_id']
|
||||||
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
|
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
|
||||||
|
|
||||||
|
fragment_retries = self.params.get('fragment_retries', 0)
|
||||||
test = self.params.get('test', False)
|
test = self.params.get('test', False)
|
||||||
|
|
||||||
ctx = {
|
ctx = {
|
||||||
@ -28,15 +32,52 @@ def dl_fragment(url):
|
|||||||
return self._download_fragment(ctx, url, info_dict, headers)
|
return self._download_fragment(ctx, url, info_dict, headers)
|
||||||
|
|
||||||
def parse_yt_initial_data(data):
|
def parse_yt_initial_data(data):
|
||||||
window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
|
patterns = (
|
||||||
var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
|
r'%s\\s*%s' % (YT_BaseIE._YT_INITIAL_DATA_RE, YT_BaseIE._YT_INITIAL_BOUNDARY_RE),
|
||||||
for patt in window_patt, var_patt:
|
r'%s' % YT_BaseIE._YT_INITIAL_DATA_RE)
|
||||||
|
data = data.decode('utf-8', 'replace')
|
||||||
|
for patt in patterns:
|
||||||
try:
|
try:
|
||||||
raw_json = re.search(patt, data).group(1)
|
raw_json = re.search(patt, data).group(1)
|
||||||
return json.loads(raw_json)
|
return json.loads(raw_json)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
def download_and_parse_fragment(url, frag_index):
|
||||||
|
count = 0
|
||||||
|
while count <= fragment_retries:
|
||||||
|
try:
|
||||||
|
success, raw_fragment = dl_fragment(url)
|
||||||
|
if not success:
|
||||||
|
return False, None, None
|
||||||
|
data = parse_yt_initial_data(raw_fragment) or json.loads(raw_fragment)['response']
|
||||||
|
|
||||||
|
live_chat_continuation = try_get(
|
||||||
|
data,
|
||||||
|
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
|
||||||
|
offset = continuation_id = None
|
||||||
|
processed_fragment = bytearray()
|
||||||
|
for action in live_chat_continuation.get('actions', []):
|
||||||
|
if 'replayChatItemAction' in action:
|
||||||
|
replay_chat_item_action = action['replayChatItemAction']
|
||||||
|
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
|
||||||
|
processed_fragment.extend(
|
||||||
|
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||||
|
if offset is not None:
|
||||||
|
continuation_id = try_get(
|
||||||
|
live_chat_continuation,
|
||||||
|
lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
|
||||||
|
self._append_fragment(ctx, processed_fragment)
|
||||||
|
|
||||||
|
return True, continuation_id, offset
|
||||||
|
except compat_urllib_error.HTTPError as err:
|
||||||
|
count += 1
|
||||||
|
if count <= fragment_retries:
|
||||||
|
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||||
|
if count > fragment_retries:
|
||||||
|
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||||
|
return False, None, None
|
||||||
|
|
||||||
self._prepare_and_start_frag_download(ctx)
|
self._prepare_and_start_frag_download(ctx)
|
||||||
|
|
||||||
success, raw_fragment = dl_fragment(
|
success, raw_fragment = dl_fragment(
|
||||||
@ -44,54 +85,23 @@ def parse_yt_initial_data(data):
|
|||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
data = parse_yt_initial_data(raw_fragment)
|
data = parse_yt_initial_data(raw_fragment)
|
||||||
continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
|
continuation_id = try_get(
|
||||||
|
data,
|
||||||
|
lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
|
||||||
# no data yet but required to call _append_fragment
|
# no data yet but required to call _append_fragment
|
||||||
self._append_fragment(ctx, b'')
|
self._append_fragment(ctx, b'')
|
||||||
|
|
||||||
first = True
|
frag_index = offset = 0
|
||||||
offset = None
|
|
||||||
while continuation_id is not None:
|
while continuation_id is not None:
|
||||||
data = None
|
frag_index += 1
|
||||||
if first:
|
url = 'https://www.youtube.com/live_chat_replay?continuation=%s' % continuation_id
|
||||||
url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
|
if frag_index > 1:
|
||||||
success, raw_fragment = dl_fragment(url)
|
url += '&playerOffsetMs=%d&hidden=false&pbj=1' % max(offset - 5000, 0)
|
||||||
if not success:
|
success, continuation_id, offset = download_and_parse_fragment(url, frag_index)
|
||||||
return False
|
if not success:
|
||||||
data = parse_yt_initial_data(raw_fragment)
|
return False
|
||||||
else:
|
if test:
|
||||||
url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
|
|
||||||
+ '?continuation={}'.format(continuation_id)
|
|
||||||
+ '&playerOffsetMs={}'.format(max(offset - 5000, 0))
|
|
||||||
+ '&hidden=false'
|
|
||||||
+ '&pbj=1')
|
|
||||||
success, raw_fragment = dl_fragment(url)
|
|
||||||
if not success:
|
|
||||||
return False
|
|
||||||
data = json.loads(raw_fragment)['response']
|
|
||||||
|
|
||||||
first = False
|
|
||||||
continuation_id = None
|
|
||||||
|
|
||||||
live_chat_continuation = data['continuationContents']['liveChatContinuation']
|
|
||||||
offset = None
|
|
||||||
processed_fragment = bytearray()
|
|
||||||
if 'actions' in live_chat_continuation:
|
|
||||||
for action in live_chat_continuation['actions']:
|
|
||||||
if 'replayChatItemAction' in action:
|
|
||||||
replay_chat_item_action = action['replayChatItemAction']
|
|
||||||
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
|
|
||||||
processed_fragment.extend(
|
|
||||||
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
|
||||||
try:
|
|
||||||
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
|
|
||||||
except KeyError:
|
|
||||||
continuation_id = None
|
|
||||||
|
|
||||||
self._append_fragment(ctx, processed_fragment)
|
|
||||||
|
|
||||||
if test or offset is None:
|
|
||||||
break
|
break
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
@ -277,15 +277,6 @@ def _download_webpage_handle(self, *args, **kwargs):
|
|||||||
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
|
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
|
||||||
*args, **compat_kwargs(kwargs))
|
*args, **compat_kwargs(kwargs))
|
||||||
|
|
||||||
def _get_yt_initial_data(self, video_id, webpage):
|
|
||||||
config = self._search_regex(
|
|
||||||
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
|
|
||||||
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
|
|
||||||
webpage, 'ytInitialData', default=None)
|
|
||||||
if config:
|
|
||||||
return self._parse_json(
|
|
||||||
uppercase_escape(config), video_id, fatal=False)
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if self._downloader is None:
|
if self._downloader is None:
|
||||||
return
|
return
|
||||||
@ -1943,7 +1934,7 @@ def feed_entry(name):
|
|||||||
|
|
||||||
has_live_chat_replay = False
|
has_live_chat_replay = False
|
||||||
if not is_live:
|
if not is_live:
|
||||||
yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
|
yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
|
||||||
try:
|
try:
|
||||||
yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
|
yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
|
||||||
has_live_chat_replay = True
|
has_live_chat_replay = True
|
||||||
|
Loading…
Reference in New Issue
Block a user