mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-02 17:22:31 +01:00
Merge pull request #57 from insaneracist/youtube-mix-fix
[youtube] fix: extract mix playlist ids from ytInitialData (#33)
This commit is contained in:
commit
7166f47b18
@ -279,6 +279,15 @@ def _download_webpage_handle(self, *args, **kwargs):
|
|||||||
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
|
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
|
||||||
*args, **compat_kwargs(kwargs))
|
*args, **compat_kwargs(kwargs))
|
||||||
|
|
||||||
|
def _get_yt_initial_data(self, video_id, webpage):
    """Locate the ytInitialData assignment in ``webpage`` and parse it.

    Two assignment forms are searched for: ``window["ytInitialData"] = ...;``
    and ``var ytInitialData = ...;``. The captured text is passed through
    ``uppercase_escape`` and then handed to ``self._parse_json`` with
    ``fatal=False``.

    Returns whatever ``_parse_json`` returns, or None when the search
    yields nothing (the search is done with ``default=None``).
    """
    patterns = (
        r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
        r'var\s+ytInitialData\s*=\s*(.*?)(?<=});',
    )
    raw_json = self._search_regex(
        patterns, webpage, 'ytInitialData', default=None)
    if not raw_json:
        # Nothing captured (or an empty capture): no data to parse.
        return None
    return self._parse_json(
        uppercase_escape(raw_json), video_id, fatal=False)
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if self._downloader is None:
|
if self._downloader is None:
|
||||||
return
|
return
|
||||||
@ -1398,15 +1407,6 @@ def _get_ytplayer_config(self, video_id, webpage):
|
|||||||
return self._parse_json(
|
return self._parse_json(
|
||||||
uppercase_escape(config), video_id, fatal=False)
|
uppercase_escape(config), video_id, fatal=False)
|
||||||
|
|
||||||
def _get_yt_initial_data(self, video_id, webpage):
    """Locate the ytInitialData assignment in ``webpage`` and parse it.

    Two assignment forms are searched for: ``window["ytInitialData"] = ...;``
    and ``var ytInitialData = ...;``. The captured text is passed through
    ``uppercase_escape`` and then handed to ``self._parse_json`` with
    ``fatal=False``.

    Returns whatever ``_parse_json`` returns, or None when the search
    yields nothing (the search is done with ``default=None``).
    """
    patterns = (
        r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
        r'var\s+ytInitialData\s*=\s*(.*?)(?<=});',
    )
    raw_json = self._search_regex(
        patterns, webpage, 'ytInitialData', default=None)
    if not raw_json:
        # Nothing captured (or an empty capture): no data to parse.
        return None
    return self._parse_json(
        uppercase_escape(raw_json), video_id, fatal=False)
|
|
||||||
|
|
||||||
def _get_music_metadata_from_yt_initial(self, yt_initial):
|
def _get_music_metadata_from_yt_initial(self, yt_initial):
|
||||||
music_metadata = []
|
music_metadata = []
|
||||||
key_map = {
|
key_map = {
|
||||||
@ -2828,6 +2828,16 @@ def extract_videos_from_page(self, page):
|
|||||||
|
|
||||||
return zip(ids_in_page, titles_in_page)
|
return zip(ids_in_page, titles_in_page)
|
||||||
|
|
||||||
|
def _extract_mix_ids_from_yt_initial(self, yt_initial):
    """Collect video ids from the playlist panel of a ytInitialData object.

    The entries are read from
    ``contents.twoColumnWatchNextResults.playlist.playlist.contents``; for
    each entry the ``playlistPanelVideoRenderer.videoId`` value is looked up
    via ``try_get`` (requesting a ``compat_str``). Entries for which that
    lookup gives a falsy result are skipped.

    Returns a list of ids in the order the entries appear; the list is
    empty when the playlist contents lookup gives nothing.
    """
    panel_entries = try_get(
        yt_initial,
        lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'],
        list) or []
    # Lazily look up each entry's id; falsy results are filtered out below.
    candidate_ids = (
        try_get(entry, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
        for entry in panel_entries)
    return [video_id for video_id in candidate_ids if video_id]
|
||||||
|
|
||||||
def _extract_mix(self, playlist_id):
|
def _extract_mix(self, playlist_id):
|
||||||
# The mixes are generated from a single video
|
# The mixes are generated from a single video
|
||||||
# the id of the playlist is just 'RD' + video_id
|
# the id of the playlist is just 'RD' + video_id
|
||||||
@ -2841,6 +2851,13 @@ def _extract_mix(self, playlist_id):
|
|||||||
r'''(?xs)data-video-username=".*?".*?
|
r'''(?xs)data-video-username=".*?".*?
|
||||||
href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
|
href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
|
||||||
webpage))
|
webpage))
|
||||||
|
|
||||||
|
# if no ids in html of page, try using embedded json
|
||||||
|
if (len(new_ids) == 0):
|
||||||
|
yt_initial = self._get_yt_initial_data(playlist_id, webpage)
|
||||||
|
if yt_initial:
|
||||||
|
new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
|
||||||
|
|
||||||
# Fetch new pages until all the videos are repeated, it seems that
|
# Fetch new pages until all the videos are repeated, it seems that
|
||||||
# there are always 51 unique videos.
|
# there are always 51 unique videos.
|
||||||
new_ids = [_id for _id in new_ids if _id not in ids]
|
new_ids = [_id for _id in new_ids if _id not in ids]
|
||||||
|
Loading…
Reference in New Issue
Block a user