From e389d172b6f42e4f332ae679dc48543fb7b9b61d Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 12 Mar 2023 14:46:09 +0530
Subject: [PATCH 01/97] Fix 2a23d92d9ec44a0168079e38bcf3d383e5c4c7bb

Closes #6517
---
 yt_dlp/extractor/youtube.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 4165d795c..d7cd0dc62 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3630,6 +3630,7 @@ def _needs_live_processing(self, live_status, duration):
             return live_status
 
     def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
+        CHUNK_SIZE = 10 << 20
         itags, stream_ids = collections.defaultdict(set), []
         itag_qualities, res_qualities = {}, {0: None}
         q = qualities([
@@ -3642,6 +3643,13 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
         all_formats = self._configuration_arg('include_duplicate_formats')
 
+        def build_fragments(f):
+            return LazyList({
+                'url': update_url_query(f['url'], {
+                    'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
+                })
+            } for range_start in range(0, f['filesize'], CHUNK_SIZE))
+
         for fmt in streaming_formats:
             if fmt.get('targetDurationSec'):
                 continue
@@ -3771,17 +3779,12 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
             if single_stream and dct.get('ext'):
                 dct['container'] = dct['ext'] + '_dash'
 
-            CHUNK_SIZE = 10 << 20
             if dct['filesize']:
                 yield {
                     **dct,
                     'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                     'protocol': 'http_dash_segments',
-                    'fragments': LazyList({
-                        'url': update_url_query(dct['url'], {
-                            'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, dct["filesize"])}'
-                        })
-                    } for range_start in range(0, dct['filesize'], CHUNK_SIZE))
+                    'fragments': build_fragments(dct),
                 }
                 if not all_formats:
                     continue

From 0181b9a1b31db3fde943f7cd3fe9662f23bff292 Mon Sep 17 00:00:00 2001
From: Ha Tien Loi <loiht.b17vt220@stu.ptit.edu.vn>
Date: Sun, 12 Mar 2023 23:34:22 +0700
Subject: [PATCH 02/97] [extractor/thesun] Update `_VALID_URL` (#6522)

Authored by: hatienl0i261299
Closes #6479
---
 yt_dlp/extractor/thesun.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/thesun.py b/yt_dlp/extractor/thesun.py
index ba5848283..5edcf1cc1 100644
--- a/yt_dlp/extractor/thesun.py
+++ b/yt_dlp/extractor/thesun.py
@@ -5,15 +5,22 @@
 
 
 class TheSunIE(InfoExtractor):
-    _VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?the-?sun(\.co\.uk|\.com)/[^/]+/(?P<id>\d+)'
+    _TESTS = [{
         'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
         'info_dict': {
             'id': '2261604',
             'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
         },
         'playlist_count': 2,
-    }
+    }, {
+        'url': 'https://www.the-sun.com/entertainment/7611415/1000lb-sisters-fans-rip-amy-dangerous-health-decision/',
+        'info_dict': {
+            'id': '7611415',
+            'title': 'md5:e0b9b976f79dc770e5c80f22f40bb844',
+        },
+        'playlist_count': 1,
+    }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):

From 026435714cb7c39613a0d7d2acd15d3823b78d94 Mon Sep 17 00:00:00 2001
From: Ha Tien Loi <loiht.b17vt220@stu.ptit.edu.vn>
Date: Mon, 13 Mar 2023 00:20:40 +0700
Subject: [PATCH 03/97] [extractor/LastFM] Rewrite playlist extraction (#6379)

Authored by: hatienl0i261299, pukkandan
Closes #5975
---
 yt_dlp/extractor/lastfm.py | 43 ++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py
index f14198cfd..67103352e 100644
--- a/yt_dlp/extractor/lastfm.py
+++ b/yt_dlp/extractor/lastfm.py
@@ -1,33 +1,24 @@
+import itertools
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none, format_field
+from ..utils import int_or_none, parse_qs, traverse_obj
 
 
 class LastFMPlaylistBaseIE(InfoExtractor):
     def _entries(self, url, playlist_id):
-        webpage = self._download_webpage(url, playlist_id)
-        start_page_number = int_or_none(self._search_regex(
-            r'\bpage=(\d+)', url, 'page', default=None)) or 1
-        last_page_number = int_or_none(self._search_regex(
-            r'>(\d+)</a>[^<]*</li>[^<]*<li[^>]+class="pagination-next', webpage, 'last_page', default=None))
-
-        for page_number in range(start_page_number, (last_page_number or start_page_number) + 1):
+        single_page = traverse_obj(parse_qs(url), ('page', -1, {int_or_none}))
+        for page in itertools.count(single_page or 1):
             webpage = self._download_webpage(
-                url, playlist_id,
-                note='Downloading page %d%s' % (page_number, format_field(last_page_number, None, ' of %d')),
-                query={'page': page_number})
-            page_entries = [
-                self.url_result(player_url, 'Youtube')
-                for player_url in set(re.findall(r'data-youtube-url="([^"]+)"', webpage))
-            ]
-
-            for e in page_entries:
-                yield e
+                url, playlist_id, f'Downloading page {page}', query={'page': page})
+            videos = re.findall(r'data-youtube-url="([^"]+)"', webpage)
+            yield from videos
+            if single_page or not videos:
+                return
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
-        return self.playlist_result(self._entries(url, playlist_id), playlist_id)
+        return self.playlist_from_matches(self._entries(url, playlist_id), playlist_id, ie='Youtube')
 
 
 class LastFMPlaylistIE(LastFMPlaylistBaseIE):
@@ -37,7 +28,7 @@ class LastFMPlaylistIE(LastFMPlaylistBaseIE):
         'info_dict': {
             'id': 'Oasis',
         },
-        'playlist_count': 11,
+        'playlist_mincount': 11,
     }, {
         'url': 'https://www.last.fm/music/Oasis',
         'only_matching': True,
@@ -73,6 +64,18 @@ class LastFMUserIE(LastFMPlaylistBaseIE):
             'id': '12319471',
         },
         'playlist_count': 30,
+    }, {
+        'url': 'https://www.last.fm/user/naamloos1/playlists/12543760',
+        'info_dict': {
+            'id': '12543760',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'https://www.last.fm/user/naamloos1/playlists/12543760?page=3',
+        'info_dict': {
+            'id': '12543760',
+        },
+        'playlist_count': 32,
     }]
 
 

From 1e3c2b6ec28d7ab5e31341fa93c47b65be4fbff4 Mon Sep 17 00:00:00 2001
From: Joshua Lochner <admin@xenova.com>
Date: Sun, 12 Mar 2023 19:38:27 +0200
Subject: [PATCH 04/97] [extractor/medaltv] Fix clips (#6502)

Closes #6489
Authored by: xenova
---
 yt_dlp/extractor/medaltv.py | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py
index 82be823b8..9e57ee21a 100644
--- a/yt_dlp/extractor/medaltv.py
+++ b/yt_dlp/extractor/medaltv.py
@@ -8,12 +8,12 @@
     float_or_none,
     int_or_none,
     str_or_none,
-    traverse_obj,
+    traverse_obj
 )
 
 
 class MedalTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?medal\.tv/(?P<path>games/[^/?#&]+/clips)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
         'md5': '6930f8972914b6b9fdc2bb3918098ba0',
@@ -80,25 +80,14 @@ class MedalTVIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        path = self._match_valid_url(url).group('path')
 
         webpage = self._download_webpage(url, video_id)
 
-        next_data = self._search_json(
-            '<script[^>]*__NEXT_DATA__[^>]*>', webpage,
+        hydration_data = self._search_json(
+            r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
             'next data', video_id, end_pattern='</script>', fatal=False)
 
-        build_id = next_data.get('buildId')
-        if not build_id:
-            raise ExtractorError(
-                'Could not find build ID.', video_id=video_id)
-
-        locale = next_data.get('locale', 'en')
-
-        api_response = self._download_json(
-            f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json', video_id)
-
-        clip = traverse_obj(api_response, ('pageProps', 'clip')) or {}
+        clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
         if not clip:
             raise ExtractorError(
                 'Could not find video information.', video_id=video_id)
@@ -152,7 +141,7 @@ def add_item(container, item_url, height, id_key='format_id', item_id=None):
 
         # Necessary because the id of the author is not known in advance.
         # Won't raise an issue if no profile can be found as this is optional.
-        author = traverse_obj(api_response, ('pageProps', 'profile')) or {}
+        author = traverse_obj(hydration_data, ('profiles', ...), get_all=False) or {}
         author_id = str_or_none(author.get('userId'))
         author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
 

From 80ea6d3dea8483cddd39fc89b5ee1fc06670c33c Mon Sep 17 00:00:00 2001
From: JChris246 <43832407+JChris246@users.noreply.github.com>
Date: Sun, 12 Mar 2023 14:02:17 -0400
Subject: [PATCH 05/97] [extractor/Parler] Rewrite extractor (#6446)

Authored by: JChris246
Closes #6068
---
 yt_dlp/extractor/parler.py | 94 +++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 57 deletions(-)

diff --git a/yt_dlp/extractor/parler.py b/yt_dlp/extractor/parler.py
index 68a60bc84..2af805e7f 100644
--- a/yt_dlp/extractor/parler.py
+++ b/yt_dlp/extractor/parler.py
@@ -1,13 +1,14 @@
+import functools
+
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
     clean_html,
-    format_field,
     int_or_none,
     strip_or_none,
     traverse_obj,
     unified_timestamp,
-    urlencode_postdata,
+    urljoin,
 )
 
 
@@ -24,7 +25,7 @@ class ParlerIE(InfoExtractor):
                 'thumbnail': 'https://bl-images.parler.com/videos/6ce7cdf3-a27a-4d72-bf9c-d3e17ce39a66/thumbnail.jpeg',
                 'title': 'Parler video #df79fdba-07cc-48fe-b085-3293897520d7',
                 'description': 'md5:6f220bde2df4a97cbb89ac11f1fd8197',
-                'timestamp': 1659744000,
+                'timestamp': 1659785481,
                 'upload_date': '20220806',
                 'uploader': 'Tulsi Gabbard',
                 'uploader_id': 'TulsiGabbard',
@@ -34,78 +35,57 @@ class ParlerIE(InfoExtractor):
                 'repost_count': int,
             },
         },
-        {
-            'url': 'https://parler.com/feed/a7406eb4-91e5-4793-b5e3-ade57a24e287',
-            'md5': '11687e2f5bb353682cee338d181422ed',
-            'info_dict': {
-                'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287',
-                'ext': 'mp4',
-                'thumbnail': 'https://bl-images.parler.com/videos/317827a8-1e48-4cbc-981f-7dd17d4c1183/thumbnail.jpeg',
-                'title': 'Parler video #a7406eb4-91e5-4793-b5e3-ade57a24e287',
-                'description': 'This man should run for office',
-                'timestamp': 1659657600,
-                'upload_date': '20220805',
-                'uploader': 'Benny Johnson',
-                'uploader_id': 'BennyJohnson',
-                'uploader_url': 'https://parler.com/BennyJohnson',
-                'view_count': int,
-                'comment_count': int,
-                'repost_count': int,
-            },
-        },
         {
             'url': 'https://parler.com/feed/f23b85c1-6558-470f-b9ff-02c145f28da5',
             'md5': 'eaba1ff4a10fe281f5ce74e930ab2cb4',
             'info_dict': {
                 'id': 'r5vkSaz8PxQ',
                 'ext': 'mp4',
-                'thumbnail': 'https://i.ytimg.com/vi_webp/r5vkSaz8PxQ/maxresdefault.webp',
-                'title': 'Tom MacDonald Names Reaction',
-                'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
-                'upload_date': '20220716',
-                'duration': 1267,
-                'uploader': 'Mahesh Chookolingo',
-                'uploader_id': 'maheshchookolingo',
-                'uploader_url': 'http://www.youtube.com/user/maheshchookolingo',
-                'channel': 'Mahesh Chookolingo',
-                'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
-                'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
-                'categories': ['Entertainment'],
-                'tags': list,
-                'availability': 'public',
                 'live_status': 'not_live',
-                'view_count': int,
                 'comment_count': int,
+                'duration': 1267,
                 'like_count': int,
                 'channel_follower_count': int,
-                'age_limit': 0,
+                'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
+                'upload_date': '20220716',
+                'thumbnail': 'https://i.ytimg.com/vi/r5vkSaz8PxQ/maxresdefault.jpg',
+                'tags': 'count:17',
+                'availability': 'public',
+                'categories': ['Entertainment'],
                 'playable_in_embed': True,
+                'channel': 'Who Knows What! With Mahesh & Friends',
+                'title': 'Tom MacDonald Names Reaction',
+                'uploader': 'Who Knows What! With Mahesh & Friends',
+                'uploader_id': '@maheshchookolingo',
+                'age_limit': 0,
+                'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
+                'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
+                'view_count': int,
+                'uploader_url': 'http://www.youtube.com/@maheshchookolingo',
             },
         },
     ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        data = self._download_json(
-            'https://parler.com/open-api/ParleyDetailEndpoint.php', video_id,
-            data=urlencode_postdata({'uuid': video_id}))['data'][0]
-        primary = data['primary']
-
-        embed = self._parse_json(primary.get('V2LINKLONG') or '', video_id, fatal=False)
-        if embed:
-            return self.url_result(embed[0], YoutubeIE)
+        data = self._download_json(f'https://api.parler.com/v0/public/parleys/{video_id}',
+                                   video_id)['data']
+        if data.get('link'):
+            return self.url_result(data['link'], YoutubeIE)
 
         return {
             'id': video_id,
-            'url': traverse_obj(primary, ('video_data', 'videoSrc')),
-            'thumbnail': traverse_obj(primary, ('video_data', 'thumbnailUrl')),
-            'title': '',
-            'description': strip_or_none(clean_html(primary.get('full_body'))) or None,
-            'timestamp': unified_timestamp(primary.get('date_created')),
-            'uploader': strip_or_none(primary.get('name')),
-            'uploader_id': strip_or_none(primary.get('username')),
-            'uploader_url': format_field(strip_or_none(primary.get('username')), None, 'https://parler.com/%s'),
-            'view_count': int_or_none(primary.get('view_count')),
-            'comment_count': int_or_none(traverse_obj(data, ('engagement', 'commentCount'))),
-            'repost_count': int_or_none(traverse_obj(data, ('engagement', 'echoCount'))),
+            'title': strip_or_none(data.get('title')) or '',
+            **traverse_obj(data, {
+                'url': ('video', 'videoSrc'),
+                'thumbnail': ('video', 'thumbnailUrl'),
+                'description': ('body', {clean_html}),
+                'timestamp': ('date_created', {unified_timestamp}),
+                'uploader': ('user', 'name', {strip_or_none}),
+                'uploader_id': ('user', 'username', {str}),
+                'uploader_url': ('user', 'username', {functools.partial(urljoin, 'https://parler.com/')}),
+                'view_count': ('views', {int_or_none}),
+                'comment_count': ('total_comments', {int_or_none}),
+                'repost_count': ('echos', {int_or_none}),
+            })
         }

From cf9fd52fabe71d6e7c30d3ea525029ffa561fc9c Mon Sep 17 00:00:00 2001
From: Chris Caruso <carusochrisr@gmail.com>
Date: Sun, 12 Mar 2023 11:07:34 -0700
Subject: [PATCH 06/97] [extractor/jwplatform] Update `_extract_embed_urls`
 (#6383)

Authored by: carusocr
---
 yt_dlp/extractor/jwplatform.py | 37 ++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py
index c94968943..bc47aa6d3 100644
--- a/yt_dlp/extractor/jwplatform.py
+++ b/yt_dlp/extractor/jwplatform.py
@@ -8,14 +8,16 @@ class JWPlatformIE(InfoExtractor):
     _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
     _TESTS = [{
         'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
-        'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
+        'md5': '3aa16e4f6860e6e78b7df5829519aed3',
         'info_dict': {
             'id': 'nPripu9l',
-            'ext': 'mov',
+            'ext': 'mp4',
             'title': 'Big Buck Bunny Trailer',
             'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
             'upload_date': '20081127',
             'timestamp': 1227796140,
+            'duration': 32.0,
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/nPripu9l/poster.jpg?width=720',
         }
     }, {
         'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
@@ -37,18 +39,31 @@ class JWPlatformIE(InfoExtractor):
         },
     }, {
         # Player url not surrounded by quotes
-        'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
+        'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/school-trip',
         'info_dict': {
-            'id': 'R10NQdhY',
-            'title': 'Playgirl',
+            'id': 'jUxh5uin',
+            'title': 'Klassenfahrt',
             'ext': 'mp4',
-            'upload_date': '20220624',
-            'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
-            'timestamp': 1656064800,
-            'description': 'BRD 1966, Will Tremper',
-            'duration': 5146.0,
+            'upload_date': '20230109',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/jUxh5uin/poster.jpg?width=720',
+            'timestamp': 1673270298,
+            'description': '',
+            'duration': 5193.0,
         },
         'params': {'allowed_extractors': ['generic', 'jwplatform']},
+    }, {
+        # iframe src attribute includes backslash before URL string
+        'url': 'https://www.elespectador.com/colombia/video-asi-se-evito-la-fuga-de-john-poulos-presunto-feminicida-de-valentina-trespalacios-explicacion',
+        'info_dict': {
+            'id': 'QD3gsexj',
+            'title': 'Así se evitó la fuga de John Poulos, presunto feminicida de Valentina Trespalacios',
+            'ext': 'mp4',
+            'upload_date': '20230127',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/QD3gsexj/poster.jpg?width=720',
+            'timestamp': 1674862986,
+            'description': 'md5:128fd74591c4e1fc2da598c5cb6f5ce4',
+            'duration': 263.0,
+        },
     }]
 
     @classmethod
@@ -57,7 +72,7 @@ def _extract_embed_urls(cls, url, webpage):
             # <input value=URL> is used by hyland.com
             # if we find <iframe>, dont look for <input>
             ret = re.findall(
-                r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
+                r'<%s[^>]+?%s=\\?["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
                 webpage)
             if ret:
                 return ret

From cbfe2e5cbe0f4649a91e323a82b8f5f774f36662 Mon Sep 17 00:00:00 2001
From: unbeatable-101 <daviswill048@icloud.com>
Date: Sun, 12 Mar 2023 18:25:05 -0500
Subject: [PATCH 07/97] [extractor/nebula] Add `beta.nebula.tv` (#6516)

Authored by: unbeatable-101
---
 yt_dlp/extractor/nebula.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py
index 81e2f56e6..5c1b7c712 100644
--- a/yt_dlp/extractor/nebula.py
+++ b/yt_dlp/extractor/nebula.py
@@ -5,7 +5,7 @@
 from .common import InfoExtractor
 from ..utils import ExtractorError, parse_iso8601
 
-_BASE_URL_RE = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
+_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
 
 
 class NebulaBaseIE(InfoExtractor):
@@ -183,6 +183,10 @@ class NebulaIE(NebulaBaseIE):
             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
             'only_matching': True,
         },
+        {
+            'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
+            'only_matching': True,
+        },
     ]
 
     def _fetch_video_metadata(self, slug):

From 98ac902c4979e4529b166e873473bef42baa2e3e Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 13 Mar 2023 05:19:13 +0530
Subject: [PATCH 08/97] [dependencies/Cryptodome] Fix `__bool__`

Bug in 65f6e807804d2af5e00f2aecd72bfc43af19324a
---
 yt_dlp/dependencies/Cryptodome.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py
index 74ab6575c..2cfa4c952 100644
--- a/yt_dlp/dependencies/Cryptodome.py
+++ b/yt_dlp/dependencies/Cryptodome.py
@@ -1,4 +1,4 @@
-import types
+from ..compat.compat_utils import passthrough_module
 
 try:
     import Cryptodome as _parent
@@ -6,9 +6,11 @@
     try:
         import Crypto as _parent
     except (ImportError, SyntaxError):  # Old Crypto gives SyntaxError in newer Python
-        _parent = types.ModuleType('no_Cryptodome')
+        _parent = passthrough_module(__name__, 'no_Cryptodome')
         __bool__ = lambda: False
 
+del passthrough_module
+
 __version__ = ''
 AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
 try:

From 607510b9f2f67bfe7d33d74031a5c1fe22a24862 Mon Sep 17 00:00:00 2001
From: coletdjnz <coletdjnz@protonmail.com>
Date: Mon, 13 Mar 2023 01:43:37 +0000
Subject: [PATCH 09/97] [extractor/youtube] Handle incomplete initial data from
 watch page (#6510)

Authored by: coletdjnz
---
 yt_dlp/extractor/youtube.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index d7cd0dc62..b024d18b7 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -4254,12 +4254,15 @@ def process_language(container, base_url, lang_code, sub_name, query):
         initial_data = None
         if webpage:
             initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
+            if not traverse_obj(initial_data, 'contents'):
+                self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
+                initial_data = None
         if not initial_data:
             query = {'videoId': video_id}
             query.update(self._get_checkok_params())
             initial_data = self._extract_response(
                 item_id=video_id, ep='next', fatal=False,
-                ytcfg=master_ytcfg, query=query,
+                ytcfg=master_ytcfg, query=query, check_get_keys='contents',
                 headers=self.generate_api_headers(ytcfg=master_ytcfg),
                 note='Downloading initial data API JSON')
 

From 427a8fafbb0e18c28d0ed7960be838d7b26b88d3 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 15 Mar 2023 04:49:22 +0530
Subject: [PATCH 10/97] [build] Pin `pyinstaller` version for MacOS

Workaround for #6541
---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 93668a7bf..aa11c6194 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -192,7 +192,7 @@ jobs:
       - name: Install Requirements
         run: |
           brew install coreutils
-          /usr/bin/python3 -m pip install -U --user pip Pyinstaller -r requirements.txt
+          /usr/bin/python3 -m pip install -U --user pip Pyinstaller==5.8 -r requirements.txt
 
       - name: Prepare
         run: |

From 071670cbeaa01ddf2cc20a95ae6da25f8f086431 Mon Sep 17 00:00:00 2001
From: Nicholas Defranco <39540565+nick-cd@users.noreply.github.com>
Date: Tue, 14 Mar 2023 19:21:14 -0400
Subject: [PATCH 11/97] [extractor/youtube] Fix parsing `comment_count` (#6523)

Closes #5849
Authored by: nick-cd
---
 yt_dlp/extractor/youtube.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index b024d18b7..ca56f112b 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -4268,11 +4268,11 @@ def process_language(container, base_url, lang_code, sub_name, query):
 
         info['comment_count'] = traverse_obj(initial_data, (
             'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
-            'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
+            'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'
         ), (
             'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
-            'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
-        ), expected_type=int_or_none, get_all=False)
+            'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'
+        ), expected_type=self._get_count, get_all=False)
 
         try:  # This will error if there is no livechat
             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

From 03025b6e105139d01cd415ddc51fd692957fd2ba Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 16 Mar 2023 14:53:18 -0500
Subject: [PATCH 12/97] [extractor/mediastream] Improve `WinSports` and embed
 extraction (#6426)

Closes #6419, Closes #6527
Authored by: bashonly
---
 yt_dlp/extractor/mediastream.py | 102 +++++++++++++++++++++-----------
 1 file changed, 66 insertions(+), 36 deletions(-)

diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py
index e8d427a31..cef769f29 100644
--- a/yt_dlp/extractor/mediastream.py
+++ b/yt_dlp/extractor/mediastream.py
@@ -2,16 +2,44 @@
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     remove_end,
-    str_or_none,
-    strip_or_none,
     traverse_obj,
     urljoin,
 )
 
 
-class MediaStreamIE(InfoExtractor):
-    _VALID_URL = r'https?://mdstrm.com/(?:embed|live-stream)/(?P<id>\w+)'
+class MediaStreamBaseIE(InfoExtractor):
+    _EMBED_BASE_URL = 'https://mdstrm.com/embed'
+    _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
+
+    def _extract_mediastream_urls(self, webpage):
+        yield from traverse_obj(list(self._yield_json_ld(webpage, None)), (
+            lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
+            {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
+
+        for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
+            yield f'{self._EMBED_BASE_URL}/{mobj.group("video_id")}'
+
+        yield from re.findall(
+            rf'<iframe[^>]+\bsrc="({self._BASE_URL_RE}/\w+)', webpage)
+
+        for mobj in re.finditer(
+            r'''(?x)
+                <(?:div|ps-mediastream)[^>]+
+                (class="[^"]*MediaStreamVideoPlayer)[^"]*"[^>]+
+                data-video-id="(?P<video_id>\w+)"
+                (?:\s*data-video-type="(?P<video_type>[^"]+))?
+                (?:[^>]*>\s*<div[^>]+\1[^"]*"[^>]+data-mediastream=["\'][^>]+
+                    https://mdstrm\.com/(?P<live>live-stream))?
+                ''', webpage):
+
+            video_type = 'live-stream' if mobj.group('video_type') == 'live' or mobj.group('live') else 'embed'
+            yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
+
+
+class MediaStreamIE(MediaStreamBaseIE):
+    _VALID_URL = MediaStreamBaseIE._BASE_URL_RE + r'/(?P<id>\w+)'
 
     _TESTS = [{
         'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
@@ -23,6 +51,7 @@ class MediaStreamIE(InfoExtractor):
             'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
             'ext': 'mp4',
         },
+        'params': {'skip_download': 'm3u8'},
     }]
 
     _WEBPAGE_TESTS = [{
@@ -35,9 +64,7 @@ class MediaStreamIE(InfoExtractor):
             'ext': 'mp4',
             'live_status': 'is_live',
         },
-        'params': {
-            'skip_download': 'Livestream'
-        },
+        'params': {'skip_download': 'Livestream'},
     }, {
         'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
         'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
@@ -48,6 +75,7 @@ class MediaStreamIE(InfoExtractor):
             'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
             'ext': 'mp4',
         },
+        'params': {'skip_download': 'm3u8'},
     }, {
         'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
         'info_dict': {
@@ -57,6 +85,7 @@ class MediaStreamIE(InfoExtractor):
             'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
             'ext': 'mp4',
         },
+        'params': {'skip_download': 'm3u8'},
     }, {
         'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
         'info_dict': {
@@ -66,26 +95,12 @@ class MediaStreamIE(InfoExtractor):
             'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
             'ext': 'mp4',
         },
+        'params': {'skip_download': 'm3u8'},
     }]
 
-    @classmethod
-    def _extract_embed_urls(cls, url, webpage):
-        for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
-            yield f'https://mdstrm.com/embed/{mobj.group("video_id")}'
-
-        yield from re.findall(
-            r'<iframe[^>]src\s*=\s*"(https://mdstrm.com/[\w-]+/\w+)', webpage)
-
-        for mobj in re.finditer(
-            r'''(?x)
-                <(?:div|ps-mediastream)[^>]+
-                class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+
-                data-video-id\s*=\s*"(?P<video_id>\w+)\s*"
-                (?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))?
-                ''', webpage):
-
-            video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed'
-            yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
+    def _extract_from_webpage(self, url, webpage):
+        for embed_url in self._extract_mediastream_urls(webpage):
+            yield self.url_result(embed_url, MediaStreamIE, None)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -94,7 +109,7 @@ def _real_extract(self, url):
         if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
             self.raise_geo_restricted()
 
-        player_config = self._search_json(r'window.MDSTRM.OPTIONS\s*=', webpage, 'metadata', video_id)
+        player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
 
         formats, subtitles = [], {}
         for video_format in player_config['src']:
@@ -122,7 +137,7 @@ def _real_extract(self, url):
         }
 
 
-class WinSportsVideoIE(InfoExtractor):
+class WinSportsVideoIE(MediaStreamBaseIE):
     _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
 
     _TESTS = [{
@@ -158,21 +173,36 @@ class WinSportsVideoIE(InfoExtractor):
             'ext': 'mp4',
         },
         'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.winsports.co/videos/bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
+        'info_dict': {
+            'id': '6402adb62bbf3b18d454e1b0',
+            'display_id': 'bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
+            'title': '⚽Bucaramanga se quedó con el grito de gol en la garganta',
+            'description': 'Gol anulado Bucaramanga',
+            'thumbnail': r're:^https?://[^?#]+6402adb62bbf3b18d454e1b0',
+            'ext': 'mp4',
+        },
+        'params': {'skip_download': 'm3u8'},
     }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        json_ld = self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={})
-        media_setting_json = self._search_json(
-            r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id)
+        data = self._search_json(
+            r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'data', display_id)
 
-        mediastream_id = traverse_obj(
-            media_setting_json, ('settings', 'mediastream_formatter', ..., 'mediastream_id', {str_or_none}),
-            get_all=False) or json_ld.get('url')
-        if not mediastream_id:
+        mediastream_url = urljoin(f'{self._EMBED_BASE_URL}/', (
+            traverse_obj(data, (
+                (('settings', 'mediastream_formatter', ..., 'mediastream_id'), 'url'), {str}), get_all=False)
+            or next(self._extract_mediastream_urls(webpage), None)))
+
+        if not mediastream_url:
             self.raise_no_formats('No MediaStream embed found in webpage')
 
+        title = clean_html(remove_end(
+            self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}).get('title')
+            or self._og_search_title(webpage), '| Win Sports'))
+
         return self.url_result(
-            urljoin('https://mdstrm.com/embed/', mediastream_id), MediaStreamIE, display_id, url_transparent=True,
-            display_id=display_id, video_title=strip_or_none(remove_end(json_ld.get('title'), '| Win Sports')))
+            mediastream_url, MediaStreamIE, display_id, url_transparent=True, display_id=display_id, video_title=title)

From 460da07439718d9af1e3661da2a23e05a913a2e6 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 16 Mar 2023 14:54:25 -0500
Subject: [PATCH 13/97] [extractor/genius] Add support for articles (#6474)

Closes #6465
Authored by: bashonly
---
 yt_dlp/extractor/genius.py | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/yt_dlp/extractor/genius.py b/yt_dlp/extractor/genius.py
index 62f5a28ff..57c25e71e 100644
--- a/yt_dlp/extractor/genius.py
+++ b/yt_dlp/extractor/genius.py
@@ -10,7 +10,7 @@
 
 
 class GeniusIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?genius\.com/videos/(?P<id>[^?/#]+)'
+    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?:videos|(?P<article>a))/(?P<id>[^?/#]+)'
     _TESTS = [{
         'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
         'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
@@ -41,19 +41,37 @@ class GeniusIE(InfoExtractor):
             'timestamp': 1631209167,
             'thumbnail': r're:^https?://.*\.jpg$',
         },
+    }, {
+        'url': 'https://genius.com/a/cordae-anderson-paak-break-down-the-meaning-of-two-tens',
+        'md5': 'f98a4e03b16b0a2821bd6e52fb3cc9d7',
+        'info_dict': {
+            'id': '6321509903112',
+            'ext': 'mp4',
+            'title': 'Cordae & Anderson .Paak Breaks Down The Meaning Of “Two Tens”',
+            'description': 'md5:1255f0e1161d07342ce56a8464ac339d',
+            'tags': ['song id: 5457554'],
+            'uploader_id': '4863540648001',
+            'duration': 361.813,
+            'upload_date': '20230301',
+            'timestamp': 1677703908,
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
     }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        display_id, is_article = self._match_valid_url(url).group('id', 'article')
         webpage = self._download_webpage(url, display_id)
 
         metadata = self._search_json(
-            r'<meta content="', webpage, 'metadata', display_id, transform_source=unescapeHTML)
-        video_id = traverse_obj(
-            metadata, ('video', 'provider_id'),
-            ('dfp_kv', lambda _, x: x['name'] == 'brightcove_video_id', 'values', 0), get_all=False)
+            r'<meta content="', webpage, 'metadata', display_id,
+            end_pattern=r'"\s+itemprop="page_data"', transform_source=unescapeHTML)
+        video_id = traverse_obj(metadata, (
+            (('article', 'media', ...), ('video', None)),
+            ('provider_id', ('dfp_kv', lambda _, v: v['name'] == 'brightcove_video_id', 'values', ...))),
+            get_all=False)
         if not video_id:
-            raise ExtractorError('Brightcove video id not found in webpage')
+            # Not all article pages have videos, expect the error
+            raise ExtractorError('Brightcove video ID not found in webpage', expected=bool(is_article))
 
         config = self._search_json(r'var\s*APP_CONFIG\s*=', webpage, 'config', video_id, default={})
         account_id = config.get('brightcove_account_id', '4863540648001')
@@ -68,7 +86,7 @@ def _real_extract(self, url):
 
 
 class GeniusLyricsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics[?/#]?'
+    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics(?:[?/#]|$)'
     _TESTS = [{
         'url': 'https://genius.com/Lil-baby-heyy-lyrics',
         'playlist_mincount': 2,

From 216bcb66d7dce0762767d751dad10650cb57da9d Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 16 Mar 2023 14:54:56 -0500
Subject: [PATCH 14/97] [extractor/tiktok] Improve `TikTokLive` extractor
 (#6520)

Closes #6459
Authored by: bashonly
---
 yt_dlp/extractor/tiktok.py | 179 ++++++++++++++++++++++++++++++++-----
 1 file changed, 158 insertions(+), 21 deletions(-)

diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 096748bf7..f1696a2fc 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -1,6 +1,7 @@
 import itertools
 import json
 import random
+import re
 import string
 import time
 
@@ -12,15 +13,18 @@
     LazyList,
     UnsupportedError,
     UserNotLive,
+    format_field,
     get_element_by_id,
     get_first,
     int_or_none,
     join_nonempty,
+    merge_dicts,
     qualities,
     remove_start,
     srt_subtitles_timecode,
     str_or_none,
     traverse_obj,
+    try_call,
     try_get,
     url_or_none,
 )
@@ -563,7 +567,7 @@ def _real_extract(self, url):
             self.report_warning(f'{e}; trying with webpage')
 
         url = self._create_url(user_id, video_id)
-        webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})
+        webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
         next_data = self._search_nextjs_data(webpage, video_id, default='{}')
         if next_data:
             status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
@@ -983,40 +987,173 @@ def _real_extract(self, url):
         return self.url_result(new_url)
 
 
-class TikTokLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/live'
+class TikTokLiveIE(TikTokBaseIE):
+    _VALID_URL = r'''(?x)https?://(?:
+        (?:www\.)?tiktok\.com/@(?P<uploader>[\w.-]+)/live|
+        m\.tiktok\.com/share/live/(?P<id>\d+)
+    )'''
     IE_NAME = 'tiktok:live'
 
     _TESTS = [{
+        'url': 'https://www.tiktok.com/@weathernewslive/live',
+        'info_dict': {
+            'id': '7210809319192726273',
+            'ext': 'mp4',
+            'title': r're:ウェザーニュースLiVE[\d\s:-]*',
+            'creator': 'ウェザーニュースLiVE',
+            'uploader': 'weathernewslive',
+            'uploader_id': '6621496731283095554',
+            'uploader_url': 'https://www.tiktok.com/@weathernewslive',
+            'live_status': 'is_live',
+            'concurrent_view_count': int,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.tiktok.com/@pilarmagenta/live',
+        'info_dict': {
+            'id': '7209423610325322522',
+            'ext': 'mp4',
+            'title': str,
+            'creator': 'Pilarmagenta',
+            'uploader': 'pilarmagenta',
+            'uploader_id': '6624846890674683909',
+            'uploader_url': 'https://www.tiktok.com/@pilarmagenta',
+            'live_status': 'is_live',
+            'concurrent_view_count': int,
+        },
+        'skip': 'Livestream',
+    }, {
+        'url': 'https://m.tiktok.com/share/live/7209423610325322522/?language=en',
+        'only_matching': True,
+    }, {
         'url': 'https://www.tiktok.com/@iris04201/live',
         'only_matching': True,
     }]
 
+    def _call_api(self, url, param, room_id, uploader, key=None):
+        response = traverse_obj(self._download_json(
+            url, room_id, fatal=False, query={
+                'aid': '1988',
+                param: room_id,
+            }), (key, {dict}), default={})
+
+        # status == 2 if live else 4
+        if int_or_none(response.get('status')) == 2:
+            return response
+        # If room_id is obtained via mobile share URL and cannot be refreshed, do not wait for live
+        elif not uploader:
+            raise ExtractorError('This livestream has ended', expected=True)
+        raise UserNotLive(video_id=uploader)
+
     def _real_extract(self, url):
-        uploader = self._match_id(url)
-        webpage = self._download_webpage(url, uploader, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})
-        room_id = self._html_search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
+        uploader, room_id = self._match_valid_url(url).group('uploader', 'id')
+        webpage = self._download_webpage(
+            url, uploader or room_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=not room_id)
+
+        if webpage:
+            data = try_call(lambda: self._get_sigi_state(webpage, uploader or room_id))
+            room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
+                       or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
+                       or room_id)
+            uploader = uploader or traverse_obj(
+                data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
+                ('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
+
         if not room_id:
             raise UserNotLive(video_id=uploader)
-        live_info = traverse_obj(self._download_json(
-            'https://www.tiktok.com/api/live/detail/', room_id, query={
-                'aid': '1988',
-                'roomID': room_id,
-            }), 'LiveRoomInfo', expected_type=dict, default={})
 
-        if 'status' not in live_info:
-            raise ExtractorError('Unexpected response from TikTok API')
-        # status = 2 if live else 4
-        if not int_or_none(live_info['status']) == 2:
-            raise UserNotLive(video_id=uploader)
+        formats = []
+        live_info = self._call_api(
+            'https://webcast.tiktok.com/webcast/room/info', 'room_id', room_id, uploader, key='data')
+
+        get_quality = qualities(('SD1', 'ld', 'SD2', 'sd', 'HD1', 'hd', 'FULL_HD1', 'uhd', 'ORIGION', 'origin'))
+        parse_inner = lambda x: self._parse_json(x, None)
+
+        for quality, stream in traverse_obj(live_info, (
+                'stream_url', 'live_core_sdk_data', 'pull_data', 'stream_data',
+                {parse_inner}, 'data', {dict}), default={}).items():
+
+            sdk_params = traverse_obj(stream, ('main', 'sdk_params', {parse_inner}, {
+                'vcodec': ('VCodec', {str}),
+                'tbr': ('vbitrate', {lambda x: int_or_none(x, 1000)}),
+                'resolution': ('resolution', {lambda x: re.match(r'(?i)\d+x\d+|\d+p', x).group().lower()}),
+            }))
+
+            flv_url = traverse_obj(stream, ('main', 'flv', {url_or_none}))
+            if flv_url:
+                formats.append({
+                    'url': flv_url,
+                    'ext': 'flv',
+                    'format_id': f'flv-{quality}',
+                    'quality': get_quality(quality),
+                    **sdk_params,
+                })
+
+            hls_url = traverse_obj(stream, ('main', 'hls', {url_or_none}))
+            if hls_url:
+                formats.append({
+                    'url': hls_url,
+                    'ext': 'mp4',
+                    'protocol': 'm3u8_native',
+                    'format_id': f'hls-{quality}',
+                    'quality': get_quality(quality),
+                    **sdk_params,
+                })
+
+        def get_vcodec(*keys):
+            return traverse_obj(live_info, (
+                'stream_url', *keys, {parse_inner}, 'VCodec', {str}))
+
+        for stream in ('hls', 'rtmp'):
+            stream_url = traverse_obj(live_info, ('stream_url', f'{stream}_pull_url', {url_or_none}))
+            if stream_url:
+                formats.append({
+                    'url': stream_url,
+                    'ext': 'mp4' if stream == 'hls' else 'flv',
+                    'protocol': 'm3u8_native' if stream == 'hls' else 'https',
+                    'format_id': f'{stream}-pull',
+                    'vcodec': get_vcodec(f'{stream}_pull_url_params'),
+                    'quality': get_quality('ORIGION'),
+                })
+
+        for f_id, f_url in traverse_obj(live_info, ('stream_url', 'flv_pull_url', {dict}), default={}).items():
+            if not url_or_none(f_url):
+                continue
+            formats.append({
+                'url': f_url,
+                'ext': 'flv',
+                'format_id': f'flv-{f_id}'.lower(),
+                'vcodec': get_vcodec('flv_pull_url_params', f_id),
+                'quality': get_quality(f_id),
+            })
+
+        # If uploader is a guest on another's livestream, primary endpoint will not have m3u8 URLs
+        if not traverse_obj(formats, lambda _, v: v['ext'] == 'mp4'):
+            live_info = merge_dicts(live_info, self._call_api(
+                'https://www.tiktok.com/api/live/detail/', 'roomID', room_id, uploader, key='LiveRoomInfo'))
+            if url_or_none(live_info.get('liveUrl')):
+                formats.append({
+                    'url': live_info['liveUrl'],
+                    'ext': 'mp4',
+                    'protocol': 'm3u8_native',
+                    'format_id': 'hls-fallback',
+                    'vcodec': 'h264',
+                    'quality': get_quality('origin'),
+                })
+
+        uploader = uploader or traverse_obj(live_info, ('ownerInfo', 'uniqueId'), ('owner', 'display_id'))
 
         return {
             'id': room_id,
-            'title': live_info.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage, default=''),
             'uploader': uploader,
-            'uploader_id': traverse_obj(live_info, ('ownerInfo', 'id')),
-            'creator': traverse_obj(live_info, ('ownerInfo', 'nickname')),
-            'concurrent_view_count': traverse_obj(live_info, ('liveRoomStats', 'userCount'), expected_type=int),
-            'formats': self._extract_m3u8_formats(live_info['liveUrl'], room_id, 'mp4', live=True),
+            'uploader_url': format_field(uploader, None, self._UPLOADER_URL_FORMAT) or None,
             'is_live': True,
+            'formats': formats,
+            '_format_sort_fields': ('quality', 'ext'),
+            **traverse_obj(live_info, {
+                'title': 'title',
+                'uploader_id': (('ownerInfo', 'owner'), 'id', {str_or_none}),
+                'creator': (('ownerInfo', 'owner'), 'nickname'),
+                'concurrent_view_count': (('user_count', ('liveRoomStats', 'userCount')), {int_or_none}),
+            }, get_all=False),
         }

From 9a06b7b1891b48cebbe275652ae8025a36d97d97 Mon Sep 17 00:00:00 2001
From: viktor-enzell <viktor.enzell@gmail.com>
Date: Sat, 18 Mar 2023 14:06:46 +0100
Subject: [PATCH 15/97] [extractor/drtv] Fix radio page extraction (#6552)

Authored by: viktor-enzell
---
 yt_dlp/extractor/drtv.py | 58 +++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py
index 470546bbc..6c381aa14 100644
--- a/yt_dlp/extractor/drtv.py
+++ b/yt_dlp/extractor/drtv.py
@@ -12,7 +12,6 @@
     mimetype2ext,
     str_or_none,
     traverse_obj,
-    try_get,
     unified_timestamp,
     update_url_query,
     url_or_none,
@@ -25,7 +24,7 @@ class DRTVIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?:radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
+                            (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
                             (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
                         )
                         (?P<id>[\da-z_-]+)
@@ -80,7 +79,7 @@ class DRTVIE(InfoExtractor):
             'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
             'timestamp': 1546628400,
             'upload_date': '20190104',
-            'duration': 3504.618,
+            'duration': 3504.619,
             'formats': 'mincount:20',
             'release_year': 2017,
             'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
@@ -101,14 +100,16 @@ class DRTVIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Bonderøven 2019 (1:8)',
             'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
-            'timestamp': 1603188600,
-            'upload_date': '20201020',
+            'timestamp': 1654856100,
+            'upload_date': '20220610',
             'duration': 2576.6,
             'season': 'Bonderøven 2019',
             'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
             'release_year': 2019,
             'season_number': 2019,
-            'series': 'Frank & Kastaniegaarden'
+            'series': 'Frank & Kastaniegaarden',
+            'episode_number': 1,
+            'episode': 'Episode 1',
         },
         'params': {
             'skip_download': True,
@@ -140,10 +141,26 @@ class DRTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+        'skip': 'this video has been removed',
+    }, {
+        'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': '14802310112',
+            'timestamp': 1678786200,
+            'duration': 120.043,
+            'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f',
+            'series': 'P4 København regionale nyheder',
+            'upload_date': '20230314',
+            'release_year': 0,
+            'description': 'Hør seneste regionale nyheder fra P4 København.',
+            'season': 'Regionale nyheder',
+            'title': 'Regionale nyheder',
+        },
     }]
 
     def _real_extract(self, url):
-        raw_video_id = self._match_id(url)
+        raw_video_id, is_radio_url = self._match_valid_url(url).group('id', 'radio')
 
         webpage = self._download_webpage(url, raw_video_id)
 
@@ -170,15 +187,17 @@ def _real_extract(self, url):
             programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
         else:
             programcard_url = _PROGRAMCARD_BASE
-            page = self._parse_json(
-                self._search_regex(
-                    r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
-                    'data'), '1')['cache']['page']
-            page = page[list(page.keys())[0]]
-            item = try_get(
-                page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
-                dict)
-            video_id = item['customId'].split(':')[-1]
+            if is_radio_url:
+                video_id = self._search_nextjs_data(
+                    webpage, raw_video_id)['props']['pageProps']['episode']['productionNumber']
+            else:
+                json_data = self._search_json(
+                    r'window\.__data\s*=', webpage, 'data', raw_video_id)
+                video_id = traverse_obj(json_data, (
+                    'cache', 'page', ..., (None, ('entries', 0)), 'item', 'customId',
+                    {lambda x: x.split(':')[-1]}), get_all=False)
+                if not video_id:
+                    raise ExtractorError('Unable to extract video id')
             query['productionnumber'] = video_id
 
         data = self._download_json(
@@ -269,10 +288,11 @@ def decrypt_uri(e):
                                 f['vcodec'] = 'none'
                         formats.extend(f4m_formats)
                     elif target == 'HLS':
-                        formats.extend(self._extract_m3u8_formats(
+                        fmts, subs = self._extract_m3u8_formats_and_subtitles(
                             uri, video_id, 'mp4', entry_protocol='m3u8_native',
-                            quality=preference, m3u8_id=format_id,
-                            fatal=False))
+                            quality=preference, m3u8_id=format_id, fatal=False)
+                        formats.extend(fmts)
+                        self._merge_subtitles(subs, target=subtitles)
                     else:
                         bitrate = link.get('Bitrate')
                         if bitrate:

From c14af7a741931b364bab3d9546c0f4359f318f8c Mon Sep 17 00:00:00 2001
From: Lesmiscore <nao20010128@gmail.com>
Date: Sat, 18 Mar 2023 23:29:02 +0900
Subject: [PATCH 16/97] [extractor/iwara] Overhaul extractors (#6557)

Authored by: Lesmiscore
---
 yt_dlp/extractor/iwara.py | 333 ++++++++++++++++----------------------
 1 file changed, 139 insertions(+), 194 deletions(-)

diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index ec3e59c6d..62a179700 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -1,239 +1,184 @@
-import itertools
-import re
+import functools
 import urllib.parse
+import hashlib
 
 from .common import InfoExtractor
 from ..utils import (
+    OnDemandPagedList,
     int_or_none,
     mimetype2ext,
-    remove_end,
-    strip_or_none,
-    unified_strdate,
-    url_or_none,
-    urljoin,
+    traverse_obj,
+    unified_timestamp,
 )
 
 
-class IwaraBaseIE(InfoExtractor):
-    _BASE_REGEX = r'(?P<base_url>https?://(?:www\.|ecchi\.)?iwara\.tv)'
-
-    def _extract_playlist(self, base_url, webpage):
-        for path in re.findall(r'class="title">\s*<a[^<]+href="([^"]+)', webpage):
-            yield self.url_result(urljoin(base_url, path))
-
-
-class IwaraIE(IwaraBaseIE):
-    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/videos/(?P<id>[a-zA-Z0-9]+)'
+class IwaraIE(InfoExtractor):
+    IE_NAME = 'iwara'
+    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/video/(?P<id>[a-zA-Z0-9]+)'
     _TESTS = [{
-        'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
-        # md5 is unstable
+        # this video cannot be played because of migration
+        'only_matching': True,
+        'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
         'info_dict': {
-            'id': 'amVwUl1EHpAD9RD',
+            'id': 'k2ayoueezfkx6gvq',
             'ext': 'mp4',
-            'title': '【MMD R-18】ガールフレンド carry_me_off',
             'age_limit': 18,
-            'thumbnail': 'https://i.iwara.tv/sites/default/files/videos/thumbnails/7951/thumbnail-7951_0001.png',
-            'uploader': 'Reimu丨Action',
-            'upload_date': '20150828',
-            'description': 'md5:1d4905ce48c66c9299c617f08e106e0f',
+            'title': 'Defeat of Irybelda - アイリベルダの敗北',
+            'description': 'md5:70278abebe706647a8b4cb04cf23e0d3',
+            'uploader': 'Inwerwm',
+            'uploader_id': 'inwerwm',
+            'tags': 'count:1',
+            'like_count': 6133,
+            'view_count': 1050343,
+            'comment_count': 1,
+            'timestamp': 1677843869,
+            'modified_timestamp': 1679056362,
         },
     }, {
-        'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
-        'md5': '7e5f1f359cd51a027ba4a7b7710a50f0',
+        'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
+        'md5': '20691ce1473ec2766c0788e14c60ce66',
         'info_dict': {
-            'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
-            'ext': 'mp4',
-            'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
-            'age_limit': 18,
-        },
-        'add_ie': ['GoogleDrive'],
-    }, {
-        'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
-        # md5 is unstable
-        'info_dict': {
-            'id': '6liAP9s2Ojc',
+            'id': '1ywe1sbkqwumpdxz5',
             'ext': 'mp4',
             'age_limit': 18,
-            'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
-            'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
-            'upload_date': '20160910',
-            'uploader': 'aMMDsork',
-            'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A',
+            'title': 'Aponia 阿波尼亚SEX  Party Tonight 手动脱衣 大奶 裸腿',
+            'description': 'md5:0c4c310f2e0592d68b9f771d348329ca',
+            'uploader': '龙也zZZ',
+            'uploader_id': 'user792540',
+            'tags': [
+                'uncategorized'
+            ],
+            'like_count': 1809,
+            'view_count': 25156,
+            'comment_count': 1,
+            'timestamp': 1678732213,
+            'modified_timestamp': 1679110271,
         },
-        'add_ie': ['Youtube'],
     }]
 
+    def _extract_formats(self, video_id, fileurl):
+        up = urllib.parse.urlparse(fileurl)
+        q = urllib.parse.parse_qs(up.query)
+        paths = up.path.rstrip('/').split('/')
+        # https://github.com/yt-dlp/yt-dlp/issues/6549#issuecomment-1473771047
+        x_version = hashlib.sha1('_'.join((paths[-1], q['expires'][0], '5nFp9kmbNnHdAFhaqMvt')).encode()).hexdigest()
+
+        files = self._download_json(fileurl, video_id, headers={'X-Version': x_version})
+        for fmt in files:
+            yield traverse_obj(fmt, {
+                'format_id': 'name',
+                'url': ('src', ('view', 'download'), {self._proto_relative_url}),
+                'ext': ('type', {mimetype2ext}),
+                'quality': ('name', {lambda x: int_or_none(x) or 1e4}),
+                'height': ('name', {int_or_none}),
+            }, get_all=False)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
-        webpage, urlh = self._download_webpage_handle(url, video_id)
-
-        hostname = urllib.parse.urlparse(urlh.geturl()).hostname
-        # ecchi is 'sexy' in Japanese
-        age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
-
-        video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
-
-        if not video_data:
-            iframe_url = self._html_search_regex(
-                r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
-                webpage, 'iframe URL', group='url')
-            return {
-                '_type': 'url_transparent',
-                'url': iframe_url,
-                'age_limit': age_limit,
-            }
-
-        title = remove_end(self._html_extract_title(webpage), ' | Iwara')
-
-        thumbnail = self._html_search_regex(
-            r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None)
-
-        uploader = self._html_search_regex(
-            r'class="username">([^<]+)', webpage, 'uploader', fatal=False)
-
-        upload_date = unified_strdate(self._html_search_regex(
-            r'作成日:([^\s]+)', webpage, 'upload_date', fatal=False))
-
-        description = strip_or_none(self._search_regex(
-            r'<p>(.+?(?=</div))', webpage, 'description', fatal=False,
-            flags=re.DOTALL))
-
-        formats = []
-        for a_format in video_data:
-            format_uri = url_or_none(a_format.get('uri'))
-            if not format_uri:
-                continue
-            format_id = a_format.get('resolution')
-            height = int_or_none(self._search_regex(
-                r'(\d+)p', format_id, 'height', default=None))
-            formats.append({
-                'url': self._proto_relative_url(format_uri, 'https:'),
-                'format_id': format_id,
-                'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
-                'height': height,
-                'width': int_or_none(height / 9.0 * 16.0 if height else None),
-                'quality': 1 if format_id == 'Source' else 0,
-            })
+        video_data = self._download_json(f'http://api.iwara.tv/video/{video_id}', video_id)
 
         return {
             'id': video_id,
-            'title': title,
-            'age_limit': age_limit,
-            'formats': formats,
-            'thumbnail': self._proto_relative_url(thumbnail, 'https:'),
-            'uploader': uploader,
-            'upload_date': upload_date,
-            'description': description,
+            'age_limit': 18 if video_data.get('rating') == 'ecchi' else 0,  # ecchi is 'sexy' in Japanese
+            **traverse_obj(video_data, {
+                'title': 'title',
+                'description': 'body',
+                'uploader': ('user', 'name'),
+                'uploader_id': ('user', 'username'),
+                'tags': ('tags', ..., 'id'),
+                'like_count': 'numLikes',
+                'view_count': 'numViews',
+                'comment_count': 'numComments',
+                'timestamp': ('createdAt', {unified_timestamp}),
+                'modified_timestamp': ('updatedAt', {unified_timestamp}),
+                'thumbnail': ('file', 'id', {str}, {
+                    lambda x: f'https://files.iwara.tv/image/thumbnail/{x}/thumbnail-00.jpg'}),
+            }),
+            'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
         }
 
 
-class IwaraPlaylistIE(IwaraBaseIE):
-    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/playlist/(?P<id>[^/?#&]+)'
-    IE_NAME = 'iwara:playlist'
-
-    _TESTS = [{
-        'url': 'https://ecchi.iwara.tv/playlist/best-enf',
-        'info_dict': {
-            'title': 'Best enf',
-            'uploader': 'Jared98112',
-            'id': 'best-enf',
-        },
-        'playlist_mincount': 1097,
-    }, {
-        # urlencoded
-        'url': 'https://ecchi.iwara.tv/playlist/%E3%83%97%E3%83%AC%E3%82%A4%E3%83%AA%E3%82%B9%E3%83%88-2',
-        'info_dict': {
-            'id': 'プレイリスト-2',
-            'title': 'プレイリスト',
-            'uploader': 'mainyu',
-        },
-        'playlist_mincount': 91,
-    }]
-
-    def _real_extract(self, url):
-        playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
-        playlist_id = urllib.parse.unquote(playlist_id)
-        webpage = self._download_webpage(url, playlist_id)
-
-        return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'title': self._html_search_regex(r'class="title"[^>]*>([^<]+)', webpage, 'title', fatal=False),
-            'uploader': self._html_search_regex(r'<h2>([^<]+)', webpage, 'uploader', fatal=False),
-            'entries': self._extract_playlist(base_url, webpage),
-        }
-
-
-class IwaraUserIE(IwaraBaseIE):
-    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/users/(?P<id>[^/?#&]+)'
+class IwaraUserIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
     IE_NAME = 'iwara:user'
+    _PER_PAGE = 32
 
     _TESTS = [{
-        'note': 'number of all videos page is just 1 page. less than 40 videos',
-        'url': 'https://ecchi.iwara.tv/users/infinityyukarip',
+        'url': 'https://iwara.tv/profile/user792540/videos',
         'info_dict': {
-            'title': 'Uploaded videos from Infinity_YukariP',
-            'id': 'infinityyukarip',
-            'uploader': 'Infinity_YukariP',
-            'uploader_id': 'infinityyukarip',
+            'id': 'user792540',
         },
-        'playlist_mincount': 39,
+        'playlist_mincount': 80,
     }, {
-        'note': 'no even all videos page. probably less than 10 videos',
-        'url': 'https://ecchi.iwara.tv/users/mmd-quintet',
+        'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
         'info_dict': {
-            'title': 'Uploaded videos from mmd quintet',
-            'id': 'mmd-quintet',
-            'uploader': 'mmd quintet',
-            'uploader_id': 'mmd-quintet',
-        },
-        'playlist_mincount': 6,
-    }, {
-        'note': 'has paging. more than 40 videos',
-        'url': 'https://ecchi.iwara.tv/users/theblackbirdcalls',
-        'info_dict': {
-            'title': 'Uploaded videos from TheBlackbirdCalls',
             'id': 'theblackbirdcalls',
-            'uploader': 'TheBlackbirdCalls',
-            'uploader_id': 'theblackbirdcalls',
         },
-        'playlist_mincount': 420,
+        'playlist_mincount': 723,
     }, {
-        'note': 'foreign chars in URL. there must be foreign characters in URL',
-        'url': 'https://ecchi.iwara.tv/users/ぶた丼',
-        'info_dict': {
-            'title': 'Uploaded videos from ぶた丼',
-            'id': 'ぶた丼',
-            'uploader': 'ぶた丼',
-            'uploader_id': 'ぶた丼',
-        },
-        'playlist_mincount': 170,
+        'url': 'https://iwara.tv/profile/user792540',
+        'only_matching': True,
+    }, {
+        'url': 'https://iwara.tv/profile/theblackbirdcalls',
+        'only_matching': True,
     }]
 
-    def _entries(self, playlist_id, base_url):
-        webpage = self._download_webpage(
-            f'{base_url}/users/{playlist_id}', playlist_id)
-        videos_url = self._search_regex(r'<a href="(/users/[^/]+/videos)(?:\?[^"]+)?">', webpage, 'all videos url', default=None)
-        if not videos_url:
-            yield from self._extract_playlist(base_url, webpage)
-            return
-
-        videos_url = urljoin(base_url, videos_url)
-
-        for n in itertools.count(1):
-            page = self._download_webpage(
-                videos_url, playlist_id, note=f'Downloading playlist page {n}',
-                query={'page': str(n - 1)} if n > 1 else {})
-            yield from self._extract_playlist(
-                base_url, page)
-
-            if f'page={n}' not in page:
-                break
+    def _entries(self, playlist_id, user_id, page):  # yields one url_result per video on the requested API page
+        videos = self._download_json(
+            'https://api.iwara.tv/videos', playlist_id,
+            note=f'Downloading page {page}',
+            query={
+                'page': page,
+                'sort': 'date',
+                'user': user_id,  # restrict the listing to this user's id
+                'limit': self._PER_PAGE,  # page size requested from the API
+            })
+        for x in traverse_obj(videos, ('results', ..., 'id')):  # ids of all entries in the response's 'results'
+            yield self.url_result(f'https://iwara.tv/video/{x}')
 
     def _real_extract(self, url):
-        playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
-        playlist_id = urllib.parse.unquote(playlist_id)
+        playlist_id = self._match_id(url)
+        user_info = self._download_json(
+            f'https://api.iwara.tv/profile/{playlist_id}', playlist_id,
+            note='Requesting user info')
+        user_id = traverse_obj(user_info, ('user', 'id'))
 
         return self.playlist_result(
-            self._entries(playlist_id, base_url), playlist_id)
+            OnDemandPagedList(
+                functools.partial(self._entries, playlist_id, user_id),
+                self._PER_PAGE),
+            playlist_id, traverse_obj(user_info, ('user', 'name')))
+
+
+class IwaraPlaylistIE(InfoExtractor):
+    # The ID is a UUID, but a looser pattern (hex digits and hyphens) is sufficient to match it
+    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
+    IE_NAME = 'iwara:playlist'
+    _PER_PAGE = 32
+
+    _TESTS = [{
+        'url': 'https://iwara.tv/playlist/458e5486-36a4-4ac0-b233-7e9eef01025f',
+        'info_dict': {
+            'id': '458e5486-36a4-4ac0-b233-7e9eef01025f',
+        },
+        'playlist_mincount': 3,
+    }]
+
+    def _entries(self, playlist_id, first_page, page):  # page 0 reuses the prefetched first_page
+        videos = self._download_json(
+            f'https://api.iwara.tv/playlist/{playlist_id}', playlist_id, f'Downloading page {page}',
+            query={'page': page, 'limit': self._PER_PAGE}) if page else first_page
+        for x in traverse_obj(videos, ('results', ..., 'id')):
+            yield self.url_result(f'https://iwara.tv/video/{x}')
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        page_0 = self._download_json(  # fetched eagerly: also supplies the playlist title below
+            f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
+            note='Requesting playlist info')
+
+        return self.playlist_result(
+            OnDemandPagedList(
+                functools.partial(self._entries, playlist_id, page_0),
+                self._PER_PAGE),
+            playlist_id, traverse_obj(page_0, ('title', 'name')))

From e4cf7741f9302b3faa092962f2895b55cb3d89bb Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Tue, 21 Mar 2023 17:48:22 -0500
Subject: [PATCH 17/97] [extractor/rozhlas] Extract manifest formats (#6590)

Closes #6584
Authored by: bashonly
---
 yt_dlp/extractor/rozhlas.py | 80 ++++++++++++++++++++++++++++---------
 1 file changed, 61 insertions(+), 19 deletions(-)

diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py
index 08ebb93e3..5cc664e00 100644
--- a/yt_dlp/extractor/rozhlas.py
+++ b/yt_dlp/extractor/rozhlas.py
@@ -1,5 +1,12 @@
 from .common import InfoExtractor
-from ..utils import extract_attributes, int_or_none, remove_start, traverse_obj
+from ..utils import (
+    extract_attributes,
+    int_or_none,
+    remove_start,
+    str_or_none,
+    traverse_obj,
+    url_or_none,
+)
 
 
 class RozhlasIE(InfoExtractor):
@@ -50,7 +57,7 @@ class RozhlasVltavaIE(InfoExtractor):
         'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
         'md5': 'ba2fdbc1242fc16771c7695d271ec355',
         'info_dict': {
-            'id': 8891337,
+            'id': '8891337',
             'title': 'md5:21f99739d04ab49d8c189ec711eef4ec',
         },
         'playlist_count': 1,
@@ -69,7 +76,7 @@ class RozhlasVltavaIE(InfoExtractor):
     }, {
         'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744',
         'info_dict': {
-            'id': 8554744,
+            'id': '8554744',
             'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko',
         },
         'playlist_count': 5,
@@ -139,27 +146,62 @@ class RozhlasVltavaIE(InfoExtractor):
                 'chapter_number': 5,
             },
         }]
+    }, {
+        'url': 'https://dvojka.rozhlas.cz/karel-siktanc-cerny-jezdec-bily-kun-napinava-pohadka-o-tajemnem-prizraku-8946969',
+        'info_dict': {
+            'id': '8946969',
+            'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
+        },
+        'playlist_count': 1,
+        'playlist': [{
+            'info_dict': {
+                'id': '10631121',
+                'ext': 'm4a',
+                'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
+                'description': 'Karel Šiktanc: Černý jezdec, bílý kůň',
+                'duration': 2656,
+                'artist': 'Tvůrčí skupina Drama a literatura',
+                'channel_id': 'dvojka',
+            },
+        }],
+        'params': {'skip_download': 'dash'},
     }]
 
     def _extract_video(self, entry):
-        chapter_number = int_or_none(traverse_obj(entry, ('meta', 'ga', 'contentSerialPart')))
+        formats = []
+        audio_id = entry['meta']['ga']['contentId']  # required field; a missing key raises KeyError
+        for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):  # valid URLs only
+            ext = audio.get('variant')  # 'dash' and 'hls' mark manifests; any other value is used as the extension
+            if ext == 'dash':
+                formats.extend(self._extract_mpd_formats(
+                    audio['url'], audio_id, mpd_id=ext, fatal=False))
+            elif ext == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    audio['url'], audio_id, 'm4a', m3u8_id=ext, fatal=False))
+            else:  # direct link: the variant name doubles as extension, format id and audio codec
+                formats.append({
+                    'url': audio['url'],
+                    'ext': ext,
+                    'format_id': ext,
+                    'abr': int_or_none(audio.get('bitrate')),
+                    'acodec': ext,
+                    'vcodec': 'none',
+                })
+
+        chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none}))
+
         return {
-            'id': entry['meta']['ga']['contentId'],
-            'title': traverse_obj(entry, ('meta', 'ga', 'contentName')),
-            'description': entry.get('title'),
-            'duration': entry.get('duration'),
-            'artist': traverse_obj(entry, ('meta', 'ga', 'contentAuthor')),
-            'channel_id': traverse_obj(entry, ('meta', 'ga', 'contentCreator')),
+            'id': audio_id,
             'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None,
             'chapter_number': chapter_number,
-            'formats': [{
-                'url': audio_link['url'],
-                'ext': audio_link.get('variant'),
-                'format_id': audio_link.get('variant'),
-                'abr': audio_link.get('bitrate'),
-                'acodec': audio_link.get('variant'),
-                'vcodec': 'none',
-            } for audio_link in entry['audioLinks']],
+            'formats': formats,
+            **traverse_obj(entry, {  # optional metadata; the entry's own 'title' is used as the description
+                'title': ('meta', 'ga', 'contentName'),
+                'description': 'title',
+                'duration': ('duration', {int_or_none}),
+                'artist': ('meta', 'ga', 'contentAuthor'),
+                'channel_id': ('meta', 'ga', 'contentCreator'),
+            })
         }
 
     def _real_extract(self, url):
@@ -173,7 +215,7 @@ def _real_extract(self, url):
 
         return {
             '_type': 'playlist',
-            'id': data.get('embedId'),
+            'id': str_or_none(data.get('embedId')) or video_id,
             'title': traverse_obj(data, ('series', 'title')),
             'entries': map(self._extract_video, data['playlist']),
         }

From 06966cb8966b9aa4f60ab9c44c182a057d4ca3a3 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Tue, 21 Mar 2023 17:57:46 -0500
Subject: [PATCH 18/97] [extractor/bravotv] Fix extractor (#6568)

Closes #6562
Authored by: bashonly
---
 yt_dlp/extractor/bravotv.py | 232 +++++++++++++++++++++++-------------
 1 file changed, 150 insertions(+), 82 deletions(-)

diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py
index d4895848e..d4bf9b53b 100644
--- a/yt_dlp/extractor/bravotv.py
+++ b/yt_dlp/extractor/bravotv.py
@@ -1,117 +1,185 @@
-import re
-
 from .adobepass import AdobePassIE
 from ..utils import (
-    smuggle_url,
-    update_url_query,
-    int_or_none,
+    extract_attributes,
     float_or_none,
-    try_get,
-    dict_get,
+    get_element_html_by_class,
+    int_or_none,
+    merge_dicts,
+    parse_age_limit,
+    remove_end,
+    str_or_none,
+    traverse_obj,
+    unescapeHTML,
+    unified_timestamp,
+    update_url_query,
+    url_or_none,
 )
 
 
 class BravoTVIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
-        'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
         'info_dict': {
-            'id': 'epL0pmK1kQlT',
+            'id': '3923059',
             'ext': 'mp4',
             'title': 'The Top Chef Season 16 Winner Is...',
             'description': 'Find out who takes the title of Top Chef!',
-            'uploader': 'NBCU-BRAV',
             'upload_date': '20190314',
             'timestamp': 1552591860,
             'season_number': 16,
             'episode_number': 15,
             'series': 'Top Chef',
             'episode': 'The Top Chef Season 16 Winner Is...',
-            'duration': 190.0,
-        }
+            'duration': 190.357,
+            'season': 'Season 16',
+            'thumbnail': r're:^https://.+\.jpg',
+        },
+        'params': {'skip_download': 'm3u8'},
     }, {
-        'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
-        'only_matching': True,
+        'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
+        'info_dict': {
+            'id': '9000234570',
+            'ext': 'mp4',
+            'title': 'London Calling',
+            'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
+            'upload_date': '20230310',
+            'timestamp': 1678410000,
+            'season_number': 20,
+            'episode_number': 1,
+            'series': 'Top Chef',
+            'episode': 'London Calling',
+            'duration': 3266.03,
+            'season': 'Season 20',
+            'chapters': 'count:7',
+            'thumbnail': r're:^https://.+\.jpg',
+            'age_limit': 14,
+        },
+        'params': {'skip_download': 'm3u8'},
+        'skip': 'This video requires AdobePass MSO credentials',
+    }, {
+        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
+        'info_dict': {
+            'id': '3692045',
+            'ext': 'mp4',
+            'title': 'Closing Night',
+            'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
+            'upload_date': '20180401',
+            'timestamp': 1522623600,
+            'season_number': 1,
+            'episode_number': 1,
+            'series': 'In Ice Cold Blood',
+            'episode': 'Closing Night',
+            'duration': 2629.051,
+            'season': 'Season 1',
+            'chapters': 'count:6',
+            'thumbnail': r're:^https://.+\.jpg',
+            'age_limit': 14,
+        },
+        'params': {'skip_download': 'm3u8'},
+        'skip': 'This video requires AdobePass MSO credentials',
     }, {
         'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
+        'info_dict': {
+            'id': '3974019',
+            'ext': 'mp4',
+            'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
+            'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
+            'upload_date': '20190617',
+            'timestamp': 1560790800,
+            'season_number': 2,
+            'episode_number': 16,
+            'series': 'In Ice Cold Blood',
+            'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
+            'duration': 68.235,
+            'season': 'Season 2',
+            'thumbnail': r're:^https://.+\.jpg',
+            'age_limit': 14,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        site, display_id = self._match_valid_url(url).groups()
+        site, display_id = self._match_valid_url(url).group('site', 'id')
         webpage = self._download_webpage(url, display_id)
-        settings = self._parse_json(self._search_regex(
-            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
-            display_id)
-        info = {}
+        settings = self._search_json(
+            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
+        tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
         query = {
-            'mbr': 'true',
+            'manifest': 'm3u',
+            'formats': 'm3u,mpeg4',
         }
-        account_pid, release_pid = [None] * 2
-        tve = settings.get('ls_tve')
+
         if tve:
-            query['manifest'] = 'm3u'
-            mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
-            if mobj:
-                account_pid, tp_path = mobj.groups()
-                release_pid = tp_path.strip('/').split('/')[-1]
-            else:
-                account_pid = 'HNK2IC'
-                tp_path = release_pid = tve['release_pid']
-            if tve.get('entitlement') == 'auth':
-                adobe_pass = settings.get('tve_adobe_auth', {})
-                if site == 'bravotv':
-                    site = 'bravo'
+            account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
+            account_id = tve['data-mpx-media-account-id']
+            metadata = self._parse_json(
+                tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
+            video_id = tve.get('data-guid') or metadata['guid']
+            if tve.get('data-entitlement') == 'auth':
+                auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
+                site = remove_end(site, 'tv')
+                release_pid = tve['data-release-pid']
                 resource = self._get_mvpd_resource(
-                    adobe_pass.get('adobePassResourceId') or site,
-                    tve['title'], release_pid, tve.get('rating'))
-                query['auth'] = self._extract_mvpd_auth(
-                    url, release_pid,
-                    adobe_pass.get('adobePassRequestorId') or site, resource)
+                    tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
+                    tve['data-title'], release_pid, tve.get('data-rating'))
+                query.update({
+                    'switch': 'HLSServiceSecure',
+                    'auth': self._extract_mvpd_auth(
+                        url, release_pid, auth.get('adobePassRequestorId') or site, resource),
+                })
+
         else:
-            shared_playlist = settings['ls_playlist']
-            account_pid = shared_playlist['account_pid']
-            metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
-            tp_path = release_pid = metadata.get('release_pid')
-            if not release_pid:
-                release_pid = metadata['guid']
-                tp_path = 'media/guid/2140479951/' + release_pid
-            info.update({
-                'title': metadata['title'],
-                'description': metadata.get('description'),
-                'season_number': int_or_none(metadata.get('season_num')),
-                'episode_number': int_or_none(metadata.get('episode_num')),
-            })
-            query['switch'] = 'progressive'
-
-        tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path)
+            ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
+            account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
+            account_id = ls_playlist['mpxMediaAccountId']
+            video_id = ls_playlist['defaultGuid']
+            metadata = traverse_obj(
+                ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)
 
+        tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
         tp_metadata = self._download_json(
-            update_url_query(tp_url, {'format': 'preview'}),
-            display_id, fatal=False)
-        if tp_metadata:
-            info.update({
-                'title': tp_metadata.get('title'),
-                'description': tp_metadata.get('description'),
-                'duration': float_or_none(tp_metadata.get('duration'), 1000),
-                'season_number': int_or_none(
-                    dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))),
-                'episode_number': int_or_none(
-                    dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))),
-                # For some reason the series is sometimes wrapped into a single element array.
-                'series': try_get(
-                    dict_get(tp_metadata, ('pl1$show', 'nbcu$show')),
-                    lambda x: x[0] if isinstance(x, list) else x,
-                    expected_type=str),
-                'episode': dict_get(
-                    tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')),
-            })
+            update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
 
-        info.update({
-            '_type': 'url_transparent',
-            'id': release_pid,
-            'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}),
-            'ie_key': 'ThePlatform',
-        })
-        return info
+        seconds_or_none = lambda x: float_or_none(x, 1000)
+        chapters = traverse_obj(tp_metadata, ('chapters', ..., {
+            'start_time': ('startTime', {seconds_or_none}),
+            'end_time': ('endTime', {seconds_or_none}),
+        }))
+        # prune pointless single chapters that span the entire duration from short videos
+        if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
+            chapters = None
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            update_url_query(f'{tp_url}/stream.m3u8', query), video_id, 'mp4', m3u8_id='hls')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'chapters': chapters,
+            **merge_dicts(traverse_obj(tp_metadata, {
+                'title': 'title',
+                'description': 'description',
+                'duration': ('duration', {seconds_or_none}),
+                'timestamp': ('pubDate', {seconds_or_none}),
+                'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
+                'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
+                'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
+                'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
+                'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
+            }, get_all=False), traverse_obj(metadata, {
+                'title': 'title',
+                'description': 'description',
+                'duration': ('durationInSeconds', {int_or_none}),
+                'timestamp': ('airDate', {unified_timestamp}),
+                'thumbnail': ('thumbnailUrl', {url_or_none}),
+                'season_number': ('seasonNumber', {int_or_none}),
+                'episode_number': ('episodeNumber', {int_or_none}),
+                'episode': 'episodeTitle',
+                'series': 'show',
+            }))
+        }

From c2e0fc40a73dd85ab3920f977f579d475e66ef59 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Tue, 21 Mar 2023 18:12:17 -0500
Subject: [PATCH 19/97] [extractor/generic] Add extractor-args `hls_key`,
 `variant_query` (#6567)

Authored by: bashonly
---
 README.md                   |  2 ++
 yt_dlp/extractor/generic.py | 32 +++++++++++++++++++++-----------
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index de83e421f..9ce85d631 100644
--- a/README.md
+++ b/README.md
@@ -1798,6 +1798,8 @@ #### youtubetab (YouTube playlists, channels, feeds, etc.)
 
 #### generic
 * `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg
+* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs
+* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
 
 #### funimation
 * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 49aa5a1f5..075bb36de 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -24,6 +24,7 @@
     mimetype2ext,
     orderedSet,
     parse_duration,
+    parse_qs,
     parse_resolution,
     smuggle_url,
     str_or_none,
@@ -32,6 +33,7 @@
     unescapeHTML,
     unified_timestamp,
     unsmuggle_url,
+    update_url_query,
     url_or_none,
     urljoin,
     variadic,
@@ -2184,12 +2186,21 @@ def report_detected(self, name, num=1, note=None):
 
         self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
 
-    def _fragment_query(self, url):
+    def _extra_manifest_info(self, info, manifest_url):  # applies the generic extractor-args to `info` in place
         if self._configuration_arg('fragment_query'):
-            query_string = urllib.parse.urlparse(url).query
+            query_string = urllib.parse.urlparse(manifest_url).query
             if query_string:
-                return {'extra_param_to_segment_url': query_string}
-        return {}
+                info['extra_param_to_segment_url'] = query_string
+
+        hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
+        info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
+            'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
+        }) or None  # casesense=True above: a key URI is case-sensitive and must not be lowercased
+
+        if self._configuration_arg('variant_query'):
+            query = parse_qs(manifest_url)
+            for fmt in self._downloader._get_formats(info):
+                fmt['url'] = update_url_query(fmt['url'], query)
 
     def _extract_rss(self, url, video_id, doc):
         NS_MAP = {
@@ -2397,10 +2408,8 @@ def _real_extract(self, url):
             subtitles = {}
             if format_id.endswith('mpegurl') or ext == 'm3u8':
                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
-                info_dict.update(self._fragment_query(url))
             elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
                 formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
-                info_dict.update(self._fragment_query(url))
             elif format_id == 'f4m' or ext == 'f4m':
                 formats = self._extract_f4m_formats(url, video_id, headers=headers)
             else:
@@ -2415,6 +2424,7 @@ def _real_extract(self, url):
                 'subtitles': subtitles,
                 'http_headers': headers or None,
             })
+            self._extra_manifest_info(info_dict, url)
             return info_dict
 
         if not self.get_param('test', False) and not is_intentional:
@@ -2427,7 +2437,7 @@ def _real_extract(self, url):
         if first_bytes.startswith(b'#EXTM3U'):
             self.report_detected('M3U playlist')
             info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
-            info_dict.update(self._fragment_query(url))
+            self._extra_manifest_info(info_dict, url)
             return info_dict
 
         # Maybe it's a direct link to a video?
@@ -2478,7 +2488,7 @@ def _real_extract(self, url):
                     doc,
                     mpd_base_url=full_response.geturl().rpartition('/')[0],
                     mpd_url=url)
-                info_dict.update(self._fragment_query(url))
+                self._extra_manifest_info(info_dict, url)
                 self.report_detected('DASH manifest')
                 return info_dict
             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -2592,7 +2602,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
                     formats.extend(fmts)
                     self._merge_subtitles(subs, target=subtitles)
                 for fmt in formats:
-                    fmt.update(self._fragment_query(src))
+                    self._extra_manifest_info(fmt, src)
 
                 if not formats:
                     formats.append({
@@ -2795,10 +2805,10 @@ def filter_video(urls):
                 return [self._extract_xspf_playlist(video_url, video_id)]
             elif ext == 'm3u8':
                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
-                entry_info_dict.update(self._fragment_query(video_url))
+                self._extra_manifest_info(entry_info_dict, video_url)
             elif ext == 'mpd':
                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
-                entry_info_dict.update(self._fragment_query(video_url))
+                self._extra_manifest_info(entry_info_dict, video_url)
             elif ext == 'f4m':
                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:

From 44369c9afa996e14e9f466754481d878811b5b4a Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 23 Mar 2023 11:18:42 -0500
Subject: [PATCH 20/97] [extractor/cbs] Add `ParamountPressExpress` extractor
 (#6604)

Closes #6597
Authored by: bashonly
---
 yt_dlp/extractor/_extractors.py |   5 +-
 yt_dlp/extractor/brightcove.py  |   6 +-
 yt_dlp/extractor/cbs.py         | 113 ++++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 01281b5a1..6c948e5fc 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -298,7 +298,10 @@
     CBCGemPlaylistIE,
     CBCGemLiveIE,
 )
-from .cbs import CBSIE
+from .cbs import (
+    CBSIE,
+    ParamountPressExpressIE,
+)
 from .cbslocal import (
     CBSLocalIE,
     CBSLocalArticleIE,
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index 2b7ddcae8..cd0e8ff27 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -575,6 +575,7 @@ def build_format_id(kind):
                 self.raise_no_formats(
                     error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
 
+        headers.pop('Authorization', None)  # or else http formats will give error 400
         for f in formats:
             f.setdefault('http_headers', {}).update(headers)
 
@@ -895,8 +896,9 @@ def extract_policy_key():
             store_pk(policy_key)
             return policy_key
 
-        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
-        headers = {}
+        token = smuggled_data.get('token')
+        api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}'
+        headers = {'Authorization': f'Bearer {token}'} if token else {}
         referrer = smuggled_data.get('referrer')  # XXX: notice the spelling/case of the key
         if referrer:
             headers.update({
diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py
index 9aacd50c4..1c0dbdea9 100644
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@@ -1,8 +1,14 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
 from .theplatform import ThePlatformFeedIE
+from .youtube import YoutubeIE
 from ..utils import (
     ExtractorError,
+    extract_attributes,
+    get_element_html_by_id,
     int_or_none,
     find_xpath_attr,
+    smuggle_url,
     xpath_element,
     xpath_text,
     update_url_query,
@@ -162,3 +168,110 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
             'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
             'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
         })
+
+
+class ParamountPressExpressIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?paramountpressexpress\.com(?:/[\w-]+)+/(?P<yt>yt-)?video/?\?watch=(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://www.paramountpressexpress.com/cbs-entertainment/shows/survivor/video/?watch=pnzew7e2hx',
+        'md5': '56631dbcadaab980d1fc47cb7b76cba4',
+        'info_dict': {
+            'id': '6322981580112',
+            'ext': 'mp4',
+            'title': 'I’m Felicia',
+            'description': 'md5:88fad93f8eede1c9c8f390239e4c6290',
+            'uploader_id': '6055873637001',
+            'upload_date': '20230320',
+            'timestamp': 1679334960,
+            'duration': 49.557,
+            'thumbnail': r're:^https://.+\.jpg',
+            'tags': [],
+        },
+    }, {
+        'url': 'https://www.paramountpressexpress.com/cbs-entertainment/video/?watch=2s5eh8kppc',
+        'md5': 'edcb03e3210b88a3e56c05aa863e0e5b',
+        'info_dict': {
+            'id': '6323036027112',
+            'ext': 'mp4',
+            'title': '‘Y&R’ Set Visit: Jerry O’Connell Quizzes Cast on Pre-Love Scene Rituals and More',
+            'description': 'md5:b929867a357aac5544b783d834c78383',
+            'uploader_id': '6055873637001',
+            'upload_date': '20230321',
+            'timestamp': 1679430180,
+            'duration': 132.032,
+            'thumbnail': r're:^https://.+\.jpg',
+            'tags': [],
+        },
+    }, {
+        'url': 'https://www.paramountpressexpress.com/paramount-plus/yt-video/?watch=OX9wJWOcqck',
+        'info_dict': {
+            'id': 'OX9wJWOcqck',
+            'ext': 'mp4',
+            'title': 'Rugrats | Season 2 Official Trailer | Paramount+',
+            'description': 'md5:1f7e26f5625a9f0d6564d9ad97a9f7de',
+            'uploader': 'Paramount Plus',
+            'uploader_id': '@paramountplus',
+            'uploader_url': 'http://www.youtube.com/@paramountplus',
+            'channel': 'Paramount Plus',
+            'channel_id': 'UCrRttZIypNTA1Mrfwo745Sg',
+            'channel_url': 'https://www.youtube.com/channel/UCrRttZIypNTA1Mrfwo745Sg',
+            'upload_date': '20230316',
+            'duration': 88,
+            'age_limit': 0,
+            'availability': 'public',
+            'live_status': 'not_live',
+            'playable_in_embed': True,
+            'view_count': int,
+            'like_count': int,
+            'channel_follower_count': int,
+            'thumbnail': 'https://i.ytimg.com/vi/OX9wJWOcqck/maxresdefault.jpg',
+            'categories': ['Entertainment'],
+            'tags': ['Rugrats'],
+        },
+    }, {
+        'url': 'https://www.paramountpressexpress.com/showtime/yt-video/?watch=_ljssSoDLkw',
+        'info_dict': {
+            'id': '_ljssSoDLkw',
+            'ext': 'mp4',
+            'title': 'Lavell Crawford: THEE Lavell Crawford Comedy Special Official Trailer | SHOWTIME',
+            'description': 'md5:39581bcc3fd810209b642609f448af70',
+            'uploader': 'SHOWTIME',
+            'uploader_id': '@Showtime',
+            'uploader_url': 'http://www.youtube.com/@Showtime',
+            'channel': 'SHOWTIME',
+            'channel_id': 'UCtwMWJr2BFPkuJTnSvCESSQ',
+            'channel_url': 'https://www.youtube.com/channel/UCtwMWJr2BFPkuJTnSvCESSQ',
+            'upload_date': '20230209',
+            'duration': 49,
+            'age_limit': 0,
+            'availability': 'public',
+            'live_status': 'not_live',
+            'playable_in_embed': True,
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+            'channel_follower_count': int,
+            'thumbnail': 'https://i.ytimg.com/vi_webp/_ljssSoDLkw/maxresdefault.webp',
+            'categories': ['People & Blogs'],
+            'tags': 'count:27',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id, is_youtube = self._match_valid_url(url).group('id', 'yt')
+        if is_youtube:
+            return self.url_result(display_id, YoutubeIE)
+
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r'\bvideo_id\s*=\s*["\'](\d+)["\']\s*,', webpage, 'Brightcove ID')
+        token = self._search_regex(r'\btoken\s*=\s*["\']([\w.-]+)["\']', webpage, 'token')
+
+        player = extract_attributes(get_element_html_by_id('vcbrightcoveplayer', webpage) or '')
+        account_id = player.get('data-account') or '6055873637001'
+        player_id = player.get('data-player') or 'OtLKgXlO9F'
+        embed = player.get('data-embed') or 'default'
+
+        return self.url_result(smuggle_url(
+            f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}',
+            {'token': token}), BrightcoveNewIE)

From 69b2f838d3d3e37dc17367ef64d978db1bea45cf Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 23 Mar 2023 11:19:37 -0500
Subject: [PATCH 21/97] [extractor/telecaribe] Expand livestream support
 (#6601)

Closes #6598
Authored by: bashonly
---
 yt_dlp/extractor/telecaribe.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/telecaribe.py b/yt_dlp/extractor/telecaribe.py
index b6d88a809..91118a1a4 100644
--- a/yt_dlp/extractor/telecaribe.py
+++ b/yt_dlp/extractor/telecaribe.py
@@ -38,11 +38,23 @@ class TelecaribePlayIE(InfoExtractor):
         'params': {
             'skip_download': 'Livestream',
         }
+    }, {
+        'url': 'https://www.play.telecaribe.co/liveplus',
+        'info_dict': {
+            'id': 'liveplus',
+            'title': r're:^Señal en vivo Plus',
+            'live_status': 'is_live',
+            'ext': 'mp4',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+        'skip': 'Geo-restricted to Colombia',
     }]
 
     def _download_player_webpage(self, webpage, display_id):
         page_id = self._search_regex(
-            (r'window.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
+            (r'window\.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
             webpage, 'page_id')
 
         props = self._download_json(self._search_regex(
@@ -59,14 +71,16 @@ def _real_extract(self, url):
         webpage = self._download_webpage(url, display_id)
         player = self._download_player_webpage(webpage, display_id)
 
-        if display_id != 'live':
+        livestream_url = self._search_regex(
+            r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url', default=None)
+
+        if not livestream_url:
             return self.playlist_from_matches(
                 re.findall(r'<a[^>]+href\s*=\s*"([^"]+\.mp4)', player), display_id,
                 self._get_clean_title(self._og_search_title(webpage)))
 
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-            self._search_regex(r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url'),
-            display_id, 'mp4')
+            livestream_url, display_id, 'mp4', live=True)
 
         return {
             'id': display_id,

From 78bc1868ff3352108ab2911033d1ac67a55f151e Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 23 Mar 2023 15:16:02 +0530
Subject: [PATCH 22/97] [extractor/rumble] Detect timeline format

Closes #6607
---
 yt_dlp/extractor/rumble.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index 97f81446c..834fe704f 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -7,6 +7,7 @@
     ExtractorError,
     UnsupportedError,
     clean_html,
+    determine_ext,
     get_element_by_class,
     int_or_none,
     parse_count,
@@ -175,12 +176,16 @@ def _real_extract(self, url):
                         video_info['url'], video_id,
                         ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
                     continue
+                timeline = ext == 'timeline'
+                if timeline:
+                    ext = determine_ext(video_info['url'])
                 formats.append({
                     'ext': ext,
+                    'acodec': 'none' if timeline else None,
                     'url': video_info['url'],
                     'format_id': '%s-%sp' % (ext, height),
-                    'height': int_or_none(height),
-                    'fps': video.get('fps'),
+                    'format_note': 'Timeline' if timeline else None,
+                    'fps': None if timeline else video.get('fps'),
                     **traverse_obj(meta, {
                         'tbr': 'bitrate',
                         'filesize': 'size',
@@ -247,6 +252,25 @@ class RumbleIE(InfoExtractor):
     }, {
         'url': 'http://www.rumble.com/vDMUM1?key=value',
         'only_matching': True,
+    }, {
+        'note': 'timeline format',
+        'url': 'https://rumble.com/v2ea9qb-the-u.s.-cannot-hide-this-in-ukraine-anymore-redacted-with-natali-and-clayt.html',
+        'md5': '40d61fec6c0945bca3d0e1dc1aa53d79',
+        'params': {'format': 'wv'},
+        'info_dict': {
+            'id': 'v2bou5f',
+            'ext': 'mp4',
+            'uploader': 'Redacted News',
+            'upload_date': '20230322',
+            'timestamp': 1679445010,
+            'title': 'The U.S. CANNOT hide this in Ukraine anymore | Redacted with Natali and Clayton Morris',
+            'duration': 892,
+            'channel': 'Redacted News',
+            'description': 'md5:aaad0c5c3426d7a361c29bdaaced7c42',
+            'channel_url': 'https://rumble.com/c/Redacted',
+            'live_status': 'not_live',
+            'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
+        },
     }]
 
     _WEBPAGE_TESTS = [{

From 6994afc030d2a786d8032075ed71a14d7eac5a4f Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 23 Mar 2023 19:09:29 +0530
Subject: [PATCH 23/97] [extractor/rumble] Fix videos without quality selection

Closes #6612
---
 yt_dlp/extractor/rumble.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index 834fe704f..98f660f8b 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -8,8 +8,10 @@
     UnsupportedError,
     clean_html,
     determine_ext,
+    format_field,
     get_element_by_class,
     int_or_none,
+    join_nonempty,
     parse_count,
     parse_iso8601,
     traverse_obj,
@@ -165,7 +167,13 @@ def _real_extract(self, url):
 
         formats = []
         for ext, ext_info in (video.get('ua') or {}).items():
-            for height, video_info in (ext_info or {}).items():
+            if isinstance(ext_info, dict):
+                for height, video_info in ext_info.items():
+                    if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
+                        video_info.setdefault('meta', {})['h'] = height
+                ext_info = ext_info.values()
+
+            for video_info in ext_info:
                 meta = video_info.get('meta') or {}
                 if not video_info.get('url'):
                     continue
@@ -183,7 +191,7 @@ def _real_extract(self, url):
                     'ext': ext,
                     'acodec': 'none' if timeline else None,
                     'url': video_info['url'],
-                    'format_id': '%s-%sp' % (ext, height),
+                    'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')),
                     'format_note': 'Timeline' if timeline else None,
                     'fps': None if timeline else video.get('fps'),
                     **traverse_obj(meta, {
@@ -271,6 +279,24 @@ class RumbleIE(InfoExtractor):
             'live_status': 'not_live',
             'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
         },
+    }, {
+        'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
+        'info_dict': {
+            'id': 'v2blzyy',
+            'ext': 'mp4',
+            'live_status': 'was_live',
+            'release_timestamp': 1679446804,
+            'description': 'md5:2ac4908ccfecfb921f8ffa4b30c1e636',
+            'release_date': '20230322',
+            'timestamp': 1679445692,
+            'duration': 4435,
+            'upload_date': '20230322',
+            'title': 'The Covid Twitter Files Drop: Protecting Fauci While Censoring The Truth w/Matt Taibbi',
+            'uploader': 'Kim Iversen',
+            'channel_url': 'https://rumble.com/c/KimIversen',
+            'channel': 'Kim Iversen',
+            'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
+        },
     }]
 
     _WEBPAGE_TESTS = [{

From 5cc0a8fd2e9fec50026fb92170b57993af939e4a Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 23 Mar 2023 11:28:23 -0500
Subject: [PATCH 24/97] [extractor/generic] Accept values for `fragment_query`,
 `variant_query` (#6600)

Closes #6593
Authored by: bashonly
---
 README.md                   |  4 ++--
 yt_dlp/extractor/generic.py | 20 ++++++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 9ce85d631..3e8484314 100644
--- a/README.md
+++ b/README.md
@@ -1797,8 +1797,8 @@ #### youtubetab (YouTube playlists, channels, feeds, etc.)
 * `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off
 
 #### generic
-* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg
-* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs
+* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Does not apply to ffmpeg
+* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE`
 * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
 
 #### funimation
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 075bb36de..f9fa01feb 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -24,7 +24,6 @@
     mimetype2ext,
     orderedSet,
     parse_duration,
-    parse_qs,
     parse_resolution,
     smuggle_url,
     str_or_none,
@@ -2187,18 +2186,23 @@ def report_detected(self, name, num=1, note=None):
         self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
 
     def _extra_manifest_info(self, info, manifest_url):
-        if self._configuration_arg('fragment_query'):
-            query_string = urllib.parse.urlparse(manifest_url).query
-            if query_string:
-                info['extra_param_to_segment_url'] = query_string
+        fragment_query = self._configuration_arg('fragment_query', [None], casesense=True)[0]
+        if fragment_query is not None:
+            fragment_query = self._configuration_arg('fragment_query', casesense=True)[0]
+            info['extra_param_to_segment_url'] = (
+                urllib.parse.urlparse(fragment_query).query or fragment_query
+                or urllib.parse.urlparse(manifest_url).query or None)
 
         hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
-        info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key'), {
+        info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
             'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
         }) or None
 
-        if self._configuration_arg('variant_query'):
-            query = parse_qs(manifest_url)
+        variant_query = self._configuration_arg('variant_query', [None], casesense=True)[0]
+        if variant_query is not None:
+            query = urllib.parse.parse_qs(
+                urllib.parse.urlparse(variant_query).query or variant_query
+                or urllib.parse.urlparse(manifest_url).query)
             for fmt in self._downloader._get_formats(info):
                 fmt['url'] = update_url_query(fmt['url'], query)
 

From 3ae182ad89e1427ff7b1684d6a44ff93fa857a0c Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 23 Mar 2023 13:45:27 -0500
Subject: [PATCH 25/97] [extractor/pgatour] Add extractor (#6613)

Closes #6537
Authored by: bashonly
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/pgatour.py     | 47 +++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 yt_dlp/extractor/pgatour.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 6c948e5fc..4a4d38caf 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1393,6 +1393,7 @@
     PeriscopeIE,
     PeriscopeUserIE,
 )
+from .pgatour import PGATourIE
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
diff --git a/yt_dlp/extractor/pgatour.py b/yt_dlp/extractor/pgatour.py
new file mode 100644
index 000000000..36c2c6207
--- /dev/null
+++ b/yt_dlp/extractor/pgatour.py
@@ -0,0 +1,47 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+
+
+class PGATourIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pgatour\.com/video/[\w-]+/(?P<tc>T)?(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.pgatour.com/video/competition/T6322447785112/adam-hadwin-2023-the-players-round-4-18th-hole-shot-1',
+        'info_dict': {
+            'id': '6322447785112',
+            'ext': 'mp4',
+            'title': 'Adam Hadwin | 2023 THE PLAYERS | Round 4 | 18th hole | Shot 1',
+            'uploader_id': '6116716431001',
+            'upload_date': '20230312',
+            'timestamp': 1678653136,
+            'duration': 20.011,
+            'thumbnail': r're:^https://.+\.jpg',
+            'tags': 'count:7',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.pgatour.com/video/features/6322506425112/follow-the-players-trophy-on-championship-sunday',
+        'info_dict': {
+            'id': '6322506425112',
+            'ext': 'mp4',
+            'title': 'Follow THE PLAYERS trophy on Championship Sunday',
+            'description': 'md5:4d29e4bdfa03694a0ebfd08950398568',
+            'uploader_id': '6082840763001',
+            'upload_date': '20230313',
+            'timestamp': 1678739835,
+            'duration': 123.435,
+            'thumbnail': r're:^https://.+\.jpg',
+            'tags': 'count:8',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        video_id, is_tourcast = self._match_valid_url(url).group('id', 'tc')
+
+        # From https://www.pgatour.com/_next/static/chunks/pages/_app-8bcf849560daf38d.js
+        account_id = '6116716431001' if is_tourcast else '6082840763001'
+        player_id = 'Vsd5Umu8r' if is_tourcast else 'FWIBYMBPj'
+
+        return self.url_result(
+            f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
+            BrightcoveNewIE)

From 6bdb64e2a2a6d504d8ce1dc830fbfb8a7f199c63 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 23 Mar 2023 13:45:56 -0500
Subject: [PATCH 26/97] [extractor/hollywoodreporter] Add extractors (#6614)

Closes #6525
Authored by: bashonly
---
 yt_dlp/extractor/_extractors.py       |  4 ++
 yt_dlp/extractor/hollywoodreporter.py | 72 +++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 yt_dlp/extractor/hollywoodreporter.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 4a4d38caf..69464b6f0 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -719,6 +719,10 @@
 from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hitrecord import HitRecordIE
+from .hollywoodreporter import (
+    HollywoodReporterIE,
+    HollywoodReporterPlaylistIE,
+)
 from .holodex import HolodexIE
 from .hotnewhiphop import HotNewHipHopIE
 from .hotstar import (
diff --git a/yt_dlp/extractor/hollywoodreporter.py b/yt_dlp/extractor/hollywoodreporter.py
new file mode 100644
index 000000000..1f7eb89bc
--- /dev/null
+++ b/yt_dlp/extractor/hollywoodreporter.py
@@ -0,0 +1,72 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+from ..utils import (
+    ExtractorError,
+    OnDemandPagedList,
+    extract_attributes,
+    get_element_by_class,
+    get_element_html_by_class,
+)
+
+
+class HollywoodReporterIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/video/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://www.hollywoodreporter.com/video/chris-pine-michelle-rodriguez-dungeons-dragons-cast-directors-on-what-it-took-to-make-film-sxsw-2023/',
+        'info_dict': {
+            'id': 'zH4jZaR5',
+            'ext': 'mp4',
+            'title': 'md5:a9a1c073770a32f178955997712c4bd9',
+            'description': 'The cast and directors of \'Dungeons & Dragons: Honor Among Thieves\' talk about their new film.',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/zH4jZaR5/poster.jpg?width=720',
+            'upload_date': '20230312',
+            'timestamp': 1678586423,
+            'duration': 242.0,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        data = extract_attributes(get_element_html_by_class('vlanding-video-card__link', webpage) or '')
+        video_id = data['data-video-showcase-trigger']
+        showcase_type = data['data-video-showcase-type']
+
+        if showcase_type == 'jwplayer':
+            return self.url_result(f'jwplatform:{video_id}', JWPlatformIE)
+        elif showcase_type == 'youtube':
+            return self.url_result(video_id, 'Youtube')
+        else:
+            raise ExtractorError(f'Unsupported showcase type "{showcase_type}"')
+
+
+class HollywoodReporterPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/vcategory/(?P<slug>[\w-]+)-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.hollywoodreporter.com/vcategory/heat-vision-breakdown-57822/',
+        'playlist_mincount': 109,
+        'info_dict': {
+            'id': '57822',
+            'title': 'heat-vision-breakdown',
+        }
+    }]
+
+    def _fetch_page(self, slug, pl_id, page):
+        page += 1
+        webpage = self._download_webpage(
+            f'https://www.hollywoodreporter.com/vcategory/{slug}-{pl_id}/page/{page}/',
+            pl_id, note=f'Downloading playlist page {page}')
+        section = get_element_by_class('video-playlist-river', webpage) or ''
+
+        for url in re.findall(r'<a[^>]+href="([^"]+)"[^>]+class="c-title__link', section):
+            yield self.url_result(url, HollywoodReporterIE)
+
+    def _real_extract(self, url):
+        slug, pl_id = self._match_valid_url(url).group('slug', 'id')
+        return self.playlist_result(
+            OnDemandPagedList(functools.partial(self._fetch_page, slug, pl_id), 15), pl_id, slug)

From 8ceb07e870424c219dced8f4348729553f05c5cc Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 23 Mar 2023 13:46:33 -0500
Subject: [PATCH 27/97] [extractor/tiktok] Fix mp3 formats (#6615)

Closes #6608
Authored by: bashonly
---
 yt_dlp/extractor/tiktok.py | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index f1696a2fc..fb838d529 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -13,6 +13,7 @@
     LazyList,
     UnsupportedError,
     UserNotLive,
+    determine_ext,
     format_field,
     get_element_by_id,
     get_first,
@@ -204,6 +205,16 @@ def parse_url_key(url_key):
 
         known_resolutions = {}
 
+        def mp3_meta(url):
+            return {
+                'format_note': 'Music track',
+                'ext': 'mp3',
+                'acodec': 'mp3',
+                'vcodec': 'none',
+                'width': None,
+                'height': None,
+            } if determine_ext(url) == 'mp3' else {}
+
         def extract_addr(addr, add_meta={}):
             parsed_meta, res = parse_url_key(addr.get('url_key', ''))
             if res:
@@ -219,7 +230,8 @@ def extract_addr(addr, add_meta={}):
                 'source_preference': -2 if 'aweme/v1' in url else -1,  # Downloads from API might get blocked
                 **add_meta, **parsed_meta,
                 'format_note': join_nonempty(
-                    add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' ')
+                    add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' '),
+                **mp3_meta(url),
             } for url in addr.get('url_list') or []]
 
         # Hack: Add direct video links first to prioritize them when removing duplicate formats
@@ -553,6 +565,28 @@ class TikTokIE(TikTokBaseIE):
             'comment_count': int,
         },
         'skip': 'This video is unavailable',
+    }, {
+        # slideshow audio-only mp3 format
+        'url': 'https://www.tiktok.com/@_le_cannibale_/video/7139980461132074283',
+        'info_dict': {
+            'id': '7139980461132074283',
+            'ext': 'mp3',
+            'title': 'TikTok video #7139980461132074283',
+            'description': '',
+            'creator': 'Antaura',
+            'uploader': '_le_cannibale_',
+            'uploader_id': '6604511138619654149',
+            'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
+            'artist': 'nathan !',
+            'track': 'grahamscott canon',
+            'upload_date': '20220905',
+            'timestamp': 1662406249,
+            'view_count': int,
+            'like_count': int,
+            'repost_count': int,
+            'comment_count': int,
+            'thumbnail': r're:^https://.+\.webp',
+        },
     }, {
         # Auto-captions available
         'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',

From 9bfe0d15bd7dbdc6b0e6378fa9f5e2e289b2373b Mon Sep 17 00:00:00 2001
From: bashonly <bashonly@bashonly.com>
Date: Thu, 23 Mar 2023 14:28:31 -0500
Subject: [PATCH 28/97] Fix 5cc0a8fd2e9fec50026fb92170b57993af939e4a

Authored by: bashonly
---
 yt_dlp/extractor/generic.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index f9fa01feb..75355aeb5 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2188,7 +2188,6 @@ def report_detected(self, name, num=1, note=None):
     def _extra_manifest_info(self, info, manifest_url):
         fragment_query = self._configuration_arg('fragment_query', [None], casesense=True)[0]
         if fragment_query is not None:
-            fragment_query = self._configuration_arg('fragment_query', casesense=True)[0]
             info['extra_param_to_segment_url'] = (
                 urllib.parse.urlparse(fragment_query).query or fragment_query
                 or urllib.parse.urlparse(manifest_url).query or None)

From baa922b5c74b10e3b86ff5e6cf6529b3aae8efab Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 24 Mar 2023 21:53:45 +0530
Subject: [PATCH 29/97] [extractor] Do not exit early for unsuitable
 `url_result`

---
 yt_dlp/extractor/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 2091df7fa..5da12725a 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3513,8 +3513,8 @@ def _RETURN_TYPE(cls):
     @classmethod
     def is_single_video(cls, url):
         """Returns whether the URL is of a single video, None if unknown"""
-        assert cls.suitable(url), 'The URL must be suitable for the extractor'
-        return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
+        if cls.suitable(url):
+            return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
 
     @classmethod
     def is_suitable(cls, age_limit):

From f68434cc74cfd3db01b266476a2eac8329fbb267 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 24 Mar 2023 21:53:06 +0530
Subject: [PATCH 30/97] [extractor] Extract more metadata from ISM

Fixes https://github.com/yt-dlp/yt-dlp/commit/81b6102d2099eec78a2db9ae3d101a8503dd4f25#r105892531
---
 test/test_InfoExtractor.py | 138 +++++++++++++------------------------
 yt_dlp/extractor/common.py |   2 +
 2 files changed, 49 insertions(+), 91 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index e8d94a6ac..1f60abfd2 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1406,6 +1406,7 @@ def test_parse_ism_formats(self):
                     'vcodec': 'none',
                     'acodec': 'AACL',
                     'protocol': 'ism',
+                    'audio_channels': 2,
                     '_download_params': {
                         'stream_type': 'audio',
                         'duration': 8880746666,
@@ -1419,9 +1420,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'audio_ext': 'isma',
-                    'video_ext': 'none',
-                    'abr': 128,
                 }, {
                     'format_id': 'video-100',
                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
@@ -1445,9 +1443,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 100,
                 }, {
                     'format_id': 'video-326',
                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
@@ -1471,9 +1466,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 326,
                 }, {
                     'format_id': 'video-698',
                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
@@ -1497,9 +1489,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 698,
                 }, {
                     'format_id': 'video-1493',
                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
@@ -1523,9 +1512,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 1493,
                 }, {
                     'format_id': 'video-4482',
                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
@@ -1549,9 +1535,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 4482,
                 }],
                 {
                     'eng': [
@@ -1575,34 +1558,6 @@ def test_parse_ism_formats(self):
                 'ec-3_test',
                 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
                 [{
-                    'format_id': 'audio_deu_1-224',
-                    'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
-                    'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
-                    'ext': 'isma',
-                    'tbr': 224,
-                    'asr': 48000,
-                    'vcodec': 'none',
-                    'acodec': 'EC-3',
-                    'protocol': 'ism',
-                    '_download_params':
-                    {
-                        'stream_type': 'audio',
-                        'duration': 370000000,
-                        'timescale': 10000000,
-                        'width': 0,
-                        'height': 0,
-                        'fourcc': 'EC-3',
-                        'language': 'deu',
-                        'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00',
-                        'sampling_rate': 48000,
-                        'channels': 6,
-                        'bits_per_sample': 16,
-                        'nal_unit_length_field': 4
-                    },
-                    'audio_ext': 'isma',
-                    'video_ext': 'none',
-                    'abr': 224,
-                }, {
                     'format_id': 'audio_deu-127',
                     'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
                     'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
@@ -1612,8 +1567,9 @@ def test_parse_ism_formats(self):
                     'vcodec': 'none',
                     'acodec': 'AACL',
                     'protocol': 'ism',
-                    '_download_params':
-                    {
+                    'language': 'deu',
+                    'audio_channels': 2,
+                    '_download_params': {
                         'stream_type': 'audio',
                         'duration': 370000000,
                         'timescale': 10000000,
@@ -1627,9 +1583,32 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'audio_ext': 'isma',
-                    'video_ext': 'none',
-                    'abr': 127,
+                }, {
+                    'format_id': 'audio_deu_1-224',
+                    'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+                    'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+                    'ext': 'isma',
+                    'tbr': 224,
+                    'asr': 48000,
+                    'vcodec': 'none',
+                    'acodec': 'EC-3',
+                    'protocol': 'ism',
+                    'language': 'deu',
+                    'audio_channels': 6,
+                    '_download_params': {
+                        'stream_type': 'audio',
+                        'duration': 370000000,
+                        'timescale': 10000000,
+                        'width': 0,
+                        'height': 0,
+                        'fourcc': 'EC-3',
+                        'language': 'deu',
+                        'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00',
+                        'sampling_rate': 48000,
+                        'channels': 6,
+                        'bits_per_sample': 16,
+                        'nal_unit_length_field': 4
+                    },
                 }, {
                     'format_id': 'video_deu-23',
                     'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
@@ -1641,8 +1620,8 @@ def test_parse_ism_formats(self):
                     'vcodec': 'AVC1',
                     'acodec': 'none',
                     'protocol': 'ism',
-                    '_download_params':
-                    {
+                    'language': 'deu',
+                    '_download_params': {
                         'stream_type': 'video',
                         'duration': 370000000,
                         'timescale': 10000000,
@@ -1655,9 +1634,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 23,
                 }, {
                     'format_id': 'video_deu-403',
                     'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
@@ -1669,8 +1645,8 @@ def test_parse_ism_formats(self):
                     'vcodec': 'AVC1',
                     'acodec': 'none',
                     'protocol': 'ism',
-                    '_download_params':
-                    {
+                    'language': 'deu',
+                    '_download_params': {
                         'stream_type': 'video',
                         'duration': 370000000,
                         'timescale': 10000000,
@@ -1683,9 +1659,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 403,
                 }, {
                     'format_id': 'video_deu-680',
                     'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
@@ -1697,8 +1670,8 @@ def test_parse_ism_formats(self):
                     'vcodec': 'AVC1',
                     'acodec': 'none',
                     'protocol': 'ism',
-                    '_download_params':
-                    {
+                    'language': 'deu',
+                    '_download_params': {
                         'stream_type': 'video',
                         'duration': 370000000,
                         'timescale': 10000000,
@@ -1711,9 +1684,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 680,
                 }, {
                     'format_id': 'video_deu-1253',
                     'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
@@ -1725,8 +1695,9 @@ def test_parse_ism_formats(self):
                     'vcodec': 'AVC1',
                     'acodec': 'none',
                     'protocol': 'ism',
-                    '_download_params':
-                    {
+                    'vbr': 1253,
+                    'language': 'deu',
+                    '_download_params': {
                         'stream_type': 'video',
                         'duration': 370000000,
                         'timescale': 10000000,
@@ -1739,9 +1710,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 1253,
                 }, {
                     'format_id': 'video_deu-2121',
                     'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
@@ -1753,8 +1721,8 @@ def test_parse_ism_formats(self):
                     'vcodec': 'AVC1',
                     'acodec': 'none',
                     'protocol': 'ism',
-                    '_download_params':
-                    {
+                    'language': 'deu',
+                    '_download_params': {
                         'stream_type': 'video',
                         'duration': 370000000,
                         'timescale': 10000000,
@@ -1767,9 +1735,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 2121,
                 }, {
                     'format_id': 'video_deu-3275',
                     'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
@@ -1781,8 +1746,8 @@ def test_parse_ism_formats(self):
                     'vcodec': 'AVC1',
                     'acodec': 'none',
                     'protocol': 'ism',
-                    '_download_params':
-                    {
+                    'language': 'deu',
+                    '_download_params': {
                         'stream_type': 'video',
                         'duration': 370000000,
                         'timescale': 10000000,
@@ -1795,9 +1760,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 3275,
                 }, {
                     'format_id': 'video_deu-5300',
                     'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
@@ -1809,8 +1771,8 @@ def test_parse_ism_formats(self):
                     'vcodec': 'AVC1',
                     'acodec': 'none',
                     'protocol': 'ism',
-                    '_download_params':
-                    {
+                    'language': 'deu',
+                    '_download_params': {
                         'stream_type': 'video',
                         'duration': 370000000,
                         'timescale': 10000000,
@@ -1823,9 +1785,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 5300,
                 }, {
                     'format_id': 'video_deu-8079',
                     'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
@@ -1837,8 +1796,8 @@ def test_parse_ism_formats(self):
                     'vcodec': 'AVC1',
                     'acodec': 'none',
                     'protocol': 'ism',
-                    '_download_params':
-                    {
+                    'language': 'deu',
+                    '_download_params': {
                         'stream_type': 'video',
                         'duration': 370000000,
                         'timescale': 10000000,
@@ -1851,9 +1810,6 @@ def test_parse_ism_formats(self):
                         'bits_per_sample': 16,
                         'nal_unit_length_field': 4
                     },
-                    'video_ext': 'ismv',
-                    'audio_ext': 'none',
-                    'vbr': 8079,
                 }],
                 {},
             ),
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 5da12725a..838899052 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2983,6 +2983,8 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
                         'protocol': 'ism',
                         'fragments': fragments,
                         'has_drm': ism_doc.find('Protection') is not None,
+                        'language': stream_language,
+                        'audio_channels': int_or_none(track.get('Channels')),
                         '_download_params': {
                             'stream_type': stream_type,
                             'duration': duration,

From 0898c5c8ccadfc404472456a7a7751b72afebadd Mon Sep 17 00:00:00 2001
From: Simon Sawicki <contact@grub4k.xyz>
Date: Sat, 25 Mar 2023 19:41:28 +0100
Subject: [PATCH 31/97] [utils] `js_to_json`: Implement template strings
 (#6623)

Authored by: Grub4K
---
 test/test_utils.py |  7 +++++++
 yt_dlp/utils.py    | 11 +++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 3045b6d7e..d4a301583 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1190,6 +1190,13 @@ def test_js_to_json_malformed(self):
         self.assertEqual(js_to_json('42a1'), '42"a1"')
         self.assertEqual(js_to_json('42a-1'), '42"a"-1')
 
+    def test_js_to_json_template_literal(self):
+        self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
+        self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
+        self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
+        self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
+        self.assertEqual(js_to_json('`${name}`', {}), '"name"')
+
     def test_extract_attributes(self):
         self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
         self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 8c2c5593c..40533c2cb 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3366,7 +3366,7 @@ def strip_jsonp(code):
 
 def js_to_json(code, vars={}, *, strict=False):
     # vars is a dict of var, val pairs to substitute
-    STRING_QUOTES = '\'"'
+    STRING_QUOTES = '\'"`'
     STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
     SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
@@ -3384,6 +3384,12 @@ def process_escape(match):
                 else '' if escape == '\n'
                 else escape)
 
+    def template_substitute(match):
+        evaluated = js_to_json(match.group(1), vars, strict=strict)
+        if evaluated.startswith('"'):  # `evaluated` is '' for a comment-only expression; [0] would raise
+            return json.loads(evaluated)
+        return evaluated
+
     def fix_kv(m):
         v = m.group(0)
         if v in ('true', 'false', 'null'):
@@ -3394,7 +3400,8 @@ def fix_kv(m):
             return ''
 
         if v[0] in STRING_QUOTES:
-            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
+            v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
+            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
             return f'"{escaped}"'
 
         for regex, base in INTEGER_TABLE:

From 33b737bedf8383c0d00d4e1d06a5273dcdfdb756 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sun, 26 Mar 2023 17:16:42 -0500
Subject: [PATCH 32/97] [extractor/triller] Support short URLs, detect removed
 videos (#6636)

Authored by: bashonly
---
 yt_dlp/extractor/_extractors.py |   1 +
 yt_dlp/extractor/triller.py     | 307 ++++++++++++++++++--------------
 2 files changed, 174 insertions(+), 134 deletions(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 69464b6f0..a97c458fa 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1962,6 +1962,7 @@
 from .triller import (
     TrillerIE,
     TrillerUserIE,
+    TrillerShortIE,
 )
 from .trilulilu import TriluliluIE
 from .trovo import (
diff --git a/yt_dlp/extractor/triller.py b/yt_dlp/extractor/triller.py
index acd9e68d2..6a4dadb9b 100644
--- a/yt_dlp/extractor/triller.py
+++ b/yt_dlp/extractor/triller.py
@@ -1,15 +1,21 @@
 import itertools
 import json
+import re
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    HEADRequest,
+    UnsupportedError,
+    determine_ext,
     int_or_none,
+    parse_resolution,
     str_or_none,
     traverse_obj,
-    unified_strdate,
     unified_timestamp,
     url_basename,
+    urljoin,
+    url_or_none,
 )
 
 
@@ -22,25 +28,22 @@ def _perform_login(self, username, password):
         if self._API_HEADERS.get('Authorization'):
             return
 
-        user_check = self._download_json(
+        headers = {**self._API_HEADERS, 'Content-Type': 'application/json'}
+        user_check = traverse_obj(self._download_json(
             f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
-            fatal=False, expected_status=400, headers={
-                'Content-Type': 'application/json',
-                'Origin': 'https://triller.co',
-            }, data=json.dumps({'username': username}, separators=(',', ':')).encode('utf-8'))
-        if user_check.get('status'):  # endpoint returns "status":false if username exists
+            fatal=False, expected_status=400, headers=headers,
+            data=json.dumps({'username': username}, separators=(',', ':')).encode()), 'status')
+
+        if user_check:  # endpoint returns `"status":false` if username exists
             raise ExtractorError('Unable to login: Invalid username', expected=True)
 
-        credentials = {
-            'username': username,
-            'password': password,
-        }
         login = self._download_json(
-            f'{self._API_BASE_URL}/user/auth', None, note='Logging in',
-            fatal=False, expected_status=400, headers={
-                'Content-Type': 'application/json',
-                'Origin': 'https://triller.co',
-            }, data=json.dumps(credentials, separators=(',', ':')).encode('utf-8'))
+            f'{self._API_BASE_URL}/user/auth', None, note='Logging in', fatal=False,
+            expected_status=400, headers=headers, data=json.dumps({
+                'username': username,
+                'password': password,
+            }, separators=(',', ':')).encode()) or {}
+
         if not login.get('auth_token'):
             if login.get('error') == 1008:
                 raise ExtractorError('Unable to login: Incorrect password', expected=True)
@@ -55,100 +58,100 @@ def _get_comments(self, video_id, limit=15):
             headers=self._API_HEADERS, query={'limit': limit}) or {}
         if not comment_info.get('comments'):
             return
-        for comment_dict in comment_info['comments']:
-            yield {
-                'author': traverse_obj(comment_dict, ('author', 'username')),
-                'author_id': traverse_obj(comment_dict, ('author', 'user_id')),
-                'id': comment_dict.get('id'),
-                'text': comment_dict.get('body'),
-                'timestamp': unified_timestamp(comment_dict.get('timestamp')),
-            }
+        yield from traverse_obj(comment_info, ('comments', ..., {
+            'id': ('id', {str_or_none}),
+            'text': 'body',
+            'author': ('author', 'username'),
+            'author_id': ('author', 'user_id'),
+            'timestamp': ('timestamp', {unified_timestamp}),
+        }))
 
     def _check_user_info(self, user_info):
-        if not user_info:
-            self.report_warning('Unable to extract user info')
-        elif user_info.get('private') and not user_info.get('followed_by_me'):
+        if user_info.get('private') and not user_info.get('followed_by_me'):
             raise ExtractorError('This video is private', expected=True)
         elif traverse_obj(user_info, 'blocked_by_user', 'blocking_user'):
             raise ExtractorError('The author of the video is blocked', expected=True)
         return user_info
 
-    def _parse_video_info(self, video_info, username, user_info=None):
-        video_uuid = video_info.get('video_uuid')
-        video_id = video_info.get('id')
+    def _parse_video_info(self, video_info, username, user_id, display_id=None):
+        video_id = str(video_info['id'])
+        display_id = display_id or video_info.get('video_uuid')
+
+        if traverse_obj(video_info, (
+                None, ('transcoded_url', 'video_url', 'stream_url', 'audio_url'),
+                {lambda x: re.search(r'/copyright/', x)}), get_all=False):
+            self.raise_no_formats('This video has been removed due to licensing restrictions', expected=True)
+
+        def format_info(url):
+            return {
+                'url': url,
+                'ext': determine_ext(url),
+                'format_id': url_basename(url).split('.')[0],
+            }
 
         formats = []
-        video_url = traverse_obj(video_info, 'video_url', 'stream_url')
-        if video_url:
+
+        if determine_ext(video_info.get('transcoded_url')) == 'm3u8':
+            formats.extend(self._extract_m3u8_formats(
+                video_info['transcoded_url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
+
+        for video in traverse_obj(video_info, ('video_set', lambda _, v: url_or_none(v['url']))):
             formats.append({
-                'url': video_url,
-                'ext': 'mp4',
-                'vcodec': 'h264',
-                'width': video_info.get('width'),
-                'height': video_info.get('height'),
-                'format_id': url_basename(video_url).split('.')[0],
-                'filesize': video_info.get('filesize'),
-            })
-        video_set = video_info.get('video_set') or []
-        for video in video_set:
-            resolution = video.get('resolution') or ''
-            formats.append({
-                'url': video['url'],
-                'ext': 'mp4',
+                **format_info(video['url']),
+                **parse_resolution(video.get('resolution')),
                 'vcodec': video.get('codec'),
                 'vbr': int_or_none(video.get('bitrate'), 1000),
-                'width': int_or_none(resolution.split('x')[0]),
-                'height': int_or_none(resolution.split('x')[1]),
-                'format_id': url_basename(video['url']).split('.')[0],
             })
-        audio_url = video_info.get('audio_url')
-        if audio_url:
+
+        video_url = traverse_obj(video_info, 'video_url', 'stream_url', expected_type=url_or_none)
+        if video_url:
             formats.append({
-                'url': audio_url,
-                'ext': 'm4a',
-                'format_id': url_basename(audio_url).split('.')[0],
+                **format_info(video_url),
+                'vcodec': 'h264',
+                **traverse_obj(video_info, {
+                    'width': 'width',
+                    'height': 'height',
+                    'filesize': 'filesize',
+                }, expected_type=int_or_none),
             })
 
-        manifest_url = video_info.get('transcoded_url')
-        if manifest_url:
-            formats.extend(self._extract_m3u8_formats(
-                manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                m3u8_id='hls', fatal=False))
+        audio_url = url_or_none(video_info.get('audio_url'))
+        if audio_url:
+            formats.append(format_info(audio_url))
 
-        comment_count = int_or_none(video_info.get('comment_count'))
-
-        user_info = user_info or traverse_obj(video_info, 'user', default={})
+        comment_count = traverse_obj(video_info, ('comment_count', {int_or_none}))
 
         return {
-            'id': str_or_none(video_id) or video_uuid,
-            'title': video_info.get('description') or f'Video by {username}',
-            'thumbnail': video_info.get('thumbnail_url'),
-            'description': video_info.get('description'),
-            'uploader': str_or_none(username),
-            'uploader_id': str_or_none(user_info.get('user_id')),
-            'creator': str_or_none(user_info.get('name')),
-            'timestamp': unified_timestamp(video_info.get('timestamp')),
-            'upload_date': unified_strdate(video_info.get('timestamp')),
-            'duration': int_or_none(video_info.get('duration')),
-            'view_count': int_or_none(video_info.get('play_count')),
-            'like_count': int_or_none(video_info.get('likes_count')),
-            'artist': str_or_none(video_info.get('song_artist')),
-            'track': str_or_none(video_info.get('song_title')),
-            'webpage_url': f'https://triller.co/@{username}/video/{video_uuid}',
+            'id': video_id,
+            'display_id': display_id,
+            'uploader': username,
+            'uploader_id': user_id or traverse_obj(video_info, ('user', 'user_id', {str_or_none})),
+            'webpage_url': urljoin(f'https://triller.co/@{username}/video/', display_id),
             'uploader_url': f'https://triller.co/@{username}',
             'extractor_key': TrillerIE.ie_key(),
             'extractor': TrillerIE.IE_NAME,
             'formats': formats,
             'comment_count': comment_count,
             '__post_extractor': self.extract_comments(video_id, comment_count),
+            **traverse_obj(video_info, {
+                'title': ('description', {lambda x: x.replace('\r\n', ' ')}),
+                'description': 'description',
+                'creator': ((('user'), ('users', lambda _, v: str(v['user_id']) == user_id)), 'name'),
+                'thumbnail': ('thumbnail_url', {url_or_none}),
+                'timestamp': ('timestamp', {unified_timestamp}),
+                'duration': ('duration', {int_or_none}),
+                'view_count': ('play_count', {int_or_none}),
+                'like_count': ('likes_count', {int_or_none}),
+                'artist': 'song_artist',
+                'track': 'song_title',
+            }, get_all=False),
         }
 
 
 class TrillerIE(TrillerBaseIE):
     _VALID_URL = r'''(?x)
             https?://(?:www\.)?triller\.co/
-            @(?P<username>[\w\._]+)/video/
-            (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
+            @(?P<username>[\w.]+)/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})
         '''
     _TESTS = [{
         'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
@@ -165,16 +168,14 @@ class TrillerIE(TrillerBaseIE):
             'timestamp': 1660598222,
             'upload_date': '20220815',
             'duration': 47,
-            'height': 3840,
-            'width': 2160,
             'view_count': int,
             'like_count': int,
             'artist': 'Megan Thee Stallion',
             'track': 'Her',
-            'webpage_url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
             'uploader_url': 'https://triller.co/@theestallion',
             'comment_count': int,
-        }
+        },
+        'skip': 'This video has been removed due to licensing restrictions',
     }, {
         'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
         'md5': '874055f462af5b0699b9dbb527a505a0',
@@ -182,6 +183,7 @@ class TrillerIE(TrillerBaseIE):
             'id': '71621339',
             'ext': 'mp4',
             'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
+            'display_id': '46c6fcfa-aa9e-4503-a50c-68444f44cddc',
             'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
             'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
             'uploader': 'charlidamelio',
@@ -190,59 +192,75 @@ class TrillerIE(TrillerBaseIE):
             'timestamp': 1660773354,
             'upload_date': '20220817',
             'duration': 16,
-            'height': 1920,
-            'width': 1080,
             'view_count': int,
             'like_count': int,
             'artist': 'Dixie',
             'track': 'Someone to Blame',
-            'webpage_url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
             'uploader_url': 'https://triller.co/@charlidamelio',
             'comment_count': int,
-        }
+        },
+    }, {
+        'url': 'https://triller.co/@theestallion/video/07f35f38-1f51-48e2-8c5f-f7a8e829988f',
+        'md5': 'af7b3553e4b8bfca507636471ee2eb41',
+        'info_dict': {
+            'id': '71837829',
+            'ext': 'mp4',
+            'title': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio  #womeninhiphop',
+            'display_id': '07f35f38-1f51-48e2-8c5f-f7a8e829988f',
+            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+            'description': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio\r\n #womeninhiphop',
+            'uploader': 'theestallion',
+            'uploader_id': '18992236',
+            'creator': 'Megan Thee Stallion',
+            'timestamp': 1662486178,
+            'upload_date': '20220906',
+            'duration': 30,
+            'view_count': int,
+            'like_count': int,
+            'artist': 'Unknown',
+            'track': 'Unknown',
+            'uploader_url': 'https://triller.co/@theestallion',
+            'comment_count': int,
+        },
     }]
 
     def _real_extract(self, url):
-        username, video_uuid = self._match_valid_url(url).group('username', 'id')
+        username, display_id = self._match_valid_url(url).group('username', 'id')
 
-        video_info = traverse_obj(self._download_json(
-            f'{self._API_BASE_URL}/api/videos/{video_uuid}',
-            video_uuid, note='Downloading video info API JSON',
-            errnote='Unable to download video info API JSON',
-            headers=self._API_HEADERS), ('videos', 0))
-        if not video_info:
-            raise ExtractorError('No video info found in API response')
+        video_info = self._download_json(
+            f'{self._API_BASE_URL}/api/videos/{display_id}', display_id,
+            headers=self._API_HEADERS)['videos'][0]
 
-        user_info = self._check_user_info(video_info.get('user') or {})
-        return self._parse_video_info(video_info, username, user_info)
+        self._check_user_info(video_info.get('user') or {})
+
+        return self._parse_video_info(video_info, username, None, display_id)
 
 
 class TrillerUserIE(TrillerBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w\._]+)/?(?:$|[#?])'
+    _VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w.]+)/?(?:$|[#?])'
     _TESTS = [{
-        # first videos request only returns 2 videos
         'url': 'https://triller.co/@theestallion',
-        'playlist_mincount': 9,
+        'playlist_mincount': 12,
         'info_dict': {
             'id': '18992236',
             'title': 'theestallion',
             'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
-        }
+        },
     }, {
         'url': 'https://triller.co/@charlidamelio',
-        'playlist_mincount': 25,
+        'playlist_mincount': 150,
         'info_dict': {
             'id': '1875551',
             'title': 'charlidamelio',
             'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
-        }
+        },
     }]
 
     def _real_initialize(self):
         if not self._API_HEADERS.get('Authorization'):
             guest = self._download_json(
-                f'{self._API_BASE_URL}/user/create_guest',
-                None, note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
+                f'{self._API_BASE_URL}/user/create_guest', None,
+                note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
                     'platform': 'Web',
                     'app_version': '',
                 })
@@ -251,44 +269,65 @@ def _real_initialize(self):
 
             self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}'
 
-    def _extract_video_list(self, username, user_id, limit=6):
-        query = {
-            'limit': limit,
-        }
+    def _entries(self, username, user_id, limit=6):
+        query = {'limit': limit}
         for page in itertools.count(1):
-            for retry in self.RetryManager():
-                try:
-                    video_list = self._download_json(
-                        f'{self._API_BASE_URL}/api/users/{user_id}/videos',
-                        username, note=f'Downloading user video list page {page}',
-                        errnote='Unable to download user video list', headers=self._API_HEADERS,
-                        query=query)
-                except ExtractorError as e:
-                    if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
-                        retry.error = e
-                        continue
-                    raise
-            if not video_list.get('videos'):
-                break
-            yield from video_list['videos']
-            query['before_time'] = traverse_obj(video_list, ('videos', -1, 'timestamp'))
+            videos = self._download_json(
+                f'{self._API_BASE_URL}/api/users/{user_id}/videos',
+                username, note=f'Downloading user video list page {page}',
+                headers=self._API_HEADERS, query=query)
+
+            for video in traverse_obj(videos, ('videos', ...)):
+                yield self._parse_video_info(video, username, user_id)
+
+            query['before_time'] = traverse_obj(videos, ('videos', -1, 'timestamp'))
             if not query['before_time']:
                 break
 
-    def _entries(self, videos, username, user_info):
-        for video in videos:
-            yield self._parse_video_info(video, username, user_info)
-
     def _real_extract(self, url):
         username = self._match_id(url)
+
         user_info = self._check_user_info(self._download_json(
             f'{self._API_BASE_URL}/api/users/by_username/{username}',
-            username, note='Downloading user info',
-            errnote='Failed to download user info', headers=self._API_HEADERS).get('user', {}))
+            username, note='Downloading user info', headers=self._API_HEADERS)['user'])
 
         user_id = str_or_none(user_info.get('user_id'))
-        videos = self._extract_video_list(username, user_id)
-        thumbnail = user_info.get('avatar_url')
+        if not user_id:
+            raise ExtractorError('Unable to extract user ID')
 
         return self.playlist_result(
-            self._entries(videos, username, user_info), user_id, username, thumbnail=thumbnail)
+            self._entries(username, user_id), user_id, username, thumbnail=user_info.get('avatar_url'))
+
+
+class TrillerShortIE(InfoExtractor):
+    _VALID_URL = r'https?://v\.triller\.co/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://v.triller.co/WWZNWk',
+        'md5': '5eb8dc2c971bd8cd794ec9e8d5e9d101',
+        'info_dict': {
+            'id': '66210052',
+            'ext': 'mp4',
+            'title': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
+            'display_id': 'f4480e1f-fb4e-45b9-a44c-9e6c679ce7eb',
+            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+            'description': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
+            'uploader': 'statefairent',
+            'uploader_id': '487545193',
+            'creator': 'Official Summer Fair of LA',
+            'timestamp': 1629655457,
+            'upload_date': '20210822',
+            'duration': 19,
+            'view_count': int,
+            'like_count': int,
+            'artist': 'Unknown',
+            'track': 'Unknown',
+            'uploader_url': 'https://triller.co/@statefairent',
+            'comment_count': int,
+        },
+    }]
+
+    def _real_extract(self, url):
+        real_url = self._request_webpage(HEADRequest(url), self._match_id(url)).geturl()
+        if self.suitable(real_url):  # Prevent infinite loop in case redirect fails
+            raise UnsupportedError(real_url)
+        return self.url_result(real_url)

From 9be0fe1fd967f62cbf3c60bd14e1021a70abc147 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sun, 26 Mar 2023 17:27:39 -0500
Subject: [PATCH 33/97] [extractor/nbc] Fix `NBCStations` direct mp4 formats
 (#6637)

Authored by: bashonly
---
 yt_dlp/extractor/nbc.py | 57 ++++++++++++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 12 deletions(-)

diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index b9f65e927..ddc89a7c2 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -12,9 +12,13 @@
     RegexNotFoundError,
     UserNotLive,
     clean_html,
+    determine_ext,
+    float_or_none,
     int_or_none,
+    mimetype2ext,
     parse_age_limit,
     parse_duration,
+    remove_end,
     smuggle_url,
     traverse_obj,
     try_get,
@@ -22,7 +26,6 @@
     unified_timestamp,
     update_url_query,
     url_basename,
-    xpath_attr,
 )
 
 
@@ -660,6 +663,7 @@ class NBCStationsIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
             'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
+            'duration': 112.513,
             'timestamp': 1661135892,
             'upload_date': '20220822',
             'uploader': 'NBC 4',
@@ -676,6 +680,7 @@ class NBCStationsIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Huracán complica que televidente de Tucson reciba  reembolso',
             'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
+            'duration': 172.406,
             'timestamp': 1660886507,
             'upload_date': '20220819',
             'uploader': 'Telemundo Arizona',
@@ -685,6 +690,22 @@ class NBCStationsIE(InfoExtractor):
         'params': {
             'skip_download': 'm3u8',
         },
+    }, {
+        # direct mp4 link
+        'url': 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/',
+        'md5': '9bf8c41dc7abbb75b1a44f1491a4cc85',
+        'info_dict': {
+            'id': '2961135',
+            'ext': 'mp4',
+            'title': 'Highs Near Freezing in Boston on Wednesday',
+            'description': 'md5:3ec486609a926c99f00a3512e6c0e85b',
+            'duration': 235.669,
+            'timestamp': 1675268656,
+            'upload_date': '20230201',
+            'uploader': '',
+            'channel_id': 'WBTS',
+            'channel': 'nbcboston',
+        },
     }]
 
     _RESOLUTIONS = {
@@ -711,7 +732,7 @@ def _real_extract(self, url):
         if not video_data:
             raise ExtractorError('No video metadata found in webpage', expected=True)
 
-        info, formats, subtitles = {}, [], {}
+        info, formats = {}, []
         is_live = int_or_none(video_data.get('mpx_is_livestream')) == 1
         query = {
             'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
@@ -747,13 +768,14 @@ def _real_extract(self, url):
 
             video_url = traverse_obj(video_data, ((None, ('video', 'meta')), 'mp4_url'), get_all=False)
             if video_url:
+                ext = determine_ext(video_url)
                 height = self._search_regex(r'\d+-(\d+)p', url_basename(video_url), 'height', default=None)
                 formats.append({
                     'url': video_url,
-                    'ext': 'mp4',
+                    'ext': ext,
                     'width': int_or_none(self._RESOLUTIONS.get(height)),
                     'height': int_or_none(height),
-                    'format_id': 'http-mp4',
+                    'format_id': f'http-{ext}',
                 })
 
             info.update({
@@ -770,14 +792,25 @@ def _real_extract(self, url):
             smil = self._download_xml(
                 f'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id,
                 note='Downloading SMIL data', query=query, fatal=is_live)
-        if smil:
-            manifest_url = xpath_attr(smil, f'.//{{{default_ns}}}video', 'src', fatal=is_live)
-            subtitles = self._parse_smil_subtitles(smil, default_ns)
-            fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                manifest_url, video_id, 'mp4', m3u8_id='hls', fatal=is_live,
-                live=is_live, errnote='No HLS formats found')
-            formats.extend(fmts)
-            self._merge_subtitles(subs, target=subtitles)
+        subtitles = self._parse_smil_subtitles(smil, default_ns) if smil else {}
+        for video in smil.findall(self._xpath_ns('.//video', default_ns)) if smil else []:
+            info['duration'] = float_or_none(remove_end(video.get('dur'), 'ms'), 1000)
+            video_src_url = video.get('src')
+            ext = mimetype2ext(video.get('type'), default=determine_ext(video_src_url))
+            if ext == 'm3u8':
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    video_src_url, video_id, 'mp4', m3u8_id='hls', fatal=is_live,
+                    live=is_live, errnote='No HLS formats found')
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            elif video_src_url:
+                formats.append({
+                    'url': video_src_url,
+                    'format_id': f'https-{ext}',
+                    'ext': ext,
+                    'width': int_or_none(video.get('width')),
+                    'height': int_or_none(video.get('height')),
+                })
 
         if not formats:
             self.raise_no_formats('No video content found in webpage', expected=True)

From 95a383be1b6fb00c92ee3fb091732c4f6009acb6 Mon Sep 17 00:00:00 2001
From: Lesmiscore <nao20010128@gmail.com>
Date: Mon, 27 Mar 2023 22:39:55 +0900
Subject: [PATCH 34/97] [extractor/iwara] Report private videos (#6641)

Authored by: Lesmiscore
---
 yt_dlp/extractor/iwara.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index 62a179700..23f92786f 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -4,6 +4,7 @@
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     OnDemandPagedList,
     int_or_none,
     mimetype2ext,
@@ -75,7 +76,13 @@ def _extract_formats(self, video_id, fileurl):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(f'http://api.iwara.tv/video/{video_id}', video_id)
+        video_data = self._download_json(f'http://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True)
+        errmsg = video_data.get('message')
+        # at this point we can actually get uploaded user info, but do we need it?
+        if errmsg == 'errors.privateVideo':
+            self.raise_login_required('Private video. Login if you have permissions to watch')
+        elif errmsg:
+            raise ExtractorError(f'Iwara says: {errmsg}')
 
         return {
             'id': video_id,

From 0f0875ed555514f32522a0f30554fb08825d5124 Mon Sep 17 00:00:00 2001
From: Lesmiscore <nao20010128@gmail.com>
Date: Tue, 28 Mar 2023 01:17:42 +0900
Subject: [PATCH 35/97] 
 [postprocessor/EmbedThumbnail,postprocessor/FFmpegMetadata] Fix error on
 attaching thumbnails and info json for mkv/mka (#6647)

Authored by: Lesmiscore

Current yt-dlp code never hits this bug, but it would once filename sanitization improves
---
 yt_dlp/postprocessor/embedthumbnail.py | 2 +-
 yt_dlp/postprocessor/ffmpeg.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py
index b02d9d499..88a767132 100644
--- a/yt_dlp/postprocessor/embedthumbnail.py
+++ b/yt_dlp/postprocessor/embedthumbnail.py
@@ -107,7 +107,7 @@ def run(self, info):
                 options.extend(['-map', '-0:%d' % old_stream])
                 new_stream -= 1
             options.extend([
-                '-attach', thumbnail_filename,
+                '-attach', self._ffmpeg_filename_argument(thumbnail_filename),
                 '-metadata:s:%d' % new_stream, 'mimetype=%s' % mimetype,
                 '-metadata:s:%d' % new_stream, 'filename=cover.%s' % thumbnail_ext])
 
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 0e8f4c70b..63fc9ace6 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -809,7 +809,7 @@ def _get_infojson_opts(self, info, infofn):
             new_stream -= 1
 
         yield (
-            '-attach', infofn,
+            '-attach', self._ffmpeg_filename_argument(infofn),
             f'-metadata:s:{new_stream}', 'mimetype=application/json',
             f'-metadata:s:{new_stream}', 'filename=info.json',
         )

From ab92d8651c48d247dfb7d3f0a824cc986e47c7ed Mon Sep 17 00:00:00 2001
From: Lesmiscore <nao20010128@gmail.com>
Date: Wed, 29 Mar 2023 15:28:29 +0900
Subject: [PATCH 36/97] [extractor/iwara] Accept old URLs

Authored by: Lesmiscore

Closes #6669
---
 yt_dlp/extractor/iwara.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index 23f92786f..ae2960af0 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -15,7 +15,7 @@
 
 class IwaraIE(InfoExtractor):
     IE_NAME = 'iwara'
-    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/video/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
     _TESTS = [{
         # this video cannot be played because of migration
         'only_matching': True,

From 68be95bd0ca3f76aa63c9812935bd826b3a42e53 Mon Sep 17 00:00:00 2001
From: Lesmiscore <nao20010128@gmail.com>
Date: Fri, 31 Mar 2023 11:56:49 +0900
Subject: [PATCH 37/97] [extractor/YahooGyaOIE,extractor/YahooGyaOPlayerIE]
 Delete extractors due to website close (#6218)

Authored by: Lesmiscore
---
 yt_dlp/extractor/_extractors.py |   2 -
 yt_dlp/extractor/yahoo.py       | 117 --------------------------------
 2 files changed, 119 deletions(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index a97c458fa..77a3c2ce9 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2343,8 +2343,6 @@
 from .yahoo import (
     YahooIE,
     YahooSearchIE,
-    YahooGyaOPlayerIE,
-    YahooGyaOIE,
     YahooJapanNewsIE,
 )
 from .yandexdisk import YandexDiskIE
diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py
index a69715b7c..24148a0bd 100644
--- a/yt_dlp/extractor/yahoo.py
+++ b/yt_dlp/extractor/yahoo.py
@@ -2,7 +2,6 @@
 import itertools
 import urllib.parse
 
-from .brightcove import BrightcoveNewIE
 from .common import InfoExtractor, SearchInfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
@@ -11,7 +10,6 @@
     int_or_none,
     mimetype2ext,
     parse_iso8601,
-    smuggle_url,
     traverse_obj,
     try_get,
     url_or_none,
@@ -337,121 +335,6 @@ def _search_results(self, query):
                 break
 
 
-class YahooGyaOPlayerIE(InfoExtractor):
-    IE_NAME = 'yahoo:gyao:player'
-    _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
-    _TESTS = [{
-        'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
-        'info_dict': {
-            'id': '5993125228001',
-            'ext': 'mp4',
-            'title': 'フューリー　【字幕版】',
-            'description': 'md5:21e691c798a15330eda4db17a8fe45a5',
-            'uploader_id': '4235717419001',
-            'upload_date': '20190124',
-            'timestamp': 1548294365,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://streaming.yahoo.co.jp/c/y/01034/v00133/v0000000000000000706/',
-        'only_matching': True,
-    }, {
-        'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
-        'only_matching': True,
-    }, {
-        'url': 'https://gyao.yahoo.co.jp/episode/5fa1226c-ef8d-4e93-af7a-fd92f4e30597',
-        'only_matching': True,
-    }]
-    _GEO_BYPASS = False
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url).replace('/', ':')
-        headers = self.geo_verification_headers()
-        headers['Accept'] = 'application/json'
-        resp = self._download_json(
-            'https://gyao.yahoo.co.jp/apis/playback/graphql', video_id, query={
-                'appId': 'dj00aiZpPUNJeDh2cU1RazU3UCZzPWNvbnN1bWVyc2VjcmV0Jng9NTk-',
-                'query': '''{
-  content(parameter: {contentId: "%s", logicaAgent: PC_WEB}) {
-    video {
-      delivery {
-        id
-      }
-      title
-    }
-  }
-}''' % video_id,
-            }, headers=headers)
-        content = resp['data']['content']
-        if not content:
-            msg = resp['errors'][0]['message']
-            if msg == 'not in japan':
-                self.raise_geo_restricted(countries=['JP'])
-            raise ExtractorError(msg)
-        video = content['video']
-        return {
-            '_type': 'url_transparent',
-            'id': video_id,
-            'title': video['title'],
-            'url': smuggle_url(
-                'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['delivery']['id'],
-                {'geo_countries': ['JP']}),
-            'ie_key': BrightcoveNewIE.ie_key(),
-        }
-
-
-class YahooGyaOIE(InfoExtractor):
-    IE_NAME = 'yahoo:gyao'
-    _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
-    _TESTS = [{
-        'url': 'https://gyao.yahoo.co.jp/title/%E3%82%BF%E3%82%A4%E3%83%A0%E3%83%9C%E3%82%AB%E3%83%B3%E3%82%B7%E3%83%AA%E3%83%BC%E3%82%BA%20%E3%83%A4%E3%83%83%E3%82%BF%E3%83%BC%E3%83%9E%E3%83%B3/5f60ceb3-6e5e-40ef-ba40-d68b598d067f',
-        'info_dict': {
-            'id': '5f60ceb3-6e5e-40ef-ba40-d68b598d067f',
-        },
-        'playlist_mincount': 80,
-    }, {
-        'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
-        'only_matching': True,
-    }, {
-        'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
-        'only_matching': True,
-    }, {
-        'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
-        'only_matching': True,
-    }, {
-        'url': 'https://gyao.yahoo.co.jp/title/5b025a49-b2e5-4dc7-945c-09c6634afacf',
-        'only_matching': True,
-    }]
-
-    def _entries(self, program_id):
-        page = 1
-        while True:
-            playlist = self._download_json(
-                f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}&serviceId=gy', program_id,
-                note=f'Downloading JSON metadata page {page}')
-            if not playlist:
-                break
-            for video in playlist['videos']:
-                video_id = video.get('id')
-                if not video_id:
-                    continue
-                if video.get('streamingAvailability') == 'notYet':
-                    continue
-                yield self.url_result(
-                    'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'),
-                    YahooGyaOPlayerIE.ie_key(), video_id)
-            if playlist.get('ended'):
-                break
-            page += 1
-
-    def _real_extract(self, url):
-        program_id = self._match_id(url).replace('/', ':')
-        return self.playlist_result(self._entries(program_id), program_id)
-
-
 class YahooJapanNewsIE(InfoExtractor):
     IE_NAME = 'yahoo:japannews'
     IE_DESC = 'Yahoo! Japan News'

From 141a8dff98874a426d7fbe772e0a8421bb42656f Mon Sep 17 00:00:00 2001
From: coletdjnz <coletdjnz@protonmail.com>
Date: Thu, 6 Apr 2023 19:44:22 +1200
Subject: [PATCH 38/97] [extractor/youtube] Fix comment loop detection for
 pinned comments (#6714)

Pinned comments may repeat a second time - this is expected.

Fixes https://github.com/yt-dlp/yt-dlp/issues/6712

Authored by: coletdjnz
---
 yt_dlp/extractor/youtube.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index ca56f112b..6dc36f9b9 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3316,9 +3316,17 @@ def extract_thread(contents):
                 comment = self._extract_comment(comment_renderer, parent)
                 if not comment:
                     continue
+                is_pinned = bool(traverse_obj(comment_renderer, 'pinnedCommentBadge'))
+                comment_id = comment['id']
+                if is_pinned:
+                    tracker['pinned_comment_ids'].add(comment_id)
                 # Sometimes YouTube may break and give us infinite looping comments.
                 # See: https://github.com/yt-dlp/yt-dlp/issues/6290
-                if comment['id'] in tracker['seen_comment_ids']:
+                if comment_id in tracker['seen_comment_ids']:
+                    if comment_id in tracker['pinned_comment_ids'] and not is_pinned:
+                        # Pinned comments may appear a second time in newest first sort
+                        # See: https://github.com/yt-dlp/yt-dlp/issues/6712
+                        continue
                     self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.')
                     yield
                 else:
@@ -3348,7 +3356,9 @@ def extract_thread(contents):
                 current_page_thread=0,
                 total_parent_comments=0,
                 total_reply_comments=0,
-                seen_comment_ids=set())
+                seen_comment_ids=set(),
+                pinned_comment_ids=set()
+            )
 
         # TODO: Deprecated
         # YouTube comments have a max depth of 2

From 0a6918a4a1431960181d8c50e0bbbcb0afbaff9a Mon Sep 17 00:00:00 2001
From: bashonly <bashonly@bashonly.com>
Date: Sat, 8 Apr 2023 11:09:05 -0500
Subject: [PATCH 39/97] [extractor/kick] Make initial request non-fatal

Authored by: bashonly
---
 yt_dlp/extractor/kick.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py
index a79ffb7a9..765ffa0c8 100644
--- a/yt_dlp/extractor/kick.py
+++ b/yt_dlp/extractor/kick.py
@@ -14,7 +14,7 @@
 
 class KickBaseIE(InfoExtractor):
     def _real_initialize(self):
-        self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session')
+        self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False)
         xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
         if not xsrf_token:
             self.write_debug('kick.com did not set XSRF-TOKEN cookie')

From ef0848abd425dfda6db62baa8d72897eefb0007f Mon Sep 17 00:00:00 2001
From: Chris Caruso <carusochrisr@gmail.com>
Date: Tue, 11 Apr 2023 04:45:22 -0700
Subject: [PATCH 40/97] [extractor/youku] Improve error message (#6690)

Authored by: carusocr
Closes #6551
---
 yt_dlp/extractor/youku.py | 44 +++------------------------------------
 1 file changed, 3 insertions(+), 41 deletions(-)

diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py
index 404f196f4..7ecd9f183 100644
--- a/yt_dlp/extractor/youku.py
+++ b/yt_dlp/extractor/youku.py
@@ -6,6 +6,7 @@
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    clean_html,
     get_element_by_class,
     js_to_json,
     str_or_none,
@@ -26,48 +27,8 @@ class YoukuIE(InfoExtractor):
     '''
 
     _TESTS = [{
-        # MD5 is unstable
-        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
-        'info_dict': {
-            'id': 'XMTc1ODE5Njcy',
-            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
-            'ext': 'mp4',
-            'duration': 74.73,
-            'thumbnail': r're:^https?://.*',
-            'uploader': '。躲猫猫、',
-            'uploader_id': '36017967',
-            'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
-            'tags': list,
-        }
-    }, {
         'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
         'only_matching': True,
-    }, {
-        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
-        'info_dict': {
-            'id': 'XODgxNjg1Mzk2',
-            'ext': 'mp4',
-            'title': '武媚娘传奇 85',
-            'duration': 1999.61,
-            'thumbnail': r're:^https?://.*',
-            'uploader': '疯狂豆花',
-            'uploader_id': '62583473',
-            'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
-            'tags': list,
-        },
-    }, {
-        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
-        'info_dict': {
-            'id': 'XMTI1OTczNDM5Mg',
-            'ext': 'mp4',
-            'title': '花千骨 04',
-            'duration': 2363,
-            'thumbnail': r're:^https?://.*',
-            'uploader': '放剧场-花千骨',
-            'uploader_id': '772849359',
-            'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
-            'tags': list,
-        },
     }, {
         'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
         'note': 'Video protected with password',
@@ -81,6 +42,7 @@ class YoukuIE(InfoExtractor):
             'uploader_id': '322014285',
             'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
             'tags': list,
+            'skip': '404',
         },
         'params': {
             'videopassword': '100600',
@@ -192,7 +154,7 @@ def _real_extract(self, url):
             else:
                 msg = 'Youku server reported error %i' % error.get('code')
                 if error_note is not None:
-                    msg += ': ' + error_note
+                    msg += ': ' + clean_html(error_note)
                 raise ExtractorError(msg)
 
         # get video title

From 7e35526d5b970a034b9d76215ee3e4bd7631edcd Mon Sep 17 00:00:00 2001
From: "lauren n. liberda" <lauren@selfisekai.rocks>
Date: Tue, 11 Apr 2023 13:54:49 +0200
Subject: [PATCH 41/97] [extractor/hrefli] Add extractor (#6762)

Authored by: selfisekai
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/hrefli.py      | 15 +++++++++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 yt_dlp/extractor/hrefli.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 77a3c2ce9..808b558d1 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -734,6 +734,7 @@
 )
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
+from .hrefli import HrefLiRedirectIE
 from .hrfensehen import HRFernsehenIE
 from .hrti import (
     HRTiIE,
diff --git a/yt_dlp/extractor/hrefli.py b/yt_dlp/extractor/hrefli.py
new file mode 100644
index 000000000..77db2ea68
--- /dev/null
+++ b/yt_dlp/extractor/hrefli.py
@@ -0,0 +1,15 @@
+from .common import InfoExtractor
+
+
+class HrefLiRedirectIE(InfoExtractor):
+    IE_NAME = 'href.li'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'https?://href\.li/\?(?P<url>.+)'
+
+    _TESTS = [{
+        'url': 'https://href.li/?https://www.reddit.com/r/cats/comments/12bluel/my_cat_helps_me_with_water/?utm_source=share&utm_medium=android_app&utm_name=androidcss&utm_term=1&utm_content=share_button',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        return self.url_result(self._match_valid_url(url).group('url'))

From faa0332ed69e070cf3bd31390589a596e962f392 Mon Sep 17 00:00:00 2001
From: sian1468 <58017832+sian1468@users.noreply.github.com>
Date: Tue, 11 Apr 2023 18:56:39 +0700
Subject: [PATCH 42/97] [extractor/line] Remove extractors (#6734)

Service has shut down - https://archive.ph/txVKy
Authored by: sian1468
---
 yt_dlp/extractor/_extractors.py |   4 -
 yt_dlp/extractor/line.py        | 143 --------------------------------
 2 files changed, 147 deletions(-)
 delete mode 100644 yt_dlp/extractor/line.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 808b558d1..5f4ae7b8d 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -944,10 +944,6 @@
     LimelightChannelIE,
     LimelightChannelListIE,
 )
-from .line import (
-    LineLiveIE,
-    LineLiveChannelIE,
-)
 from .linkedin import (
     LinkedInIE,
     LinkedInLearningIE,
diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py
deleted file mode 100644
index 3fab9c8a5..000000000
--- a/yt_dlp/extractor/line.py
+++ /dev/null
@@ -1,143 +0,0 @@
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
-    format_field,
-    int_or_none,
-    str_or_none,
-)
-
-
-class LineLiveBaseIE(InfoExtractor):
-    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
-
-    def _parse_broadcast_item(self, item):
-        broadcast_id = compat_str(item['id'])
-        title = item['title']
-        is_live = item.get('isBroadcastingNow')
-
-        thumbnails = []
-        for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
-            if not thumbnail_url:
-                continue
-            thumbnails.append({
-                'id': thumbnail_id,
-                'url': thumbnail_url,
-            })
-
-        channel = item.get('channel') or {}
-        channel_id = str_or_none(channel.get('id'))
-
-        return {
-            'id': broadcast_id,
-            'title': title,
-            'thumbnails': thumbnails,
-            'timestamp': int_or_none(item.get('createdAt')),
-            'channel': channel.get('name'),
-            'channel_id': channel_id,
-            'channel_url': format_field(channel_id, None, 'https://live.line.me/channels/%s'),
-            'duration': int_or_none(item.get('archiveDuration')),
-            'view_count': int_or_none(item.get('viewerCount')),
-            'comment_count': int_or_none(item.get('chatCount')),
-            'is_live': is_live,
-        }
-
-
-class LineLiveIE(LineLiveBaseIE):
-    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'https://live.line.me/channels/5833718/broadcast/18373277',
-        'md5': '2c15843b8cb3acd55009ddcb2db91f7c',
-        'info_dict': {
-            'id': '18373277',
-            'title': '2021/12/05 （15分犬）定例譲渡会🐶',
-            'ext': 'mp4',
-            'timestamp': 1638674925,
-            'upload_date': '20211205',
-            'thumbnail': 'md5:e1f5817e60f4a72b7e43377cf308d7ef',
-            'channel_url': 'https://live.line.me/channels/5833718',
-            'channel': 'Yahooニュース掲載🗞プロフ見てね🐕🐕',
-            'channel_id': '5833718',
-            'duration': 937,
-            'view_count': int,
-            'comment_count': int,
-            'is_live': False,
-        }
-    }, {
-        # archiveStatus == 'DELETED'
-        'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        channel_id, broadcast_id = self._match_valid_url(url).groups()
-        broadcast = self._download_json(
-            self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
-            broadcast_id)
-        item = broadcast['item']
-        info = self._parse_broadcast_item(item)
-        protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
-        formats = []
-        for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
-            if not v:
-                continue
-            if k == 'abr':
-                formats.extend(self._extract_m3u8_formats(
-                    v, broadcast_id, 'mp4', protocol,
-                    m3u8_id='hls', fatal=False))
-                continue
-            f = {
-                'ext': 'mp4',
-                'format_id': 'hls-' + k,
-                'protocol': protocol,
-                'url': v,
-            }
-            if not k.isdigit():
-                f['vcodec'] = 'none'
-            formats.append(f)
-        if not formats:
-            archive_status = item.get('archiveStatus')
-            if archive_status != 'ARCHIVED':
-                self.raise_no_formats('this video has been ' + archive_status.lower(), expected=True)
-        info['formats'] = formats
-        return info
-
-
-class LineLiveChannelIE(LineLiveBaseIE):
-    _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
-    _TEST = {
-        'url': 'https://live.line.me/channels/5893542',
-        'info_dict': {
-            'id': '5893542',
-            'title': 'いくらちゃんだよぉ🦒',
-            'description': 'md5:4d418087973ad081ceb1b3481f0b1816',
-        },
-        'playlist_mincount': 29
-    }
-
-    def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
-        while True:
-            for row in (archived_broadcasts.get('rows') or []):
-                share_url = str_or_none(row.get('shareURL'))
-                if not share_url:
-                    continue
-                info = self._parse_broadcast_item(row)
-                info.update({
-                    '_type': 'url',
-                    'url': share_url,
-                    'ie_key': LineLiveIE.ie_key(),
-                })
-                yield info
-            if not archived_broadcasts.get('hasNextPage'):
-                return
-            archived_broadcasts = self._download_json(
-                self._API_BASE_URL + channel_id + '/archived_broadcasts',
-                channel_id, query={
-                    'lastId': info['id'],
-                })
-
-    def _real_extract(self, url):
-        channel_id = self._match_id(url)
-        channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
-        return self.playlist_result(
-            self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
-            channel_id, channel.get('title'), channel.get('information'))

From 79c77e85b70ae3b9942d5a88c14d021a9bd24222 Mon Sep 17 00:00:00 2001
From: Shreyas Minocha <11537232+shreyasminocha@users.noreply.github.com>
Date: Tue, 11 Apr 2023 16:05:22 +0000
Subject: [PATCH 43/97] [extractor/zoom] Fix extractor (#6741)

Authored by: shreyasminocha
Closes #6677
---
 yt_dlp/extractor/zoom.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py
index ef8b71522..eb0ab795b 100644
--- a/yt_dlp/extractor/zoom.py
+++ b/yt_dlp/extractor/zoom.py
@@ -5,6 +5,7 @@
     str_or_none,
     js_to_json,
     parse_filesize,
+    traverse_obj,
     urlencode_postdata,
     urljoin,
 )
@@ -53,6 +54,9 @@ def _real_extract(self, url):
             r'(?s)window\.__data__\s*=\s*({.+?});',
             webpage, 'data'), play_id, js_to_json)
 
+        data = self._download_json(
+            f'{base_url}nws/recording/1.0/play/info/{data["fileId"]}', play_id)['result']
+
         subtitles = {}
         for _type in ('transcript', 'cc', 'chapter'):
             if data.get('%sUrl' % _type):
@@ -67,11 +71,11 @@ def _real_extract(self, url):
             formats.append({
                 'format_note': 'Camera stream',
                 'url': str_or_none(data.get('viewMp4Url')),
-                'width': int_or_none(data.get('viewResolvtionsWidth')),
-                'height': int_or_none(data.get('viewResolvtionsHeight')),
-                'format_id': str_or_none(data.get('recordingId')),
+                'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
+                'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
+                'format_id': str_or_none(traverse_obj(data, ('recording', 'id'))),
                 'ext': 'mp4',
-                'filesize_approx': parse_filesize(data.get('fileSize')),
+                'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
                 'preference': 0
             })
 
@@ -79,16 +83,16 @@ def _real_extract(self, url):
             formats.append({
                 'format_note': 'Screen share stream',
                 'url': str_or_none(data.get('shareMp4Url')),
-                'width': int_or_none(data.get('shareResolvtionsWidth')),
-                'height': int_or_none(data.get('shareResolvtionsHeight')),
-                'format_id': str_or_none(data.get('shareVideoId')),
+                'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
+                'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
+                'format_id': str_or_none(traverse_obj(data, ('shareVideo', 'id'))),
                 'ext': 'mp4',
                 'preference': -1
             })
 
         return {
             'id': play_id,
-            'title': data.get('topic'),
+            'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
             'subtitles': subtitles,
             'formats': formats,
             'http_headers': {

From c6786ff3baaf72a5baa4d56d34058e54cbcf8ceb Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Tue, 11 Apr 2023 16:11:15 +0530
Subject: [PATCH 44/97] [extractor/youtube] Revert default formats to `https`

---
 yt_dlp/extractor/youtube.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 6dc36f9b9..d6a55e953 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3789,15 +3789,13 @@ def build_fragments(f):
             if single_stream and dct.get('ext'):
                 dct['container'] = dct['ext'] + '_dash'
 
-            if dct['filesize']:
+            if all_formats and dct['filesize']:
                 yield {
                     **dct,
                     'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                     'protocol': 'http_dash_segments',
                     'fragments': build_fragments(dct),
                 }
-                if not all_formats:
-                    continue
             dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
             yield dct
 

From 26010b5cec50193b98ad7845d1d77450f9f14c2b Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Tue, 11 Apr 2023 17:01:22 +0530
Subject: [PATCH 45/97] [postprocessor/FixupDuplicateMoov] Fix bug in
 triggering

---
 yt_dlp/YoutubeDL.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index a7dced8e8..0d987dbb8 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3312,7 +3312,7 @@ def ffmpeg_fixup(cndn, msg, cls):
                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                      FFmpegFixupM3u8PP)
-                        ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
+                        ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
 
                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)

From 52ecc33e221f7de7eb6fed6c22489f0c5fdd2c6d Mon Sep 17 00:00:00 2001
From: Lesmiscore <nao20010128@gmail.com>
Date: Wed, 12 Apr 2023 01:19:34 +0900
Subject: [PATCH 46/97] [extractor/niconico] Download comments from the new
 endpoint (#6773)

Authored by: Lesmiscore
---
 yt_dlp/extractor/niconico.py | 54 ++++++++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 9c3a5a4bc..cacefeb42 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -477,23 +477,32 @@ def _get_subtitles(self, video_id, api_data, session_api_data):
         user_id_str = session_api_data.get('serviceUserId')
 
         thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
-        raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key)
-        if not raw_danmaku:
+        legacy_danmaku = self._extract_legacy_comments(video_id, thread_ids, user_id_str, comment_user_key) or []
+
+        new_comments = traverse_obj(api_data, ('comment', 'nvComment'))
+        new_danmaku = self._extract_new_comments(
+            new_comments.get('server'), video_id,
+            new_comments.get('params'), new_comments.get('threadKey'))
+
+        if not legacy_danmaku and not new_danmaku:
             self.report_warning(f'Failed to get comments. {bug_reports_message()}')
             return
+
         return {
             'comments': [{
                 'ext': 'json',
-                'data': json.dumps(raw_danmaku),
+                'data': json.dumps(legacy_danmaku + new_danmaku),
             }],
         }
 
-    def _extract_all_comments(self, video_id, threads, user_id, user_key):
+    def _extract_legacy_comments(self, video_id, threads, user_id, user_key):
         auth_data = {
             'user_id': user_id,
             'userkey': user_key,
         } if user_id and user_key else {'user_id': ''}
 
+        api_url = traverse_obj(threads, (..., 'server'), get_all=False)
+
         # Request Start
         post_data = [{'ping': {'content': 'rs:0'}}]
         for i, thread in enumerate(threads):
@@ -532,17 +541,32 @@ def _extract_all_comments(self, video_id, threads, user_id, user_key):
         # Request Final
         post_data.append({'ping': {'content': 'rf:0'}})
 
-        for api_url in self._COMMENT_API_ENDPOINTS:
-            comments = self._download_json(
-                api_url, video_id, data=json.dumps(post_data).encode(), fatal=False,
-                headers={
-                    'Referer': 'https://www.nicovideo.jp/watch/%s' % video_id,
-                    'Origin': 'https://www.nicovideo.jp',
-                    'Content-Type': 'text/plain;charset=UTF-8',
-                },
-                note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
-            if comments:
-                return comments
+        return self._download_json(
+            f'{api_url}/api.json', video_id, data=json.dumps(post_data).encode(), fatal=False,
+            headers={
+                'Referer': f'https://www.nicovideo.jp/watch/{video_id}',
+                'Origin': 'https://www.nicovideo.jp',
+                'Content-Type': 'text/plain;charset=UTF-8',
+            },
+            note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
+
+    def _extract_new_comments(self, endpoint, video_id, params, thread_key):
+        comments = self._download_json(
+            f'{endpoint}/v1/threads', video_id, data=json.dumps({
+                'additionals': {},
+                'params': params,
+                'threadKey': thread_key,
+            }).encode(), fatal=False,
+            headers={
+                'Referer': 'https://www.nicovideo.jp/',
+                'Origin': 'https://www.nicovideo.jp',
+                'Content-Type': 'text/plain;charset=UTF-8',
+                'x-client-os-type': 'others',
+                'x-frontend-id': '6',
+                'x-frontend-version': '0',
+            },
+            note='Downloading comments (new)', errnote='Failed to download comments (new)')
+        return traverse_obj(comments, ('data', 'threads', ..., 'comments', ...))
 
 
 class NiconicoPlaylistBaseIE(InfoExtractor):

From c3f624ef0a5d7a6ae1c5ffeb243087e9fc7d79dc Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 12 Apr 2023 05:04:47 +0530
Subject: [PATCH 47/97] Relaxed validation for numeric format filters

Continued from f96bff99cb2cf1d112b099e5149dd2c3a6a76af2

Closes #6782
---
 yt_dlp/YoutubeDL.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 0d987dbb8..7b6fef204 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1932,7 +1932,7 @@ def _build_format_filter(self, filter_spec):
             '!=': operator.ne,
         }
         operator_rex = re.compile(r'''(?x)\s*
-            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
+            (?P<key>[\w.-]+)\s*
             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
             ''' % '|'.join(map(re.escape, OPERATORS.keys())))

From 2d97d154fe4fb84fe2ed3a4e1ed5819e89b71e88 Mon Sep 17 00:00:00 2001
From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com>
Date: Thu, 13 Apr 2023 03:19:08 +0900
Subject: [PATCH 48/97] [extractor/gmanetwork] Add extractor (#5945)

Authored by: HobbyistDev
Partially fixes #5770
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/gmanetwork.py  | 83 +++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 yt_dlp/extractor/gmanetwork.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 5f4ae7b8d..c2043bbd2 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -681,6 +681,7 @@
     GloboIE,
     GloboArticleIE,
 )
+from .gmanetwork import GMANetworkVideoIE
 from .go import GoIE
 from .godtube import GodTubeIE
 from .gofile import GofileIE
diff --git a/yt_dlp/extractor/gmanetwork.py b/yt_dlp/extractor/gmanetwork.py
new file mode 100644
index 000000000..62fff4ead
--- /dev/null
+++ b/yt_dlp/extractor/gmanetwork.py
@@ -0,0 +1,83 @@
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from .youtube import YoutubeIE
+
+
+class GMANetworkVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www)\.gmanetwork\.com/(?:\w+/){3}(?P<id>\d+)/(?P<display_id>[\w-]+)/video'
+    _TESTS = [{
+        'url': 'https://www.gmanetwork.com/fullepisodes/home/running_man_philippines/168677/running-man-philippines-catch-the-thief-full-chapter-2/video?section=home',
+        'info_dict': {
+            'id': '28BqW0AXPe0',
+            'ext': 'mp4',
+            'upload_date': '20220919',
+            'uploader_url': 'http://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
+            'like_count': int,
+            'view_count': int,
+            'uploader': 'YoüLOL',
+            'channel_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
+            'duration': 5313,
+            'comment_count': int,
+            'tags': 'count:22',
+            'uploader_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
+            'title': 'Running Man Philippines: Catch the Thief (FULL CHAPTER 2)',
+            'channel_url': 'https://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
+            'thumbnail': 'https://i.ytimg.com/vi/28BqW0AXPe0/maxresdefault.jpg',
+            'release_timestamp': 1663594212,
+            'age_limit': 0,
+            'channel_follower_count': int,
+            'categories': ['Entertainment'],
+            'description': 'md5:811bdcea74f9c48051824e494756e926',
+            'live_status': 'not_live',
+            'playable_in_embed': True,
+            'channel': 'YoüLOL',
+            'availability': 'public',
+            'release_date': '20220919',
+        }
+    }, {
+        'url': 'https://www.gmanetwork.com/fullepisodes/home/more_than_words/87059/more-than-words-full-episode-80/video?section=home',
+        'info_dict': {
+            'id': 'yiDOExw2aSA',
+            'ext': 'mp4',
+            'live_status': 'not_live',
+            'channel': 'GMANetwork',
+            'like_count': int,
+            'channel_follower_count': int,
+            'description': 'md5:6d00cd658394fa1a5071200d3ed4be05',
+            'duration': 1419,
+            'age_limit': 0,
+            'comment_count': int,
+            'upload_date': '20181003',
+            'thumbnail': 'https://i.ytimg.com/vi_webp/yiDOExw2aSA/maxresdefault.webp',
+            'availability': 'public',
+            'playable_in_embed': True,
+            'channel_id': 'UCKL5hAuzgFQsyrsQKgU0Qng',
+            'title': 'More Than Words: Full Episode 80 (Finale)',
+            'uploader_id': 'GMANETWORK',
+            'categories': ['Entertainment'],
+            'uploader': 'GMANetwork',
+            'channel_url': 'https://www.youtube.com/channel/UCKL5hAuzgFQsyrsQKgU0Qng',
+            'tags': 'count:29',
+            'view_count': int,
+            'uploader_url': 'http://www.youtube.com/user/GMANETWORK',
+        }
+    }]
+
+    def _real_extract(self, url):
+        content_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+        webpage = self._download_webpage(url, display_id)
+        # webpage route
+        youtube_id = self._search_regex(
+            r'var\s*YOUTUBE_VIDEO\s*=\s*[\'"]+(?P<yt_id>[\w-]+)', webpage, 'youtube_id', fatal=False)
+        if youtube_id:
+            return self.url_result(youtube_id, YoutubeIE, youtube_id)
+
+        # api call route
+        # more info at https://aphrodite.gmanetwork.com/fullepisodes/assets/fullepisodes/js/dist/fullepisodes_video.js?v=1.1.11
+        network_url = self._search_regex(
+            r'NETWORK_URL\s*=\s*[\'"](?P<url>[^\'"]+)', webpage, 'network_url')
+        json_data = self._download_json(f'{network_url}api/data/content/video/{content_id}', display_id)
+        if json_data.get('video_file'):
+            return self.url_result(json_data['video_file'], YoutubeIE, json_data['video_file'])
+        else:
+            return self.url_result(json_data['dailymotion_file'], DailymotionIE, json_data['dailymotion_file'])

From b093c38cc9f26b59a8504211d792f053142c847d Mon Sep 17 00:00:00 2001
From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com>
Date: Thu, 13 Apr 2023 03:21:57 +0900
Subject: [PATCH 49/97] [extractor/biliIntl] Add comment extraction (#6079)

Authored by: HobbyistDev
---
 yt_dlp/extractor/bilibili.py | 111 ++++++++++++++++++++++++++++++++++-
 1 file changed, 109 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index c34439779..91d436dd8 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -26,6 +26,7 @@
     srt_subtitles_timecode,
     str_or_none,
     traverse_obj,
+    unified_timestamp,
     unsmuggle_url,
     url_or_none,
     urlencode_postdata,
@@ -996,6 +997,53 @@ class BiliIntlIE(BiliIntlBaseIE):
             'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
             'upload_date': '20221212',
             'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
+        },
+    }, {
+        # episode comment extraction
+        'url': 'https://www.bilibili.tv/en/play/34580/340317',
+        'info_dict': {
+            'id': '340317',
+            'ext': 'mp4',
+            'timestamp': 1604057820,
+            'upload_date': '20201030',
+            'episode_number': 5,
+            'title': 'E5 - My Own Steel',
+            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
+            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
+            'episode': 'Episode 5',
+            'comment_count': int,
+            'chapters': [{
+                'start_time': 0,
+                'end_time': 61.0,
+                'title': '<Untitled Chapter 1>'
+            }, {
+                'start_time': 61.0,
+                'end_time': 134.0,
+                'title': 'Intro'
+            }, {
+                'start_time': 1290.0,
+                'end_time': 1379.0,
+                'title': 'Outro'
+            }],
+        },
+        'params': {
+            'getcomments': True
+        }
+    }, {
+        # user generated content comment extraction
+        'url': 'https://www.bilibili.tv/en/video/2045730385',
+        'info_dict': {
+            'id': '2045730385',
+            'ext': 'mp4',
+            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
+            'timestamp': 1667891924,
+            'upload_date': '20221108',
+            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
+            'comment_count': int,
+            'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
+        },
+        'params': {
+            'getcomments': True
         }
     }, {
         # episode id without intro and outro
@@ -1055,11 +1103,69 @@ def _extract_video_metadata(self, url, video_id, season_id):
 
         # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
         return merge_dicts(
-            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
+            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
                 'title': self._html_search_meta('og:title', webpage),
                 'description': self._html_search_meta('og:description', webpage)
             })
 
+    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
+        comment_api_raw_data = self._download_json(
+            'https://api.bilibili.tv/reply/web/detail', display_id,
+            note=f'Downloading reply comment of {root_id} - {next_id}',
+            query={
+                'platform': 'web',
+                'ps': 20,  # comment's reply per page (default: 3)
+                'root': root_id,
+                'next': next_id,
+            })
+
+        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+            yield {
+                'author': traverse_obj(replies, ('member', 'name')),
+                'author_id': traverse_obj(replies, ('member', 'mid')),
+                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+                'text': traverse_obj(replies, ('content', 'message')),
+                'id': replies.get('rpid'),
+                'like_count': int_or_none(replies.get('like_count')),
+                'parent': replies.get('parent'),
+                'timestamp': unified_timestamp(replies.get('ctime_text'))
+            }
+
+        if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+            yield from self._get_comments_reply(
+                root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
+
+    def _get_comments(self, video_id, ep_id):
+        for i in itertools.count(0):
+            comment_api_raw_data = self._download_json(
+                'https://api.bilibili.tv/reply/web/root', video_id,
+                note=f'Downloading comment page {i + 1}',
+                query={
+                    'platform': 'web',
+                    'pn': i,  # page number
+                    'ps': 20,  # comment per page (default: 20)
+                    'oid': video_id,
+                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
+                    'sort_type': 1,  # 1: best, 2: recent
+                })
+
+            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+                yield {
+                    'author': traverse_obj(replies, ('member', 'name')),
+                    'author_id': traverse_obj(replies, ('member', 'mid')),
+                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+                    'text': traverse_obj(replies, ('content', 'message')),
+                    'id': replies.get('rpid'),
+                    'like_count': int_or_none(replies.get('like_count')),
+                    'timestamp': unified_timestamp(replies.get('ctime_text')),
+                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
+                }
+                if replies.get('count'):
+                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
+
+            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+                break
+
     def _real_extract(self, url):
         season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
         video_id = ep_id or aid
@@ -1087,7 +1193,8 @@ def _real_extract(self, url):
             **self._extract_video_metadata(url, video_id, season_id),
             'formats': self._get_formats(ep_id=ep_id, aid=aid),
             'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
-            'chapters': chapters
+            'chapters': chapters,
+            '__post_extractor': self.extract_comments(video_id, ep_id)
         }
 
 

From 979568f26ece80bca72b48f0dd57d676e431059a Mon Sep 17 00:00:00 2001
From: MyNey <20515340+MinePlayersPE@users.noreply.github.com>
Date: Thu, 13 Apr 2023 01:28:33 +0700
Subject: [PATCH 50/97] [extractor/BrainPOP] Add extractors (#6106)

Authored by: MinePlayersPE
Based on https://github.com/ytdl-org/youtube-dl/pull/10025
---
 yt_dlp/extractor/_extractors.py |   8 +
 yt_dlp/extractor/brainpop.py    | 318 ++++++++++++++++++++++++++++++++
 2 files changed, 326 insertions(+)
 create mode 100644 yt_dlp/extractor/brainpop.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index c2043bbd2..09903423d 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -254,6 +254,14 @@
     BRMediathekIE,
 )
 from .bravotv import BravoTVIE
+from .brainpop import (
+    BrainPOPIE,
+    BrainPOPJrIE,
+    BrainPOPELLIE,
+    BrainPOPEspIE,
+    BrainPOPFrIE,
+    BrainPOPIlIE,
+)
 from .breakcom import BreakIE
 from .breitbart import BreitBartIE
 from .brightcove import (
diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py
new file mode 100644
index 000000000..1200437e6
--- /dev/null
+++ b/yt_dlp/extractor/brainpop.py
@@ -0,0 +1,318 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    classproperty,
+    int_or_none,
+    traverse_obj,
+    urljoin
+)
+
+
+class BrainPOPBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'brainpop'
+    _ORIGIN = ''  # So that _VALID_URL doesn't crash
+    _LOGIN_ERRORS = {
+        1502: 'The username and password you entered did not match.',  # LOGIN_FAILED
+        1503: 'Payment method is expired.',  # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
+        1506: 'Your BrainPOP plan has expired.',  # LOGIN_FAILED_ACCOUNT_EXPIRED
+        1507: 'Terms not accepted.',  # LOGIN_FAILED_TERMS_NOT_ACCEPTED
+        1508: 'Account not activated.',  # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
+        1512: 'The maximum number of devices permitted are logged in with your account right now.',  # LOGIN_FAILED_LOGIN_LIMIT_REACHED
+        1513: 'You are trying to access your account from outside of its allowed IP range.',  # LOGIN_FAILED_INVALID_IP
+        1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.',  # LOGIN_FAILED_MBP_DISABLED
+        1515: 'Account not activated.',  # LOGIN_FAILED_TEACHER_NOT_ACTIVE
+        1523: 'That username and password won\'t work on this BrainPOP site.',  # LOGIN_FAILED_NO_ACCESS
+        1524: 'You\'ll need to join a class before you can login.',  # LOGIN_FAILED_STUDENT_NO_PERIOD
+        1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.',  # LOGIN_FAILED_ACCOUNT_LOCKED
+    }
+
+    @classproperty
+    def _VALID_URL(cls):
+        root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
+        return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'
+
+    def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields={}):
+        """Return the HLS formats (non-fatal) plus the direct URL for slug, each updated with extra_fields."""
+        formats = self._extract_m3u8_formats(
+            f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
+            display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
+        formats.append({
+            'format_id': format_id,
+            'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
+        })
+        for f in formats:
+            f.update(extra_fields)
+        return formats
+
+    def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields={}):
+        """Collect formats for the 'high'/'low' keys of data and their 'ad_' (audio description) twins."""
+        variants = (
+            ('%s', {}),
+            ('ad_%s', {'format_note': 'Audio description', 'source_preference': -2}),
+        )
+        formats = []
+        for variant_template, variant_fields in variants:
+            for rank, level in enumerate(('high', 'low')):
+                key = variant_template % (key_format % level)
+                slug = data.get(key)
+                if not slug:
+                    continue
+                formats.extend(self._assemble_formats(slug, key, display_id, token, {
+                    'quality': -1 - rank,
+                    **variant_fields,
+                    **extra_fields,
+                }))
+        return formats
+
+    def _perform_login(self, username, password):
+        """Log in through the BrainPOP API; any status code other than 1505 is reported as a warning."""
+        login_res = self._download_json(
+            'https://api.brainpop.com/api/login', None,
+            data=json.dumps({'username': username, 'password': password}).encode(),
+            headers={'Content-Type': 'application/json', 'Referer': self._ORIGIN},
+            note='Logging in', errnote='Unable to log in', expected_status=400)
+        status_code = int_or_none(login_res['status_code'])
+        if status_code != 1505:
+            # Resolve the fallback chain before formatting: a non-empty f-string is always truthy
+            error = (self._LOGIN_ERRORS.get(status_code) or login_res.get('message')
+                     or f'Got status code {status_code}')
+            self.report_warning(f'Unable to login: {error}')
+
+
+class BrainPOPIE(BrainPOPBaseIE):
+    _ORIGIN = 'https://www.brainpop.com'
+    _VIDEO_URL = 'https://svideos.brainpop.com'
+    _HLS_URL = 'https://hls.brainpop.com'
+    _CDN_URL = 'https://cdn.brainpop.com'
+    _TESTS = [{
+        'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
+        'md5': '3ead374233ae74c7f1b0029a01c972f0',
+        'info_dict': {
+            'id': '1f3259fa457292b4',
+            'ext': 'mp4',
+            'title': 'Martin Luther King, Jr.',
+            'display_id': 'martinlutherkingjr',
+            'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
+        },
+    }, {
+        'url': 'https://www.brainpop.com/science/space/bigbang/',
+        'md5': '9a1ff0e77444dd9e437354eb669c87ec',
+        'info_dict': {
+            'id': 'acae52cd48c99acf',
+            'ext': 'mp4',
+            'title': 'Big Bang',
+            'display_id': 'bigbang',
+            'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
+        },
+        'skip': 'Requires login',
+    }]
+
+    def _real_extract(self, url):
+        """Fetch the movie and topic JSON for the URL's slug and build its formats and subtitles."""
+        slug, display_id = self._match_valid_url(url).group('slug', 'id')
+        movie_data = self._download_json(
+            f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
+            'Downloading movie data JSON', 'Unable to download movie data')['data']
+        topic_data = traverse_obj(self._download_json(
+            f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
+            'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
+            ('data', 'topic'), expected_type=dict) or movie_data['topic']
+
+        if not traverse_obj(movie_data, ('access', 'allow')):
+            # The reason may be absent; fall back to a message so the `in` test below cannot hit None
+            reason = traverse_obj(movie_data, ('access', 'reason')) or 'This video is not available'
+            if 'logged' in reason:
+                self.raise_login_required(reason, metadata_available=True)
+            else:
+                self.raise_no_formats(reason, video_id=display_id)
+        movie_feature = movie_data['feature']
+        movie_feature_data = movie_feature['data']
+
+        formats, subtitles = [], {}
+        formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
+            'language': movie_feature.get('language') or 'en',
+            'language_preference': 10
+        }))
+        for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
+            formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
+                'language': lang,
+                'language_preference': -10
+            }))
+
+        # TODO: Do localization fields also have subtitles?
+        for name, sub_path in movie_feature_data.items():
+            lang = self._search_regex(
+                r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
+            if lang and sub_path:
+                subtitles.setdefault(lang, []).append({'url': urljoin(self._CDN_URL, sub_path)})
+
+        return {
+            'id': topic_data['topic_id'],
+            'display_id': display_id,
+            'title': topic_data.get('name'),
+            'description': topic_data.get('synopsis'),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+
+class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
+    def _parse_js_topic_data(self, topic_data, display_id, token):
+        movie_data = topic_data['movies']
+        # TODO: Are there non-burned subtitles?
+        formats = self._extract_adaptive_formats(movie_data, token, display_id)
+
+        return {
+            'id': topic_data['EntryID'],
+            'display_id': display_id,
+            'title': topic_data.get('name'),
+            'alt_title': topic_data.get('title'),
+            'description': topic_data.get('synopsis'),
+            'formats': formats,
+        }
+
+    def _real_extract(self, url):
+        slug, display_id = self._match_valid_url(url).group('slug', 'id')
+        webpage = self._download_webpage(url, display_id)
+        topic_data = self._search_json(
+            r'var\s+content\s*=\s*', webpage, 'content data',
+            display_id, end_pattern=';')['category']['unit']['topic']
+        token = self._search_regex(r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
+        return self._parse_js_topic_data(topic_data, display_id, token)
+
+
+class BrainPOPJrIE(BrainPOPLegacyBaseIE):
+    _ORIGIN = 'https://jr.brainpop.com'
+    _VIDEO_URL = 'https://svideos-jr.brainpop.com'
+    _HLS_URL = 'https://hls-jr.brainpop.com'
+    _CDN_URL = 'https://cdn-jr.brainpop.com'
+    _TESTS = [{
+        'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
+        'md5': '04e0561bb21770f305a0ce6cf0d869ab',
+        'info_dict': {
+            'id': '347',
+            'ext': 'mp4',
+            'title': 'Emotions',
+            'display_id': 'emotions',
+        },
+    }, {
+        'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
+        'md5': 'b0ed063bbd1910df00220ee29340f5d6',
+        'info_dict': {
+            'id': '29',
+            'ext': 'mp4',
+            'title': 'Arctic Habitats',
+            'display_id': 'arctichabitats',
+        },
+        'skip': 'Requires login',
+    }]
+
+
+class BrainPOPELLIE(BrainPOPLegacyBaseIE):
+    _ORIGIN = 'https://ell.brainpop.com'
+    _VIDEO_URL = 'https://svideos-esl.brainpop.com'
+    _HLS_URL = 'https://hls-esl.brainpop.com'
+    _CDN_URL = 'https://cdn-esl.brainpop.com'
+    _TESTS = [{
+        'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
+        'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
+        'info_dict': {
+            'id': '1',
+            'ext': 'mp4',
+            'title': 'Lesson 1',
+            'display_id': 'lesson1',
+            'alt_title': 'Personal Pronouns',
+        },
+    }, {
+        'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
+        'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
+        'info_dict': {
+            'id': '101',
+            'ext': 'mp4',
+            'title': 'Lesson 5',
+            'display_id': 'lesson5',
+            'alt_title': 'Review: Unit 6',
+        },
+        'skip': 'Requires login',
+    }]
+
+
+class BrainPOPEspIE(BrainPOPLegacyBaseIE):
+    IE_DESC = 'BrainPOP Español'
+    _ORIGIN = 'https://esp.brainpop.com'
+    _VIDEO_URL = 'https://svideos.brainpop.com'
+    _HLS_URL = 'https://hls.brainpop.com'
+    _CDN_URL = 'https://cdn.brainpop.com/mx'
+    _TESTS = [{
+        'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
+        'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
+        'info_dict': {
+            'id': '3893',
+            'ext': 'mp4',
+            'title': 'Ecosistemas',
+            'display_id': 'ecosistemas',
+            'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
+        },
+    }, {
+        'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
+        'md5': '98c1b9559e0e33777209c425cda7dac4',
+        'info_dict': {
+            'id': '7146',
+            'ext': 'mp4',
+            'title': 'Emily Dickinson',
+            'display_id': 'emily_dickinson',
+            'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
+        },
+        'skip': 'Requires login',
+    }]
+
+
+class BrainPOPFrIE(BrainPOPLegacyBaseIE):
+    IE_DESC = 'BrainPOP Français'
+    _ORIGIN = 'https://fr.brainpop.com'
+    _VIDEO_URL = 'https://svideos.brainpop.com'
+    _HLS_URL = 'https://hls.brainpop.com'
+    _CDN_URL = 'https://cdn.brainpop.com/fr'
+    _TESTS = [{
+        'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
+        'md5': '97e7f48af8af93f8a2be11709f239371',
+        'info_dict': {
+            'id': '1651',
+            'ext': 'mp4',
+            'title': 'Sources d\'énergie',
+            'display_id': 'sourcesdenergie',
+            'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
+        },
+    }, {
+        'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
+        'md5': '0cf2b4f89804d0dd4a360a51310d445a',
+        'info_dict': {
+            'id': '5803',
+            'ext': 'mp4',
+            'title': 'Plagiat',
+            'display_id': 'plagiat',
+            'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
+        },
+        'skip': 'Requires login',
+    }]
+
+
+class BrainPOPIlIE(BrainPOPLegacyBaseIE):
+    IE_DESC = 'BrainPOP Hebrew'
+    _ORIGIN = 'https://il.brainpop.com'
+    _VIDEO_URL = 'https://svideos.brainpop.com'
+    _HLS_URL = 'https://hls.brainpop.com'
+    _CDN_URL = 'https://cdn.brainpop.com/he'
+    _TESTS = [{
+        'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
+        'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
+        'info_dict': {
+            'id': '3782',
+            'ext': 'mp4',
+            'title': 'md5:e993632fcda0545d9205602ec314ad67',
+            'display_id': 'subjects_3782',
+            'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
+        },
+    }]

From d1483ec693c79f0b4ddf493870bcb840aca4da08 Mon Sep 17 00:00:00 2001
From: Lesmiscore <nao20010128@gmail.com>
Date: Thu, 13 Apr 2023 16:09:20 +0900
Subject: [PATCH 51/97] [extractor/iwara] Fix typo

Authored by: Lesmiscore

Closes #6795
---
 yt_dlp/extractor/iwara.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index ae2960af0..9dbb141fd 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -76,7 +76,7 @@ def _extract_formats(self, video_id, fileurl):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(f'http://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True)
+        video_data = self._download_json(f'https://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True)
         errmsg = video_data.get('message')
         # at this point we can actually get uploaded user info, but do we need it?
         if errmsg == 'errors.privateVideo':

From 56793f74c36899742d7abd52afb0deca97d469e1 Mon Sep 17 00:00:00 2001
From: hasezoey <hasezoey@gmail.com>
Date: Thu, 13 Apr 2023 19:17:56 +0200
Subject: [PATCH 52/97] [extractor/iwara] Fix format sorting (#6651)

Authored by: hasezoey
---
 yt_dlp/extractor/iwara.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index 9dbb141fd..a5aad26ee 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -8,6 +8,7 @@
     OnDemandPagedList,
     int_or_none,
     mimetype2ext,
+    qualities,
     traverse_obj,
     unified_timestamp,
 )
@@ -64,13 +65,15 @@ def _extract_formats(self, video_id, fileurl):
         # https://github.com/yt-dlp/yt-dlp/issues/6549#issuecomment-1473771047
         x_version = hashlib.sha1('_'.join((paths[-1], q['expires'][0], '5nFp9kmbNnHdAFhaqMvt')).encode()).hexdigest()
 
+        preference = qualities(['preview', '360', '540', 'Source'])
+
         files = self._download_json(fileurl, video_id, headers={'X-Version': x_version})
         for fmt in files:
             yield traverse_obj(fmt, {
                 'format_id': 'name',
                 'url': ('src', ('view', 'download'), {self._proto_relative_url}),
                 'ext': ('type', {mimetype2ext}),
-                'quality': ('name', {lambda x: int_or_none(x) or 1e4}),
+                'quality': ('name', {preference}),
                 'height': ('name', {int_or_none}),
             }, get_all=False)
 
@@ -84,6 +87,11 @@ def _real_extract(self, url):
         elif errmsg:
             raise ExtractorError(f'Iwara says: {errmsg}')
 
+        if not video_data.get('fileUrl'):
+            if video_data.get('embedUrl'):
+                return self.url_result(video_data.get('embedUrl'))
+            raise ExtractorError('This video is unplayable', expected=True)
+
         return {
             'id': video_id,
             'age_limit': 18 if video_data.get('rating') == 'ecchi' else 0,  # ecchi is 'sexy' in Japanese

From 90c1f5120694105496a6ad9e3ecfc6c25de6cae1 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 13 Apr 2023 13:56:12 -0500
Subject: [PATCH 53/97] [extractor/zoom] Fix share URL extraction (#6789)

Authored by: bashonly
---
 yt_dlp/extractor/zoom.py | 90 +++++++++++++++++++++++++++-------------
 1 file changed, 62 insertions(+), 28 deletions(-)

diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py
index eb0ab795b..3d7ccca76 100644
--- a/yt_dlp/extractor/zoom.py
+++ b/yt_dlp/extractor/zoom.py
@@ -13,8 +13,8 @@
 
 class ZoomIE(InfoExtractor):
     IE_NAME = 'zoom'
-    _VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom.us/)rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9_.-]+)'
-    _TEST = {
+    _VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom.us/)rec(?:ording)?/(?P<type>play|share)/(?P<id>[A-Za-z0-9_.-]+)'
+    _TESTS = [{
         'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
         'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
         'info_dict': {
@@ -23,39 +23,73 @@ class ZoomIE(InfoExtractor):
             'title': 'China\'s "two sessions" and the new five-year plan',
         },
         'skip': 'Recording requires email authentication to access',
-    }
+    }, {
+        # play URL
+        'url': 'https://ffgolf.zoom.us/rec/play/qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
+        'md5': '2c4b1c4e5213ebf9db293e88d9385bee',
+        'info_dict': {
+            'id': 'qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
+            'ext': 'mp4',
+            'title': 'Prépa AF2023 - Séance 5 du 11 avril - R20/VM/GO',
+        },
+    }, {
+        # share URL
+        'url': 'https://us02web.zoom.us/rec/share/hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
+        'md5': '90fdc7cfcaee5d52d1c817fc03c43c9b',
+        'info_dict': {
+            'id': 'hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
+            'ext': 'mp4',
+            'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
+        },
+    }]
 
-    def _real_extract(self, url):
-        base_url, play_id = self._match_valid_url(url).groups()
-        webpage = self._download_webpage(url, play_id)
+    def _get_page_data(self, webpage, video_id):
+        return self._search_json(
+            r'window\.__data__\s*=', webpage, 'data', video_id, transform_source=js_to_json)
 
+    def _get_real_webpage(self, url, base_url, video_id, url_type):
+        webpage = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage')
         try:
             form = self._form_hidden_inputs('password_form', webpage)
         except ExtractorError:
-            form = None
-        if form:
-            password = self.get_param('videopassword')
-            if not password:
-                raise ExtractorError(
-                    'This video is protected by a passcode, use the --video-password option', expected=True)
-            is_meeting = form.get('useWhichPasswd') == 'meeting'
-            validation = self._download_json(
-                base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''),
-                play_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
-                    'id': form[('meet' if is_meeting else 'file') + 'Id'],
-                    'passwd': password,
-                    'action': form.get('action'),
-                }))
-            if not validation.get('status'):
-                raise ExtractorError(validation['errorMessage'], expected=True)
-            webpage = self._download_webpage(url, play_id)
+            return webpage
 
-        data = self._parse_json(self._search_regex(
-            r'(?s)window\.__data__\s*=\s*({.+?});',
-            webpage, 'data'), play_id, js_to_json)
+        password = self.get_param('videopassword')
+        if not password:
+            raise ExtractorError(
+                'This video is protected by a passcode, use the --video-password option', expected=True)
+        is_meeting = form.get('useWhichPasswd') == 'meeting'
+        validation = self._download_json(
+            base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''),
+            video_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
+                'id': form[('meet' if is_meeting else 'file') + 'Id'],
+                'passwd': password,
+                'action': form.get('action'),
+            }))
+        if not validation.get('status'):
+            raise ExtractorError(validation['errorMessage'], expected=True)
+        return self._download_webpage(url, video_id, note=f'Re-downloading {url_type} webpage')
+
+    def _real_extract(self, url):
+        base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')
+
+        if url_type == 'share':
+            webpage = self._get_real_webpage(url, base_url, video_id, 'share')
+            meeting_id = self._get_page_data(webpage, video_id)['meetingId']
+            redirect_path = self._download_json(
+                f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
+                video_id, note='Downloading share info JSON')['result']['redirectUrl']
+            url = urljoin(base_url, redirect_path)
+
+        webpage = self._get_real_webpage(url, base_url, video_id, 'play')
+        file_id = self._get_page_data(webpage, video_id)['fileId']
+        if not file_id:
+            # When things go wrong, file_id can be empty string
+            raise ExtractorError('Unable to extract file ID')
 
         data = self._download_json(
-            f'{base_url}nws/recording/1.0/play/info/{data["fileId"]}', play_id)['result']
+            f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id,
+            note='Downloading play info JSON')['result']
 
         subtitles = {}
         for _type in ('transcript', 'cc', 'chapter'):
@@ -91,7 +125,7 @@ def _real_extract(self, url):
             })
 
         return {
-            'id': play_id,
+            'id': video_id,
             'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
             'subtitles': subtitles,
             'formats': formats,

From 925936908a3c3ee0e508621db14696b9f6a8b563 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 13 Apr 2023 14:05:57 -0500
Subject: [PATCH 54/97] [extractor/tiktok] Fix and improve metadata extraction
 (#6777)

Authored by: bashonly
---
 yt_dlp/extractor/tiktok.py | 187 +++++++++++++++++++++++--------------
 1 file changed, 116 insertions(+), 71 deletions(-)

diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index fb838d529..63708229e 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -287,17 +287,15 @@ def extract_addr(addr, add_meta={}):
         thumbnails = []
         for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
                          'origin_cover', 'dynamic_cover'):
-            cover = video_info.get(cover_id)
-            if cover:
-                for cover_url in cover['url_list']:
-                    thumbnails.append({
-                        'id': cover_id,
-                        'url': cover_url,
-                    })
+            for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...)):
+                thumbnails.append({
+                    'id': cover_id,
+                    'url': cover_url,
+                })
 
-        stats_info = aweme_detail.get('statistics', {})
-        author_info = aweme_detail.get('author', {})
-        music_info = aweme_detail.get('music', {})
+        stats_info = aweme_detail.get('statistics') or {}
+        author_info = aweme_detail.get('author') or {}
+        music_info = aweme_detail.get('music') or {}
         user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
                                                              'sec_uid', 'id', 'uid', 'unique_id',
                                                              expected_type=str_or_none, get_all=False))
@@ -319,20 +317,27 @@ def extract_addr(addr, add_meta={}):
             'extractor_key': TikTokIE.ie_key(),
             'extractor': TikTokIE.IE_NAME,
             'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
-            'title': aweme_detail.get('desc'),
-            'description': aweme_detail.get('desc'),
-            'view_count': int_or_none(stats_info.get('play_count')),
-            'like_count': int_or_none(stats_info.get('digg_count')),
-            'repost_count': int_or_none(stats_info.get('share_count')),
-            'comment_count': int_or_none(stats_info.get('comment_count')),
-            'uploader': str_or_none(author_info.get('unique_id')),
-            'creator': str_or_none(author_info.get('nickname')),
-            'uploader_id': str_or_none(author_info.get('uid')),
+            **traverse_obj(aweme_detail, {
+                'title': ('desc', {str}),
+                'description': ('desc', {str}),
+                'timestamp': ('create_time', {int_or_none}),
+            }),
+            **traverse_obj(stats_info, {
+                'view_count': 'play_count',
+                'like_count': 'digg_count',
+                'repost_count': 'share_count',
+                'comment_count': 'comment_count',
+            }, expected_type=int_or_none),
+            **traverse_obj(author_info, {
+                'uploader': 'unique_id',
+                'uploader_id': 'uid',
+                'creator': 'nickname',
+                'channel_id': 'sec_uid',
+            }, expected_type=str_or_none),
             'uploader_url': user_url,
             'track': music_track,
             'album': str_or_none(music_info.get('album')) or None,
             'artist': music_author or None,
-            'timestamp': int_or_none(aweme_detail.get('create_time')),
             'formats': formats,
             'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
             'thumbnails': thumbnails,
@@ -344,37 +349,27 @@ def extract_addr(addr, add_meta={}):
             '_format_sort_fields': ('quality', 'codec', 'size', 'br'),
         }
 
-    def _parse_aweme_video_web(self, aweme_detail, webpage_url):
+    def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
         video_info = aweme_detail['video']
         author_info = traverse_obj(aweme_detail, 'authorInfo', 'author', expected_type=dict, default={})
         music_info = aweme_detail.get('music') or {}
         stats_info = aweme_detail.get('stats') or {}
-        user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
-                                                             'secUid', 'id', 'uid', 'uniqueId',
-                                                             expected_type=str_or_none, get_all=False)
-                                                or aweme_detail.get('authorSecId'))
+        channel_id = traverse_obj(author_info or aweme_detail, (('authorSecId', 'secUid'), {str}), get_all=False)
+        user_url = self._UPLOADER_URL_FORMAT % channel_id if channel_id else None
 
         formats = []
-        play_url = video_info.get('playAddr')
-        width = video_info.get('width')
-        height = video_info.get('height')
-        if isinstance(play_url, str):
-            formats = [{
+        width = int_or_none(video_info.get('width'))
+        height = int_or_none(video_info.get('height'))
+
+        for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})):
+            formats.append({
                 'url': self._proto_relative_url(play_url),
                 'ext': 'mp4',
                 'width': width,
                 'height': height,
-            }]
-        elif isinstance(play_url, list):
-            formats = [{
-                'url': self._proto_relative_url(url),
-                'ext': 'mp4',
-                'width': width,
-                'height': height,
-            } for url in traverse_obj(play_url, (..., 'src'), expected_type=url_or_none) if url]
+            })
 
-        download_url = url_or_none(video_info.get('downloadAddr')) or traverse_obj(video_info, ('download', 'url'), expected_type=url_or_none)
-        if download_url:
+        for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})):
             formats.append({
                 'format_id': 'download',
                 'url': self._proto_relative_url(download_url),
@@ -382,38 +377,48 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url):
                 'width': width,
                 'height': height,
             })
+
         self._remove_duplicate_formats(formats)
 
         thumbnails = []
-        for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'):
-            if aweme_detail.get(thumbnail_name):
-                thumbnails = [{
-                    'url': self._proto_relative_url(aweme_detail[thumbnail_name]),
-                    'width': width,
-                    'height': height
-                }]
+        for thumb_url in traverse_obj(aweme_detail, (
+                (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})):
+            thumbnails.append({
+                'url': self._proto_relative_url(thumb_url),
+                'width': width,
+                'height': height,
+            })
 
         return {
-            'id': traverse_obj(aweme_detail, 'id', 'awemeId', expected_type=str_or_none),
-            'title': aweme_detail.get('desc'),
-            'duration': try_get(aweme_detail, lambda x: x['video']['duration'], int),
-            'view_count': int_or_none(stats_info.get('playCount')),
-            'like_count': int_or_none(stats_info.get('diggCount')),
-            'repost_count': int_or_none(stats_info.get('shareCount')),
-            'comment_count': int_or_none(stats_info.get('commentCount')),
-            'timestamp': int_or_none(aweme_detail.get('createTime')),
-            'creator': str_or_none(author_info.get('nickname')),
-            'uploader': str_or_none(author_info.get('uniqueId') or aweme_detail.get('author')),
-            'uploader_id': str_or_none(traverse_obj(author_info, 'id', 'uid', 'authorId')),
+            'id': video_id,
+            **traverse_obj(aweme_detail, {
+                'title': ('desc', {str}),
+                'description': ('desc', {str}),
+                'duration': ('video', 'duration', {int_or_none}),
+                'timestamp': ('createTime', {int_or_none}),
+            }),
+            **traverse_obj(author_info or aweme_detail, {
+                'creator': ('nickname', {str}),
+                'uploader': (('uniqueId', 'author'), {str}),
+                'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
+            }, get_all=False),
+            **traverse_obj(stats_info, {
+                'view_count': 'playCount',
+                'like_count': 'diggCount',
+                'repost_count': 'shareCount',
+                'comment_count': 'commentCount',
+            }, expected_type=int_or_none),
+            **traverse_obj(music_info, {
+                'track': 'title',
+                'album': ('album', {lambda x: x or None}),
+                'artist': 'authorName',
+            }, expected_type=str),
+            'channel_id': channel_id,
             'uploader_url': user_url,
-            'track': str_or_none(music_info.get('title')),
-            'album': str_or_none(music_info.get('album')) or None,
-            'artist': str_or_none(music_info.get('authorName')),
             'formats': formats,
             'thumbnails': thumbnails,
-            'description': str_or_none(aweme_detail.get('desc')),
             'http_headers': {
-                'Referer': webpage_url
+                'Referer': webpage_url,
             }
         }
 
@@ -447,7 +452,8 @@ class TikTokIE(TikTokBaseIE):
             'artist': 'Ysrbeats',
             'album': 'Lehanga',
             'track': 'Lehanga',
-        }
+        },
+        'skip': '404 Not Found',
     }, {
         'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
         'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b',
@@ -462,6 +468,7 @@ class TikTokIE(TikTokBaseIE):
             'uploader': 'patrox',
             'uploader_id': '18702747',
             'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
+            'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
             'creator': 'patroX',
             'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
             'upload_date': '20190930',
@@ -472,7 +479,7 @@ class TikTokIE(TikTokBaseIE):
             'comment_count': int,
             'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson',
             'track': 'Big Fun',
-        }
+        },
     }, {
         # Banned audio, only available on the app
         'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402',
@@ -485,6 +492,7 @@ class TikTokIE(TikTokBaseIE):
             'creator': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
             'uploader_id': '6974687867511718913',
             'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
+            'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
             'track': 'Boka Dance',
             'artist': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
             'timestamp': 1626121503,
@@ -495,7 +503,7 @@ class TikTokIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
-        }
+        },
     }, {
         # Sponsored video, only available with feed workaround
         'url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_/video/7042692929109986561',
@@ -508,6 +516,7 @@ class TikTokIE(TikTokBaseIE):
             'creator': 'Slap And Run',
             'uploader_id': '7036055384943690754',
             'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
+            'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
             'track': 'Promoted Music',
             'timestamp': 1639754738,
             'duration': 30,
@@ -518,7 +527,6 @@ class TikTokIE(TikTokBaseIE):
             'repost_count': int,
             'comment_count': int,
         },
-        'expected_warnings': ['trying with webpage', 'Unable to find video in feed']
     }, {
         # Video without title and description
         'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
@@ -531,6 +539,7 @@ class TikTokIE(TikTokBaseIE):
             'creator': 'Pokemon',
             'uploader_id': '6820838815978423302',
             'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
+            'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
             'track': 'original sound',
             'timestamp': 1643714123,
             'duration': 6,
@@ -577,6 +586,7 @@ class TikTokIE(TikTokBaseIE):
             'uploader': '_le_cannibale_',
             'uploader_id': '6604511138619654149',
             'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
+            'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
             'artist': 'nathan !',
             'track': 'grahamscott canon',
             'upload_date': '20220905',
@@ -587,6 +597,33 @@ class TikTokIE(TikTokBaseIE):
             'comment_count': int,
             'thumbnail': r're:^https://.+\.webp',
         },
+    }, {
+        # only available via web
+        'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662',
+        'md5': '8d8c0be14127020cd9f5def4a2e6b411',
+        'info_dict': {
+            'id': '7206382937372134662',
+            'ext': 'mp4',
+            'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
+            'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
+            'creator': 'MoxyPatch',
+            'uploader': 'moxypatch',
+            'uploader_id': '7039142049363379205',
+            'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
+            'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
+            'artist': 'your worst nightmare',
+            'track': 'original sound',
+            'upload_date': '20230303',
+            'timestamp': 1677866781,
+            'duration': 10,
+            'view_count': int,
+            'like_count': int,
+            'repost_count': int,
+            'comment_count': int,
+            'thumbnail': r're:^https://.+',
+            'thumbnails': 'count:3',
+        },
+        'expected_warnings': ['Unable to find video in feed'],
     }, {
         # Auto-captions available
         'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
@@ -612,7 +649,7 @@ def _real_extract(self, url):
             video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)
 
         if status == 0:
-            return self._parse_aweme_video_web(video_data, url)
+            return self._parse_aweme_video_web(video_data, url, video_id)
         elif status == 10216:
             raise ExtractorError('This video is private', expected=True)
         raise ExtractorError('Video not available', video_id=video_id)
@@ -839,6 +876,7 @@ class DouyinIE(TikTokBaseIE):
             'description': '#杨超越  小小水手带你去远航❤️',
             'uploader_id': '110403406559',
             'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+            'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
             'creator': '杨超越',
             'duration': 19782,
             'timestamp': 1620905839,
@@ -848,6 +886,7 @@ class DouyinIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'thumbnail': r're:https?://.+\.jpe?g',
         },
     }, {
         'url': 'https://www.douyin.com/video/6982497745948921092',
@@ -859,8 +898,9 @@ class DouyinIE(TikTokBaseIE):
             'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
             'uploader_id': '408654318141572',
             'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
+            'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
             'creator': '杨超越工作室',
-            'duration': 42608,
+            'duration': 42479,
             'timestamp': 1625739481,
             'upload_date': '20210708',
             'track': '@杨超越工作室创作的原声',
@@ -868,6 +908,7 @@ class DouyinIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'thumbnail': r're:https?://.+\.jpe?g',
         },
     }, {
         'url': 'https://www.douyin.com/video/6953975910773099811',
@@ -879,8 +920,9 @@ class DouyinIE(TikTokBaseIE):
             'description': '#一起看海  出现在你的夏日里',
             'uploader_id': '110403406559',
             'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+            'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
             'creator': '杨超越',
-            'duration': 17228,
+            'duration': 17343,
             'timestamp': 1619098692,
             'upload_date': '20210422',
             'track': '@杨超越创作的原声',
@@ -888,6 +930,7 @@ class DouyinIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'thumbnail': r're:https?://.+\.jpe?g',
         },
     }, {
         'url': 'https://www.douyin.com/video/6950251282489675042',
@@ -916,6 +959,7 @@ class DouyinIE(TikTokBaseIE):
             'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
             'uploader_id': '110403406559',
             'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+            'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
             'creator': '杨超越',
             'duration': 15115,
             'timestamp': 1621261163,
@@ -925,6 +969,7 @@ class DouyinIE(TikTokBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'thumbnail': r're:https?://.+\.jpe?g',
         },
     }]
     _APP_VERSIONS = [('23.3.0', '230300')]
@@ -956,7 +1001,7 @@ def _real_extract(self, url):
 
         render_data = self._parse_json(
             render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
-        return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url)
+        return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)
 
 
 class TikTokVMIE(InfoExtractor):

From 3f7e2bd80e3c5d8a1682f20a1b245fcd974f295d Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 13 Apr 2023 14:21:09 -0500
Subject: [PATCH 55/97] [FFmpegFixupM3u8PP] Check audio codec before fixup
 (#6778)

Closes #6673
Authored by: bashonly
---
 yt_dlp/postprocessor/ffmpeg.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 63fc9ace6..323f4303c 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -898,8 +898,11 @@ def _needs_fixup(self, info):
     @PostProcessor._restrict_to(images=False)
     def run(self, info):
         if all(self._needs_fixup(info)):
+            args = ['-f', 'mp4']
+            if self.get_audio_codec(info['filepath']) == 'aac':  # pass the aac_adtstoasc bitstream filter only when the audio codec is reported as 'aac'
+                args.extend(['-bsf:a', 'aac_adtstoasc'])
             self._fixup('Fixing MPEG-TS in MP4 container', info['filepath'], [
-                *self.stream_copy_opts(), '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'])
+                *self.stream_copy_opts(), *args])
         return [], info
 
 

From 93e7c6995e07dafb9dcc06c0d06acf6c5bdfecc5 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 13 Apr 2023 14:36:06 -0500
Subject: [PATCH 56/97] [extractor/generic] Attempt to detect live HLS (#6775)

* Extract duration for non-live generic HLS videos
* Add extractor-arg `is_live` to bypass live HLS check

Closes #6705
Authored by: bashonly
---
 README.md                   |  1 +
 yt_dlp/extractor/generic.py | 63 +++++++++++++++++++++++++++++++------
 2 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 3e8484314..35229f728 100644
--- a/README.md
+++ b/README.md
@@ -1800,6 +1800,7 @@ #### generic
 * `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Does not apply to ffmpeg
 * `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE`
 * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
+* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
 
 #### funimation
 * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 75355aeb5..87cf11d6b 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -14,6 +14,7 @@
     ExtractorError,
     UnsupportedError,
     determine_ext,
+    determine_protocol,
     dict_get,
     extract_basic_auth,
     format_field,
@@ -867,7 +868,7 @@ class GenericIE(InfoExtractor):
             },
         },
         {
-            # Video.js embed, multiple formats
+            # YouTube embed (formerly a Video.js embed with multiple formats)
             'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
             'info_dict': {
                 'id': 'yygqldloqIk',
@@ -894,6 +895,7 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': '404 Not Found',
         },
         # rtl.nl embed
         {
@@ -2169,6 +2171,33 @@ class GenericIE(InfoExtractor):
                 'age_limit': 18,
             },
         },
+        {
+            'note': 'Live HLS direct link',
+            'url': 'https://d18j67ugtrocuq.cloudfront.net/out/v1/2767aec339144787926bd0322f72c6e9/index.m3u8',
+            'info_dict': {
+                'id': 'index',
+                'title': r're:index',
+                'ext': 'mp4',
+                'live_status': 'is_live',
+            },
+            'params': {
+                'skip_download': 'm3u8',
+            },
+        },
+        {
+            'note': 'Video.js VOD HLS',
+            'url': 'https://gist.githubusercontent.com/bashonly/2aae0862c50f4a4b84f220c315767208/raw/e3380d413749dabbe804c9c2d8fd9a45142475c7/videojs_hls_test.html',
+            'info_dict': {
+                'id': 'videojs_hls_test',
+                'title': 'video',
+                'ext': 'mp4',
+                'age_limit': 0,
+                'duration': 1800,
+            },
+            'params': {
+                'skip_download': 'm3u8',
+            },
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -2205,6 +2234,22 @@ def _extra_manifest_info(self, info, manifest_url):
             for fmt in self._downloader._get_formats(info):
                 fmt['url'] = update_url_query(fmt['url'], query)
 
+        # Attempt to detect live HLS or set VOD duration
+        m3u8_format = next((f for f in self._downloader._get_formats(info)
+                            if determine_protocol(f) == 'm3u8_native'), None)
+        if m3u8_format:
+            is_live = self._configuration_arg('is_live', [None])[0]
+            if is_live is not None:
+                info['live_status'] = 'not_live' if is_live == 'false' else 'is_live'
+                return
+            headers = m3u8_format.get('http_headers') or info.get('http_headers')
+            duration = self._extract_m3u8_vod_duration(
+                m3u8_format['url'], info.get('id'), note='Checking m3u8 live status',
+                errnote='Failed to download m3u8 media playlist', headers=headers)
+            if not duration:
+                info['live_status'] = 'is_live'
+            info['duration'] = info.get('duration') or duration
+
     def _extract_rss(self, url, video_id, doc):
         NS_MAP = {
             'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
@@ -2580,8 +2625,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
             varname = mobj.group(1)
             sources = variadic(self._parse_json(
                 mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
-            formats = []
-            subtitles = {}
+            formats, subtitles, src = [], {}, None
             for source in sources:
                 src = source.get('src')
                 if not src or not isinstance(src, str):
@@ -2604,8 +2648,6 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
                         m3u8_id='hls', fatal=False)
                     formats.extend(fmts)
                     self._merge_subtitles(subs, target=subtitles)
-                for fmt in formats:
-                    self._extra_manifest_info(fmt, src)
 
                 if not formats:
                     formats.append({
@@ -2621,11 +2663,11 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
             for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
                 sub = self._parse_json(
                     sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {}
-                src = str_or_none(sub.get('src'))
-                if not src:
+                sub_src = str_or_none(sub.get('src'))
+                if not sub_src:
                     continue
                 subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
-                    'url': urllib.parse.urljoin(url, src),
+                    'url': urllib.parse.urljoin(url, sub_src),
                     'name': sub.get('label'),
                     'http_headers': {
                         'Referer': actual_url,
@@ -2633,7 +2675,10 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
                 })
             if formats or subtitles:
                 self.report_detected('video.js embed')
-                return [{'formats': formats, 'subtitles': subtitles}]
+                info_dict = {'formats': formats, 'subtitles': subtitles}
+                if formats:
+                    self._extra_manifest_info(info_dict, src)
+                return [info_dict]
 
         # Look for generic KVS player (before json-ld bc of some urls that break otherwise)
         found = self._search_regex((

From 7666b93604b97e9ada981c6b04ccf5605dd1bd44 Mon Sep 17 00:00:00 2001
From: coletdjnz <coletdjnz@protonmail.com>
Date: Fri, 14 Apr 2023 07:58:36 +0000
Subject: [PATCH 57/97] [extractor/youtube] Define strict uploader metadata
 mapping (#6384)

New mapping:
```
channel -> channel name
channel_id -> UCID
channel_url -> UCID channel url

uploader -> channel name (same as channel field)
uploader_id -> @handle
uploader_url -> @handle channel url
```

Authored by: coletdjnz
---
 yt_dlp/extractor/youtube.py | 709 +++++++++++++++++++-----------------
 1 file changed, 371 insertions(+), 338 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index d6a55e953..2b17751e5 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -458,6 +458,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
     _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
 
+    _YT_HANDLE_RE = r'@[\w.-]{3,30}'  # https://support.google.com/youtube/answer/11585688?hl=en
+    _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'  # literal "UC" followed by 22 word or hyphen characters
+
+    def ucid_or_none(self, ucid):  # anchored (^...$) match of the whole value against the UC-id pattern; default=None otherwise
+        return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
+
+    def handle_or_none(self, handle):  # anchored (^...$) match of the whole value against the @-handle pattern; default=None otherwise
+        return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None)
+
+    def handle_from_url(self, url):  # accepts an optional http(s)://[www.]youtube.com prefix, then captures the @handle right after the first "/"
+        return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})',
+                                  url, 'channel handle', default=None)
+
+    def ucid_from_url(self, url):  # NOTE(review): pattern expects the UC-id directly after the first "/", with no "channel/" segment - confirm against callers
+        return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})',
+                                  url, 'channel id', default=None)
+
     @functools.cached_property
     def _preferred_lang(self):
         """
@@ -992,6 +1009,8 @@ def _extract_video(self, renderer):
         if not channel_id:
             channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
 
+        channel_id = self.ucid_or_none(channel_id)
+
         overlay_style = traverse_obj(
             renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
             get_all=False, expected_type=str)
@@ -1233,9 +1252,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'BaW_jenozKc',
                 'ext': 'mp4',
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
-                'uploader': 'Philipp Hagemeister',
-                'uploader_id': 'phihag',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                 'channel': 'Philipp Hagemeister',
                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
@@ -1254,7 +1270,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'start_time': 1,
                 'end_time': 9,
                 'comment_count': int,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Philipp Hagemeister',
+                'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
+                'uploader_id': '@PhilippHagemeister',
             }
         },
         {
@@ -1266,9 +1285,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20120608',
                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
-                'uploader': 'SET India',
-                'uploader_id': 'setindia',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
                 'age_limit': 18,
             },
             'skip': 'Private video',
@@ -1280,9 +1296,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'BaW_jenozKc',
                 'ext': 'mp4',
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
-                'uploader': 'Philipp Hagemeister',
-                'uploader_id': 'phihag',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                 'channel': 'Philipp Hagemeister',
                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
@@ -1299,7 +1312,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'live_status': 'not_live',
                 'age_limit': 0,
                 'comment_count': int,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Philipp Hagemeister',
+                'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
+                'uploader_id': '@PhilippHagemeister',
             },
             'params': {
                 'skip_download': True,
@@ -1312,10 +1328,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'a9LDPn-MO4I',
                 'ext': 'm4a',
                 'upload_date': '20121002',
-                'uploader_id': '8KVIDEO',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                 'description': '',
-                'uploader': '8KVIDEO',
                 'title': 'UHDTV TEST 8K VIDEO.mp4'
             },
             'params': {
@@ -1333,8 +1346,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
                 'duration': 244,
-                'uploader': 'AfrojackVEVO',
-                'uploader_id': 'AfrojackVEVO',
                 'upload_date': '20131011',
                 'abr': 129.495,
                 'like_count': int,
@@ -1346,13 +1357,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'live_status': 'not_live',
                 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
                 'channel': 'Afrojack',
-                'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
                 'tags': 'count:19',
                 'availability': 'public',
                 'categories': ['Music'],
                 'age_limit': 0,
                 'alt_title': 'The Spark',
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Afrojack',
+                'uploader_url': 'https://www.youtube.com/@Afrojack',
+                'uploader_id': '@Afrojack',
             },
             'params': {
                 'youtube_include_dash_manifest': True,
@@ -1369,9 +1382,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'duration': 142,
-                'uploader': 'The Witcher',
-                'uploader_id': 'WitcherGame',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                 'upload_date': '20140605',
                 'age_limit': 18,
                 'categories': ['Gaming'],
@@ -1385,7 +1395,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
                 'playable_in_embed': True,
                 'view_count': int,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'The Witcher',
+                'uploader_url': 'https://www.youtube.com/@thewitcher',
+                'uploader_id': '@thewitcher',
             },
         },
         {
@@ -1397,12 +1410,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Godzilla 2 (Official Video)',
                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
                 'upload_date': '20200408',
-                'uploader_id': 'FlyingKitty900',
-                'uploader': 'FlyingKitty',
                 'age_limit': 18,
                 'availability': 'needs_auth',
                 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
-                'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
                 'channel': 'FlyingKitty',
                 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
                 'view_count': int,
@@ -1413,7 +1423,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'like_count': int,
                 'duration': 177,
                 'playable_in_embed': True,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'FlyingKitty',
+                'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
+                'uploader_id': '@FlyingKitty900',
             },
         },
         {
@@ -1424,13 +1437,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
                 'ext': 'mp4',
                 'upload_date': '20191228',
-                'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
-                'uploader': 'Projekt Melody',
                 'description': 'md5:17eccca93a786d51bc67646756894066',
                 'age_limit': 18,
                 'like_count': int,
                 'availability': 'needs_auth',
-                'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
                 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
                 'view_count': int,
                 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
@@ -1442,7 +1452,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 106,
                 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
                 'comment_count': int,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Projekt Melody',
+                'uploader_url': 'https://www.youtube.com/@ProjektMelody',
+                'uploader_id': '@ProjektMelody',
             },
         },
         {
@@ -1452,8 +1465,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'MeJVWBSsPAY',
                 'ext': 'mp4',
                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
-                'uploader': 'Herr Lurik',
-                'uploader_id': 'st3in234',
                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
                 'upload_date': '20130730',
                 'track': 'Such mich find mich',
@@ -1470,11 +1481,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
                 'categories': ['Music'],
                 'availability': 'public',
-                'uploader_url': 'http://www.youtube.com/user/st3in234',
                 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
                 'live_status': 'not_live',
                 'artist': 'OOMPH!',
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Herr Lurik',
+                'uploader_url': 'https://www.youtube.com/@HerrLurik',
+                'uploader_id': '@HerrLurik',
             },
         },
         {
@@ -1491,11 +1504,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'duration': 266,
                 'upload_date': '20100430',
-                'uploader_id': 'deadmau5',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
                 'creator': 'deadmau5',
                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
-                'uploader': 'deadmau5',
                 'title': 'Deadmau5 - Some Chords (HD)',
                 'alt_title': 'Some Chords',
                 'availability': 'public',
@@ -1513,7 +1523,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
                 'categories': ['Music'],
                 'album': 'Some Chords',
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'deadmau5',
+                'uploader_url': 'https://www.youtube.com/@deadmau5',
+                'uploader_id': '@deadmau5',
             },
             'expected_warnings': [
                 'DASH manifest missing',
@@ -1527,10 +1540,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'duration': 6085,
                 'upload_date': '20150827',
-                'uploader_id': 'olympic',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
                 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
-                'uploader': 'Olympics',
                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
                 'like_count': int,
                 'release_timestamp': 1343767800,
@@ -1546,7 +1556,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'live_status': 'was_live',
                 'view_count': int,
                 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Olympics',
+                'uploader_url': 'https://www.youtube.com/@Olympics',
+                'uploader_id': '@Olympics',
             },
             'params': {
                 'skip_download': 'requires avconv',
@@ -1561,10 +1574,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'stretched_ratio': 16 / 9.,
                 'duration': 85,
                 'upload_date': '20110310',
-                'uploader_id': 'AllenMeow',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
-                'uploader': '孫ᄋᄅ',
                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
                 'playable_in_embed': True,
                 'channel': '孫ᄋᄅ',
@@ -1579,7 +1589,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'live_status': 'not_live',
                 'availability': 'unlisted',
                 'comment_count': int,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': '孫ᄋᄅ',
+                'uploader_url': 'https://www.youtube.com/@AllenMeow',
+                'uploader_id': '@AllenMeow',
             },
         },
         # url_encoded_fmt_stream_map is empty string
@@ -1591,8 +1604,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
                 'description': '',
                 'upload_date': '20150404',
-                'uploader_id': 'spbelect',
-                'uploader': 'Наблюдатели Петербурга',
             },
             'params': {
                 'skip_download': 'requires avconv',
@@ -1609,9 +1620,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
                 'duration': 220,
                 'upload_date': '20150625',
-                'uploader_id': 'dorappi2000',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
-                'uploader': 'dorappi2000',
                 'formats': 'mincount:31',
             },
             'skip': 'not actual anymore',
@@ -1624,9 +1632,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'CsmdDsKjzN8',
                 'ext': 'mp4',
                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
-                'uploader': 'Airtek',
                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
-                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
             },
             'params': {
@@ -1637,6 +1643,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         },
         {
             # Multifeed videos (multiple cameras), URL can be of any Camera
+            # TODO: fix multifeed titles
             'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
             'info_dict': {
                 'id': 'zaPI8MvL8pg',
@@ -1648,16 +1655,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'id': 'j5yGuxZ8lLU',
                     'ext': 'mp4',
                     'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
-                    'uploader': 'WiiLikeToPlay',
                     'description': 'md5:563ccbc698b39298481ca3c571169519',
-                    'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
                     'duration': 10120,
                     'channel_follower_count': int,
                     'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
                     'availability': 'public',
                     'playable_in_embed': True,
                     'upload_date': '20131105',
-                    'uploader_id': 'WiiRikeToPray',
                     'categories': ['Gaming'],
                     'live_status': 'was_live',
                     'tags': 'count:24',
@@ -1670,17 +1674,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'channel': 'WiiLikeToPlay',
                     'view_count': int,
                     'release_date': '20131106',
+                    'uploader': 'WiiLikeToPlay',
+                    'uploader_id': '@WLTP',
+                    'uploader_url': 'https://www.youtube.com/@WLTP',
                 },
             }, {
                 'info_dict': {
                     'id': 'zaPI8MvL8pg',
                     'ext': 'mp4',
                     'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
-                    'uploader_id': 'WiiRikeToPray',
                     'availability': 'public',
                     'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
                     'channel': 'WiiLikeToPlay',
-                    'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
                     'channel_follower_count': int,
                     'description': 'md5:563ccbc698b39298481ca3c571169519',
                     'duration': 10108,
@@ -1688,7 +1693,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'like_count': int,
                     'tags': 'count:24',
                     'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
-                    'uploader': 'WiiLikeToPlay',
                     'release_timestamp': 1383701915,
                     'comment_count': int,
                     'upload_date': '20131105',
@@ -1698,6 +1702,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'live_status': 'was_live',
                     'categories': ['Gaming'],
                     'view_count': int,
+                    'uploader': 'WiiLikeToPlay',
+                    'uploader_id': '@WLTP',
+                    'uploader_url': 'https://www.youtube.com/@WLTP',
                 },
             }, {
                 'info_dict': {
@@ -1711,12 +1718,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'playable_in_embed': True,
                     'upload_date': '20131105',
                     'description': 'md5:563ccbc698b39298481ca3c571169519',
-                    'uploader_id': 'WiiRikeToPray',
-                    'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
                     'channel_follower_count': int,
                     'tags': 'count:24',
                     'release_date': '20131106',
-                    'uploader': 'WiiLikeToPlay',
                     'comment_count': int,
                     'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
                     'channel': 'WiiLikeToPlay',
@@ -1726,6 +1730,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'age_limit': 0,
                     'duration': 10128,
                     'view_count': int,
+                    'uploader': 'WiiLikeToPlay',
+                    'uploader_id': '@WLTP',
+                    'uploader_url': 'https://www.youtube.com/@WLTP',
                 },
             }],
             'params': {'skip_download': True},
@@ -1762,9 +1769,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'duration': 133,
                 'upload_date': '20151119',
-                'uploader_id': 'IronSoulElf',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
-                'uploader': 'IronSoulElf',
                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
                 'track': 'Dark Walk',
                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
@@ -1801,8 +1805,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
                 'upload_date': '20151107',
-                'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
-                'uploader': 'CH GAMER DROID',
             },
             'params': {
                 'skip_download': True,
@@ -1824,9 +1826,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
                 'duration': 721,
                 'upload_date': '20150128',
-                'uploader_id': 'BerkmanCenter',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
-                'uploader': 'The Berkman Klein Center for Internet & Society',
                 'license': 'Creative Commons Attribution license (reuse allowed)',
                 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
                 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
@@ -1840,16 +1839,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
                 'live_status': 'not_live',
                 'playable_in_embed': True,
-                'comment_count': int,
                 'channel_follower_count': int,
                 'chapters': list,
+                'uploader': 'The Berkman Klein Center for Internet & Society',
+                'uploader_id': '@BKCHarvard',
+                'uploader_url': 'https://www.youtube.com/@BKCHarvard',
             },
             'params': {
                 'skip_download': True,
             },
         },
         {
-            # Channel-like uploader_url
             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
             'info_dict': {
                 'id': 'eQcmzGIKrzg',
@@ -1858,9 +1858,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
                 'duration': 4060,
                 'upload_date': '20151120',
-                'uploader': 'Bernie Sanders',
-                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
                 'license': 'Creative Commons Attribution license (reuse allowed)',
                 'playable_in_embed': True,
                 'tags': 'count:12',
@@ -1877,6 +1874,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'comment_count': int,
                 'channel_follower_count': int,
                 'chapters': list,
+                'uploader': 'Bernie Sanders',
+                'uploader_url': 'https://www.youtube.com/@BernieSanders',
+                'uploader_id': '@BernieSanders',
             },
             'params': {
                 'skip_download': True,
@@ -1900,9 +1900,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Piku - Trailer',
                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
                 'upload_date': '20150811',
-                'uploader': 'FlixMatrix',
-                'uploader_id': 'FlixMatrixKaravan',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
                 'license': 'Standard YouTube License',
             },
             'params': {
@@ -1920,9 +1917,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
                 'duration': 2085,
                 'upload_date': '20170118',
-                'uploader': 'Vsauce',
-                'uploader_id': 'Vsauce',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
                 'series': 'Mind Field',
                 'season_number': 1,
                 'episode_number': 1,
@@ -1940,7 +1934,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'like_count': int,
                 'playable_in_embed': True,
                 'live_status': 'not_live',
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Vsauce',
+                'uploader_url': 'https://www.youtube.com/@Vsauce',
+                'uploader_id': '@Vsauce',
             },
             'params': {
                 'skip_download': True,
@@ -1960,9 +1957,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
                 'duration': 965,
                 'upload_date': '20140124',
-                'uploader': 'New Century Foundation',
-                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
             },
             'params': {
                 'skip_download': True,
@@ -2007,9 +2001,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                 'duration': 433,
                 'upload_date': '20130923',
-                'uploader': 'Amelia Putri Harwita',
-                'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
                 'formats': 'maxcount:10',
             },
             'params': {
@@ -2020,6 +2011,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         },
         {
             # Youtube Music Auto-generated description
+            # TODO: fix metadata extraction
             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
             'info_dict': {
                 'id': 'MgNrAu2pzNs',
@@ -2027,8 +2019,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Voyeur Girl',
                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
                 'upload_date': '20190312',
-                'uploader': 'Stephen - Topic',
-                'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
                 'artist': 'Stephen',
                 'track': 'Voyeur Girl',
                 'album': 'it\'s too much love to know my dear',
@@ -2036,12 +2026,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'release_year': 2019,
                 'alt_title': 'Voyeur Girl',
                 'view_count': int,
-                'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
                 'playable_in_embed': True,
                 'like_count': int,
                 'categories': ['Music'],
                 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
-                'channel': 'Stephen',
+                'channel': 'Stephen',  # TODO: should be "Stephen - Topic"
+                'uploader': 'Stephen',
                 'availability': 'public',
                 'creator': 'Stephen',
                 'duration': 169,
@@ -2069,9 +2059,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
                 'upload_date': '20090125',
-                'uploader': 'Prochorowka',
-                'uploader_id': 'Prochorowka',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
                 'artist': 'Panjabi MC',
                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
@@ -2090,11 +2077,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'IMG 3456',
                 'description': '',
                 'upload_date': '20170613',
-                'uploader_id': 'ElevageOrVert',
-                'uploader': 'ElevageOrVert',
                 'view_count': int,
                 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
-                'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
                 'like_count': int,
                 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
                 'tags': [],
@@ -2105,8 +2089,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 7,
                 'playable_in_embed': True,
                 'live_status': 'not_live',
-                'channel': 'ElevageOrVert',
-                'channel_follower_count': int
+                'channel': 'l\'Or Vert asbl',
+                'channel_follower_count': int,
+                'uploader': 'l\'Or Vert asbl',
+                'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
+                'uploader_id': '@ElevageOrVert',
             },
             'params': {
                 'skip_download': True,
@@ -2124,11 +2111,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Part 77   Sort a list of simple types in c#',
                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
                 'upload_date': '20130831',
-                'uploader_id': 'kudvenkat',
-                'uploader': 'kudvenkat',
                 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
                 'like_count': int,
-                'uploader_url': 'http://www.youtube.com/user/kudvenkat',
                 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
                 'live_status': 'not_live',
                 'categories': ['Education'],
@@ -2143,6 +2127,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'comment_count': int,
                 'channel_follower_count': int,
                 'chapters': list,
+                'uploader': 'kudvenkat',
+                'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
+                'uploader_id': '@Csharp-video-tutorialsBlogspot',
             },
             'params': {
                 'skip_download': True,
@@ -2166,9 +2153,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Burn Out',
                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
                 'upload_date': '20141120',
-                'uploader': 'The Cinematic Orchestra - Topic',
-                'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
                 'artist': 'The Cinematic Orchestra',
                 'track': 'Burn Out',
                 'album': 'Every Day',
@@ -2187,7 +2171,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
                 'categories': ['Music'],
                 'playable_in_embed': True,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'The Cinematic Orchestra',
+                'comment_count': int,
             },
             'params': {
                 'skip_download': True,
@@ -2206,13 +2192,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
-                'uploader': 'CBS Mornings',
-                'uploader_id': 'CBSThisMorning',
                 'upload_date': '20140716',
                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
                 'duration': 170,
                 'categories': ['News & Politics'],
-                'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
                 'view_count': int,
                 'channel': 'CBS Mornings',
                 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
@@ -2223,7 +2206,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'like_count': int,
                 'live_status': 'not_live',
                 'playable_in_embed': True,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'CBS Mornings',
+                'uploader_url': 'https://www.youtube.com/@CBSMornings',
+                'uploader_id': '@CBSMornings',
             }
         },
         {
@@ -2235,9 +2221,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
                 'upload_date': '20201120',
-                'uploader': 'Walk around Japan',
-                'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
                 'duration': 1456,
                 'categories': ['Travel & Events'],
                 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
@@ -2250,7 +2233,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
                 'live_status': 'not_live',
                 'playable_in_embed': True,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Walk around Japan',
+                'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
+                'uploader_id': '@walkaroundjapan7124',
             },
             'params': {
                 'skip_download': True,
@@ -2276,13 +2262,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': '3gp',
                 'upload_date': '20210624',
                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
-                'uploader': 'colinfurze',
-                'uploader_id': 'colinfurze',
                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
                 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
                 'duration': 596,
                 'categories': ['Entertainment'],
-                'uploader_url': 'http://www.youtube.com/user/colinfurze',
                 'view_count': int,
                 'channel': 'colinfurze',
                 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
@@ -2294,6 +2277,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'playable_in_embed': True,
                 'channel_follower_count': int,
                 'chapters': list,
+                'uploader': 'colinfurze',
+                'uploader_url': 'https://www.youtube.com/@colinfurze',
+                'uploader_id': '@colinfurze',
             },
             'params': {
                 'format': '17',  # 3gp format available on android
@@ -2319,10 +2305,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mhtml',
                 'format_id': 'sb0',
                 'title': 'Your Brain is Plastic',
-                'uploader_id': 'scishow',
                 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
                 'upload_date': '20140324',
-                'uploader': 'SciShow',
                 'like_count': int,
                 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
                 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
@@ -2330,7 +2314,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
                 'playable_in_embed': True,
                 'tags': 'count:12',
-                'uploader_url': 'http://www.youtube.com/user/scishow',
                 'availability': 'public',
                 'channel': 'SciShow',
                 'live_status': 'not_live',
@@ -2339,6 +2322,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'age_limit': 0,
                 'channel_follower_count': int,
                 'chapters': list,
+                'uploader': 'SciShow',
+                'uploader_url': 'https://www.youtube.com/@SciShow',
+                'uploader_id': '@SciShow',
             }, 'params': {'format': 'mhtml', 'skip_download': True}
         }, {
             # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -2348,9 +2334,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'The NP that test your phone performance 🙂',
                 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
-                'uploader': 'Leon Nguyen',
-                'uploader_id': 'VNSXIII',
-                'uploader_url': 'http://www.youtube.com/user/VNSXIII',
                 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
                 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
                 'duration': 21,
@@ -2366,7 +2349,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel': 'Leon Nguyen',
                 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
                 'comment_count': int,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Leon Nguyen',
+                'uploader_url': 'https://www.youtube.com/@LeonNguyen',
+                'uploader_id': '@LeonNguyen',
             }
         }, {
             # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
@@ -2376,9 +2362,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'The NP that test your phone performance 🙂',
                 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
-                'uploader': 'Leon Nguyen',
-                'uploader_id': 'VNSXIII',
-                'uploader_url': 'http://www.youtube.com/user/VNSXIII',
                 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
                 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
                 'duration': 21,
@@ -2394,7 +2377,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel': 'Leon Nguyen',
                 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
                 'comment_count': int,
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Leon Nguyen',
+                'uploader_url': 'https://www.youtube.com/@LeonNguyen',
+                'uploader_id': '@LeonNguyen',
             },
             'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
         }, {
@@ -2404,10 +2390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'mzZzzBU6lrM',
                 'ext': 'mp4',
                 'title': 'I Met GeorgeNotFound In Real Life...',
-                'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
-                'uploader': 'Quackity',
-                'uploader_id': 'QuackityHQ',
-                'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
+                'description': 'md5:978296ec9783a031738b684d4ebf302d',
                 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
                 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
                 'duration': 955,
@@ -2424,7 +2407,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'availability': 'public',
                 'channel': 'Quackity',
                 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
-                'channel_follower_count': int
+                'channel_follower_count': int,
+                'uploader': 'Quackity',
+                'uploader_id': '@Quackity',
+                'uploader_url': 'https://www.youtube.com/@Quackity',
             }
         },
         {   # continuous livestream. Microformat upload date should be preferred.
@@ -2442,19 +2428,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
                 'live_status': 'is_live',
                 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
-                'uploader': '阿鲍Abao',
-                'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
                 'channel': 'Abao in Tokyo',
                 'channel_follower_count': int,
                 'release_date': '20211127',
                 'tags': 'count:39',
                 'categories': ['People & Blogs'],
                 'like_count': int,
-                'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
                 'view_count': int,
                 'playable_in_embed': True,
                 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
                 'concurrent_view_count': int,
+                'uploader': 'Abao in Tokyo',
+                'uploader_url': 'https://www.youtube.com/@abaointokyo',
+                'uploader_id': '@abaointokyo',
             },
             'params': {'skip_download': True}
         }, {
@@ -2471,15 +2457,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Story',
                 'channel': 'IT\'S HISTORY',
                 'description': '',
-                'uploader_id': 'BlastfromthePast',
                 'duration': 12,
-                'uploader': 'IT\'S HISTORY',
                 'playable_in_embed': True,
                 'age_limit': 0,
                 'live_status': 'not_live',
                 'tags': [],
                 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
-                'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
                 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
             },
             'skip': 'stories get removed after some period of time',
@@ -2492,11 +2475,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20220323',
                 'like_count': int,
                 'availability': 'unlisted',
-                'channel': 'nao20010128nao',
-                'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
+                'channel': 'Lesmiscore',
+                'thumbnail': r're:^https?://.*\.jpg',
                 'age_limit': 0,
-                'uploader': 'nao20010128nao',
-                'uploader_id': 'nao20010128nao',
                 'categories': ['Music'],
                 'view_count': int,
                 'description': '',
@@ -2507,7 +2488,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel_follower_count': int,
                 'duration': 6,
                 'tags': [],
-                'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
+                'uploader_id': '@lesmiscore',
+                'uploader': 'Lesmiscore',
+                'uploader_url': 'https://www.youtube.com/@lesmiscore',
             }
         }, {
             # Prefer primary title+description language metadata by default
@@ -2525,16 +2508,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'like_count': int,
                 'playable_in_embed': True,
                 'availability': 'unlisted',
-                'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
+                'thumbnail': r're:^https?://.*\.jpg',
                 'age_limit': 0,
                 'duration': 5,
-                'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
-                'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
                 'live_status': 'not_live',
                 'upload_date': '20220908',
                 'categories': ['People & Blogs'],
-                'uploader': 'cole-dlp-test-acc',
                 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+                'uploader_url': 'https://www.youtube.com/@coletdjnz',
+                'uploader_id': '@coletdjnz',
+                'uploader': 'cole-dlp-test-acc',
             },
             'params': {'skip_download': True}
         }, {
@@ -2549,18 +2532,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'live_status': 'not_live',
                 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
                 'upload_date': '20220728',
-                'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
                 'view_count': int,
                 'categories': ['People & Blogs'],
-                'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
+                'thumbnail': r're:^https?://.*\.jpg',
                 'title': 'dlp test video title translated (fr)',
                 'availability': 'public',
-                'uploader': 'cole-dlp-test-acc',
                 'age_limit': 0,
                 'description': 'dlp test video description translated (fr)',
                 'playable_in_embed': True,
                 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
-                'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+                'uploader_url': 'https://www.youtube.com/@coletdjnz',
+                'uploader_id': '@coletdjnz',
+                'uploader': 'cole-dlp-test-acc',
             },
             'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
             'expected_warnings': [r'Preferring "fr" translated fields'],
@@ -2576,7 +2559,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'categories': ['Entertainment'],
                 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
-                'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
                 'live_status': 'not_live',
                 'duration': 937,
                 'channel_follower_count': int,
@@ -2586,14 +2568,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'playable_in_embed': True,
                 'view_count': int,
                 'upload_date': '20221112',
-                'uploader': 'MrBeast',
-                'uploader_id': 'MrBeast6000',
                 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
                 'age_limit': 0,
                 'availability': 'public',
                 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
                 'like_count': int,
                 'tags': [],
+                'uploader': 'MrBeast',
+                'uploader_url': 'https://www.youtube.com/@MrBeast',
+                'uploader_id': '@MrBeast',
             },
             'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
         }, {
@@ -2601,14 +2584,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
             'info_dict': {
                 'id': 'Tq92D6wQ1mg',
-                'ext': 'weba',
+                'ext': 'webm',
                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
                 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
                 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
                 'channel_follower_count': int,
                 'description': 'md5:17eccca93a786d51bc67646756894066',
                 'upload_date': '20191228',
-                'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
                 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
                 'playable_in_embed': True,
                 'like_count': int,
@@ -2616,13 +2598,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
                 'age_limit': 18,
                 'channel': 'Projekt Melody',
-                'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
                 'view_count': int,
                 'availability': 'needs_auth',
                 'comment_count': int,
                 'live_status': 'not_live',
-                'uploader': 'Projekt Melody',
                 'duration': 106,
+                'uploader': 'Projekt Melody',
+                'uploader_id': '@ProjektMelody',
+                'uploader_url': 'https://www.youtube.com/@ProjektMelody',
             },
             'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
         },
@@ -2632,13 +2615,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'qVv6vCqciTM',
                 'ext': 'mp4',
                 'age_limit': 0,
-                'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
                 'comment_count': int,
                 'chapters': 'count:13',
                 'upload_date': '20221223',
                 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
                 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
-                'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
                 'like_count': int,
                 'release_date': '20221223',
                 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
@@ -2655,6 +2636,40 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel': 'さなちゃんねる',
                 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
                 'uploader': 'さなちゃんねる',
+                'uploader_url': 'https://www.youtube.com/@sana_natori',
+                'uploader_id': '@sana_natori',
+            },
+        },
+        {
+            # Fallbacks when webpage and web client is unavailable
+            'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
+            'info_dict': {
+                'id': 'wSSmNUl9Snw',
+                'ext': 'mp4',
+                # 'categories': ['Science & Technology'],
+                'view_count': int,
+                'chapters': 'count:2',
+                'channel': 'Scott Manley',
+                'like_count': int,
+                'age_limit': 0,
+                # 'availability': 'public',
+                'channel_follower_count': int,
+                'live_status': 'not_live',
+                'upload_date': '20170831',
+                'duration': 682,
+                'tags': 'count:8',
+                'uploader_url': 'https://www.youtube.com/@scottmanley',
+                'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
+                'uploader': 'Scott Manley',
+                'uploader_id': '@scottmanley',
+                'title': 'The Computer Hack That Saved Apollo 14',
+                'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
+                'thumbnail': r're:^https?://.*\.webp',
+                'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
+                'playable_in_embed': True,
+            },
+            'params': {
+                'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
             },
         },
     ]
@@ -2670,8 +2685,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
                 'upload_date': '20080526',
                 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
-                'uploader': 'Christopher Sykes',
-                'uploader_id': 'ChristopherJSykes',
                 'age_limit': 0,
                 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
                 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
@@ -2687,7 +2700,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'view_count': int,
                 'categories': ['Science & Technology'],
                 'channel_follower_count': int,
-                'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
+                'uploader': 'Christopher Sykes',
+                'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
+                'uploader_id': '@ChristopherSykesDocumentaries',
             },
             'params': {
                 'skip_download': True,
@@ -4097,10 +4112,10 @@ def feed_entry(name):
         self._downloader._sort_thumbnails(original_thumbnails)
 
         category = get_first(microformats, 'category') or search_meta('genre')
-        channel_id = str_or_none(
+        channel_id = self.ucid_or_none(str_or_none(
             get_first(video_details, 'channelId')
             or get_first(microformats, 'externalChannelId')
-            or search_meta('channelId'))
+            or search_meta('channelId')))
         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
 
         live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
@@ -4127,6 +4142,8 @@ def is_bad_format(fmt):
 
         formats.extend(self._extract_storyboard(player_responses, duration))
 
+        channel_handle = self.handle_from_url(owner_profile_url)
+
         info = {
             'id': video_id,
             'title': video_title,
@@ -4136,11 +4153,8 @@ def is_bad_format(fmt):
             # URL checking if user don't care about getting the best possible thumbnail
             'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
             'description': video_description,
-            'uploader': get_first(video_details, 'author'),
-            'uploader_id': self._search_regex(r'/(?:channel/|user/|(?=@))([^/?&#]+)', owner_profile_url, 'uploader id', default=None),
-            'uploader_url': owner_profile_url,
             'channel_id': channel_id,
-            'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
+            'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
             'duration': duration,
             'view_count': int_or_none(
                 get_first((video_details, microformats), (..., 'viewCount'))
@@ -4369,6 +4383,13 @@ def process_language(container, base_url, lang_code, sub_name, query):
                 'channel': self._get_text(vor, 'title'),
                 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
 
+            if not channel_handle:
+                channel_handle = self.handle_from_url(
+                    traverse_obj(vor, (
+                        ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
+                        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
+                        {str}), get_all=False))
+
             rows = try_get(
                 vsir,
                 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
@@ -4395,12 +4416,11 @@ def process_language(container, base_url, lang_code, sub_name, query):
                     elif mrr_title == 'Song':
                         info['track'] = mrr_contents_text
 
-        fallbacks = {
-            'channel': 'uploader',
-            'channel_id': 'uploader_id',
-            'channel_url': 'uploader_url',
-        }
-
+        info.update({
+            'uploader': info.get('channel'),
+            'uploader_id': channel_handle,
+            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
+        })
         # The upload date for scheduled, live and past live streams / premieres in microformats
         # may be different from the stream date. Although not in UTC, we will prefer it in this case.
         # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
@@ -4415,10 +4435,6 @@ def process_language(container, base_url, lang_code, sub_name, query):
                 self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
         info['upload_date'] = upload_date
 
-        for to, frm in fallbacks.items():
-            if not info.get(to):
-                info[to] = info.get(frm)
-
         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
             v = info.get(s_k)
             if v:
@@ -4494,18 +4510,28 @@ def _extract_basic_item_renderer(item):
                 return renderer
 
     def _extract_channel_renderer(self, renderer):
-        channel_id = renderer['channelId']
+        channel_id = self.ucid_or_none(renderer['channelId'])
         title = self._get_text(renderer, 'title')
-        channel_url = f'https://www.youtube.com/channel/{channel_id}'
+        channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
+        # As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet.
+        # However we can expect them to change that in the future.
+        channel_handle = self.handle_from_url(
+            traverse_obj(renderer, (
+                'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
+                                       ('browseEndpoint', 'canonicalBaseUrl')),
+                {str}), get_all=False))
         return {
             '_type': 'url',
             'url': channel_url,
             'id': channel_id,
             'ie_key': YoutubeTabIE.ie_key(),
             'channel': title,
+            'uploader': title,
             'channel_id': channel_id,
             'channel_url': channel_url,
             'title': title,
+            'uploader_id': channel_handle,
+            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
             'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
             'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
             'playlist_count': self._get_count(renderer, 'videoCountText'),
@@ -4845,13 +4871,14 @@ def _extract_metadata_from_tabs(self, item_id, data):
 
         metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
         if metadata_renderer:
+            channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
+                                                         ('channelUrl', {self.ucid_from_url}))
             info.update({
-                'uploader': metadata_renderer.get('title'),
-                'uploader_id': metadata_renderer.get('externalId'),
-                'uploader_url': metadata_renderer.get('channelUrl'),
+                'channel': metadata_renderer.get('title'),
+                'channel_id': channel_id,
             })
-            if info['uploader_id']:
-                info['id'] = info['uploader_id']
+            if info['channel_id']:
+                info['id'] = info['channel_id']
         else:
             metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
 
@@ -4904,6 +4931,15 @@ def _get_uncropped(url):
             'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
         })
 
+        channel_handle = (
+            traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
+            or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))
+
+        if channel_handle:
+            info.update({
+                'uploader_id': channel_handle,
+                'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
+            })
         # Playlist stats is a text runs array containing [video count, view count, last updated].
         # last updated or (view count and last updated) may be missing.
         playlist_stats = get_first(
@@ -4926,7 +4962,7 @@ def _get_uncropped(url):
         if info['playlist_count'] is None:  # 0 is allowed
             info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
 
-        if not info.get('uploader_id'):
+        if not info.get('channel_id'):
             owner = traverse_obj(playlist_header_renderer, 'ownerText')
             if not owner:  # Deprecated
                 owner = traverse_obj(
@@ -4935,16 +4971,17 @@ def _get_uncropped(url):
             owner_text = self._get_text(owner)
             browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
             info.update({
-                'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
-                'uploader_id': browse_ep.get('browseId'),
-                'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
+                'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
+                'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
+                'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))
             })
 
         info.update({
-            'channel': info['uploader'],
-            'channel_id': info['uploader_id'],
-            'channel_url': info['uploader_url']
+            'uploader': info['channel'],
+            'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
+            'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
         })
+
         return info
 
     def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
@@ -5221,12 +5258,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'title': 'Igor Kleiner - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
             'uploader': 'Igor Kleiner',
-            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
+            'uploader_id': '@IgorDataScience',
+            'uploader_url': 'https://www.youtube.com/@IgorDataScience',
             'channel': 'Igor Kleiner',
             'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
             'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
             'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
-            'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
             'channel_follower_count': int
         },
     }, {
@@ -5237,9 +5274,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
             'title': 'Igor Kleiner - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
-            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
             'uploader': 'Igor Kleiner',
-            'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
+            'uploader_id': '@IgorDataScience',
+            'uploader_url': 'https://www.youtube.com/@IgorDataScience',
             'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
             'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
             'channel': 'Igor Kleiner',
@@ -5254,12 +5291,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'id': 'UCYO_jab_esuFRV4b17AJtAw',
             'title': '3Blue1Brown - Playlists',
             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
-            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
-            'uploader': '3Blue1Brown',
             'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
-            'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
             'channel': '3Blue1Brown',
             'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
+            'uploader_id': '@3blue1brown',
+            'uploader_url': 'https://www.youtube.com/@3blue1brown',
+            'uploader': '3Blue1Brown',
             'tags': ['Mathematics'],
             'channel_follower_count': int
         },
@@ -5272,10 +5309,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'title': 'ThirstForScience - Playlists',
             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
             'uploader': 'ThirstForScience',
-            'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
-            'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
-            'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
+            'uploader_url': 'https://www.youtube.com/@ThirstForScience',
+            'uploader_id': '@ThirstForScience',
             'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
+            'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
             'tags': 'count:13',
             'channel': 'ThirstForScience',
             'channel_follower_count': int
@@ -5287,8 +5324,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'note': 'basic, single video playlist',
         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
         'info_dict': {
-            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
-            'uploader': 'Sergey M.',
             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
             'title': 'youtube-dl public playlist',
             'description': '',
@@ -5297,17 +5332,17 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'modified_date': '20201130',
             'channel': 'Sergey M.',
             'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
-            'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
             'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
             'availability': 'public',
+            'uploader': 'Sergey M.',
+            'uploader_url': 'https://www.youtube.com/@sergeym.6173',
+            'uploader_id': '@sergeym.6173',
         },
         'playlist_count': 1,
     }, {
         'note': 'empty playlist',
         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
         'info_dict': {
-            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
-            'uploader': 'Sergey M.',
             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
             'title': 'youtube-dl empty playlist',
             'tags': [],
@@ -5316,8 +5351,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'modified_date': '20160902',
             'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
             'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
-            'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
             'availability': 'public',
+            'uploader_url': 'https://www.youtube.com/@sergeym.6173',
+            'uploader_id': '@sergeym.6173',
+            'uploader': 'Sergey M.',
         },
         'playlist_count': 0,
     }, {
@@ -5328,10 +5365,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'title': 'lex will - Home',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
             'uploader': 'lex will',
-            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'uploader_id': '@lexwill718',
             'channel': 'lex will',
             'tags': ['bible', 'history', 'prophesy'],
-            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+            'uploader_url': 'https://www.youtube.com/@lexwill718',
             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'channel_follower_count': int
@@ -5345,11 +5382,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'title': 'lex will - Videos',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
             'uploader': 'lex will',
-            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'uploader_id': '@lexwill718',
             'tags': ['bible', 'history', 'prophesy'],
             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
-            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+            'uploader_url': 'https://www.youtube.com/@lexwill718',
             'channel': 'lex will',
             'channel_follower_count': int
         },
@@ -5362,9 +5399,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'title': 'lex will - Videos',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
             'uploader': 'lex will',
-            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'uploader_id': '@lexwill718',
             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
-            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+            'uploader_url': 'https://www.youtube.com/@lexwill718',
             'channel': 'lex will',
             'tags': ['bible', 'history', 'prophesy'],
             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
@@ -5379,8 +5416,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'title': 'lex will - Playlists',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
             'uploader': 'lex will',
-            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
-            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+            'uploader_id': '@lexwill718',
+            'uploader_url': 'https://www.youtube.com/@lexwill718',
             'channel': 'lex will',
             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
@@ -5395,14 +5432,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Community',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
-            'uploader': 'lex will',
-            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
-            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
             'channel': 'lex will',
             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'tags': ['bible', 'history', 'prophesy'],
-            'channel_follower_count': int
+            'channel_follower_count': int,
+            'uploader_url': 'https://www.youtube.com/@lexwill718',
+            'uploader_id': '@lexwill718',
+            'uploader': 'lex will',
         },
         'playlist_mincount': 18,
     }, {
@@ -5412,14 +5449,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Channels',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
-            'uploader': 'lex will',
-            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
-            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
             'channel': 'lex will',
             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'tags': ['bible', 'history', 'prophesy'],
-            'channel_follower_count': int
+            'channel_follower_count': int,
+            'uploader_url': 'https://www.youtube.com/@lexwill718',
+            'uploader_id': '@lexwill718',
+            'uploader': 'lex will',
         },
         'playlist_mincount': 12,
     }, {
@@ -5430,14 +5467,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'id': 'UCYO_jab_esuFRV4b17AJtAw',
             'title': '3Blue1Brown - Search - linear algebra',
             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
-            'uploader': '3Blue1Brown',
-            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
             'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
-            'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
             'tags': ['Mathematics'],
             'channel': '3Blue1Brown',
             'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
-            'channel_follower_count': int
+            'channel_follower_count': int,
+            'uploader_url': 'https://www.youtube.com/@3blue1brown',
+            'uploader_id': '@3blue1brown',
+            'uploader': '3Blue1Brown',
         },
     }, {
         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
@@ -5454,17 +5491,17 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'info_dict': {
             'title': '29C3: Not my department',
             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
-            'uploader': 'Christiaan008',
-            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
             'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
             'tags': [],
-            'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
             'view_count': int,
             'modified_date': '20150605',
             'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
-            'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
+            'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
             'channel': 'Christiaan008',
             'availability': 'public',
+            'uploader_id': '@ChRiStIaAn008',
+            'uploader': 'Christiaan008',
+            'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
         },
         'playlist_count': 96,
     }, {
@@ -5473,17 +5510,17 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'info_dict': {
             'title': 'Uploads from Cauchemar',
             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
-            'uploader': 'Cauchemar',
-            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
-            'channel_url': 'https://www.youtube.com/c/Cauchemar89',
+            'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
             'tags': [],
             'modified_date': r're:\d{8}',
             'channel': 'Cauchemar',
-            'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
             'view_count': int,
             'description': '',
             'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
             'availability': 'public',
+            'uploader_id': '@Cauchemar89',
+            'uploader': 'Cauchemar',
+            'uploader_url': 'https://www.youtube.com/@Cauchemar89',
         },
         'playlist_mincount': 1123,
         'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5497,17 +5534,17 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'info_dict': {
             'title': 'Uploads from Interstellar Movie',
             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
-            'uploader': 'Interstellar Movie',
-            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
-            'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
             'tags': [],
             'view_count': int,
             'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
-            'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
+            'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
             'channel': 'Interstellar Movie',
             'description': '',
             'modified_date': r're:\d{8}',
             'availability': 'public',
+            'uploader_id': '@InterstellarMovie',
+            'uploader': 'Interstellar Movie',
+            'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
         },
         'playlist_mincount': 21,
     }, {
@@ -5516,17 +5553,17 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'info_dict': {
             'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
             'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
-            'uploader': 'Phim Siêu Nhân Nhật Bản',
-            'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
             'view_count': int,
             'channel': 'Phim Siêu Nhân Nhật Bản',
             'tags': [],
-            'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
             'description': '',
             'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
             'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
             'modified_date': r're:\d{8}',
             'availability': 'public',
+            'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
+            'uploader_id': '@phimsieunhannhatban',
+            'uploader': 'Phim Siêu Nhân Nhật Bản',
         },
         'playlist_mincount': 200,
         'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5536,17 +5573,17 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'info_dict': {
             'title': 'Uploads from BlankTV',
             'id': 'UU8l9frL61Yl5KFOl87nIm2w',
-            'uploader': 'BlankTV',
-            'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
             'channel': 'BlankTV',
-            'channel_url': 'https://www.youtube.com/c/blanktv',
+            'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
             'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
             'view_count': int,
             'tags': [],
-            'uploader_url': 'https://www.youtube.com/c/blanktv',
             'modified_date': r're:\d{8}',
             'description': '',
             'availability': 'public',
+            'uploader_id': '@blanktv',
+            'uploader': 'BlankTV',
+            'uploader_url': 'https://www.youtube.com/@blanktv',
         },
         'playlist_mincount': 1000,
         'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5556,17 +5593,17 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'info_dict': {
             'title': 'Data Analysis with Dr Mike Pound',
             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
-            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
-            'uploader': 'Computerphile',
             'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
-            'uploader_url': 'https://www.youtube.com/user/Computerphile',
             'tags': [],
             'view_count': int,
             'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
-            'channel_url': 'https://www.youtube.com/user/Computerphile',
+            'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
             'channel': 'Computerphile',
             'availability': 'public',
             'modified_date': '20190712',
+            'uploader_id': '@Computerphile',
+            'uploader': 'Computerphile',
+            'uploader_url': 'https://www.youtube.com/@Computerphile',
         },
         'playlist_mincount': 11,
     }, {
@@ -5579,9 +5616,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'id': 'FqZTN594JQw',
             'ext': 'webm',
             'title': "Smiley's People 01 detective, Adventure Series, Action",
-            'uploader': 'STREEM',
-            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
-            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
             'upload_date': '20150526',
             'license': 'Standard YouTube License',
             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
@@ -5604,12 +5638,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
     }, {
         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
         'info_dict': {
-            'id': 'Wq15eF5vCbI',  # This will keep changing
+            'id': 'AlTsmyW4auo',  # This will keep changing
             'ext': 'mp4',
             'title': str,
-            'uploader': 'Sky News',
-            'uploader_id': 'skynews',
-            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
             'upload_date': r're:\d{8}',
             'description': str,
             'categories': ['News & Politics'],
@@ -5628,6 +5659,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
             'channel_follower_count': int,
             'concurrent_view_count': int,
+            'uploader_url': 'https://www.youtube.com/@SkyNews',
+            'uploader_id': '@SkyNews',
+            'uploader': 'Sky News',
         },
         'params': {
             'skip_download': True,
@@ -5639,9 +5673,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'id': 'a48o2S1cPoo',
             'ext': 'mp4',
             'title': 'The Young Turks - Live Main Show',
-            'uploader': 'The Young Turks',
-            'uploader_id': 'TheYoungTurks',
-            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
             'upload_date': '20150715',
             'license': 'Standard YouTube License',
             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
@@ -5722,41 +5753,40 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
         'info_dict': {
             'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
-            'uploader': 'NoCopyrightSounds',
             'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
-            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
             'title': 'NCS : All Releases 💿',
-            'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
-            'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
+            'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
             'modified_date': r're:\d{8}',
             'view_count': int,
             'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
             'tags': [],
             'channel': 'NoCopyrightSounds',
             'availability': 'public',
+            'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
+            'uploader': 'NoCopyrightSounds',
+            'uploader_id': '@NoCopyrightSounds',
         },
         'playlist_mincount': 166,
-        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
+        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
     }, {
+        # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
         'note': 'Topic, should redirect to playlist?list=UU...',
         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
         'info_dict': {
             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
-            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
             'title': 'Uploads from Royalty Free Music - Topic',
-            'uploader': 'Royalty Free Music - Topic',
             'tags': [],
             'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
             'channel': 'Royalty Free Music - Topic',
             'view_count': int,
             'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
-            'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
             'modified_date': r're:\d{8}',
-            'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
             'description': '',
             'availability': 'public',
+            'uploader': 'Royalty Free Music - Topic',
         },
         'playlist_mincount': 101,
+        'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
     }, {
         # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
         # Treat as a general feed
@@ -5780,12 +5810,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'modified_date': r're:\d{8}',
         },
         'playlist_count': 50,
+        'expected_warnings': ['YouTube Music is not directly supported'],
     }, {
         'note': 'unlisted single video playlist',
         'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
         'info_dict': {
-            'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
-            'uploader': 'colethedj',
             'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
             'title': 'yt-dlp unlisted playlist test',
             'availability': 'unlisted',
@@ -5794,9 +5823,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'channel': 'colethedj',
             'view_count': int,
             'description': '',
-            'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
             'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
             'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
+            'uploader_url': 'https://www.youtube.com/@colethedj1894',
+            'uploader_id': '@colethedj1894',
+            'uploader': 'colethedj',
         },
         'playlist_count': 1,
     }, {
@@ -5819,13 +5850,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
             'title': 'Cody\'sLab - Videos',
             'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
-            'uploader': 'Cody\'sLab',
-            'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
             'channel': 'Cody\'sLab',
             'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
             'tags': [],
             'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
-            'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
             'channel_follower_count': int
         },
         'playlist_mincount': 650,
@@ -5839,9 +5867,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
         'info_dict': {
             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
-            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
             'title': 'Uploads from Royalty Free Music - Topic',
-            'uploader': 'Royalty Free Music - Topic',
             'modified_date': r're:\d{8}',
             'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
             'description': '',
@@ -5849,14 +5875,15 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'tags': [],
             'channel': 'Royalty Free Music - Topic',
             'view_count': int,
-            'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
             'availability': 'public',
+            'uploader': 'Royalty Free Music - Topic',
         },
         'playlist_mincount': 101,
         'params': {
             'skip_download': True,
             'extractor_args': {'youtubetab': {'skip': ['webpage']}}
         },
+        'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
     }, {
         'note': 'non-standard redirect to regional channel',
         'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
@@ -5869,15 +5896,15 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'modified_date': '20220407',
             'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
             'tags': [],
-            'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
-            'uploader': 'pukkandan',
             'availability': 'unlisted',
             'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
             'channel': 'pukkandan',
             'description': 'Test for collaborative playlist',
             'title': 'yt-dlp test - collaborative playlist',
             'view_count': int,
-            'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
+            'uploader_url': 'https://www.youtube.com/@pukkandan',
+            'uploader_id': '@pukkandan',
+            'uploader': 'pukkandan',
         },
         'playlist_mincount': 2
     }, {
@@ -5886,14 +5913,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'info_dict': {
             'id': 'UCiu-3thuViMebBjw_5nWYrA',
             'tags': [],
-            'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
             'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
             'description': 'test description',
             'title': 'cole-dlp-test-acc - 再生リスト',
-            'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
-            'uploader': 'cole-dlp-test-acc',
             'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
             'channel': 'cole-dlp-test-acc',
+            'uploader_url': 'https://www.youtube.com/@coletdjnz',
+            'uploader_id': '@coletdjnz',
+            'uploader': 'cole-dlp-test-acc',
         },
         'playlist_mincount': 1,
         'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
@@ -5907,14 +5934,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'tags': [],
             'view_count': int,
             'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
-            'uploader': 'cole-dlp-test-acc',
-            'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
             'channel': 'cole-dlp-test-acc',
             'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
             'description': 'test',
-            'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
             'title': 'dlp test playlist',
             'availability': 'public',
+            'uploader_url': 'https://www.youtube.com/@coletdjnz',
+            'uploader_id': '@coletdjnz',
+            'uploader': 'cole-dlp-test-acc',
         },
         'playlist_mincount': 1,
         'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
@@ -5966,29 +5993,30 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'channel_follower_count': int,
             'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
             'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
-            'uploader': 'Polka Ch. 尾丸ポルカ',
-            'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',
+            'description': 'md5:e56b74b5bb7e9c701522162e9abfb822',
             'channel': 'Polka Ch. 尾丸ポルカ',
             'tags': 'count:35',
-            'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
-            'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
+            'uploader_url': 'https://www.youtube.com/@OmaruPolka',
+            'uploader': 'Polka Ch. 尾丸ポルカ',
+            'uploader_id': '@OmaruPolka',
         },
         'playlist_count': 3,
     }, {
         # Shorts tab with channel with handle
+        # TODO: fix channel description
         'url': 'https://www.youtube.com/@NotJustBikes/shorts',
         'info_dict': {
             'id': 'UC0intLFzLaudFG-xAvUEO-A',
             'title': 'Not Just Bikes - Shorts',
             'tags': 'count:12',
-            'uploader': 'Not Just Bikes',
             'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
-            'description': 'md5:7513148b1f02b924783157d84c4ea555',
+            'description': 'md5:26bc55af26855a608a5cf89dfa595c8d',
             'channel_follower_count': int,
-            'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
             'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
-            'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
             'channel': 'Not Just Bikes',
+            'uploader_url': 'https://www.youtube.com/@NotJustBikes',
+            'uploader': 'Not Just Bikes',
+            'uploader_id': '@NotJustBikes',
         },
         'playlist_mincount': 10,
     }, {
@@ -6000,12 +6028,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'tags': 'count:7',
             'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
             'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
-            'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
             'channel': '中村悠一',
-            'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
             'channel_follower_count': int,
-            'uploader': '中村悠一',
             'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
+            'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
+            'uploader_id': '@Yuichi-Nakamura',
+            'uploader': '中村悠一',
         },
         'playlist_mincount': 60,
     }, {
@@ -6024,15 +6052,15 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'info_dict': {
             'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
             'title': 'Shorts Break - Shorts',
-            'tags': 'count:32',
+            'tags': 'count:48',
             'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
             'channel': 'Shorts Break',
-            'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
-            'uploader': 'Shorts Break',
+            'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
             'channel_follower_count': int,
-            'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
-            'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
             'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
+            'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
+            'uploader': 'Shorts Break',
+            'uploader_id': '@ShortsBreak_Official',
         },
         'playlist_mincount': 30,
     }, {
@@ -6055,30 +6083,28 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'playlist_mincount': 30,
     }, {
         # Shorts url result in shorts tab
+        # TODO: Fix channel id extraction
         'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
         'info_dict': {
             'id': 'UCiu-3thuViMebBjw_5nWYrA',
             'title': 'cole-dlp-test-acc - Shorts',
-            'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
             'channel': 'cole-dlp-test-acc',
             'description': 'test description',
             'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
             'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
             'tags': [],
+            'uploader_url': 'https://www.youtube.com/@coletdjnz',
+            'uploader_id': '@coletdjnz',
             'uploader': 'cole-dlp-test-acc',
-            'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
-
         },
         'playlist': [{
             'info_dict': {
+                # Channel data is not currently available for short renderers (as of 2023-03-01)
                 '_type': 'url',
                 'ie_key': 'Youtube',
                 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
                 'id': 'sSM9J5YH_60',
-                'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
                 'title': 'SHORT short',
-                'channel': 'cole-dlp-test-acc',
-                'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
                 'view_count': int,
                 'thumbnails': list,
             }
@@ -6114,15 +6140,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         'info_dict': {
             'id': 'UCiu-3thuViMebBjw_5nWYrA',
             'title': 'cole-dlp-test-acc - Channels',
-            'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
             'channel': 'cole-dlp-test-acc',
             'description': 'test description',
             'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
             'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
             'tags': [],
+            'uploader_url': 'https://www.youtube.com/@coletdjnz',
+            'uploader_id': '@coletdjnz',
             'uploader': 'cole-dlp-test-acc',
-            'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
-
         },
         'playlist': [{
             'info_dict': {
@@ -6136,7 +6161,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
                 'thumbnails': list,
                 'channel_follower_count': int,
-                'playlist_count': int
+                'playlist_count': int,
+                'uploader': 'PewDiePie',
+                'uploader_url': 'https://www.youtube.com/@PewDiePie',
+                'uploader_id': '@PewDiePie',
             }
         }],
         'params': {'extract_flat': True},
@@ -6146,15 +6174,15 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             'id': 'UCYO_jab_esuFRV4b17AJtAw',
             'tags': ['Mathematics'],
             'title': '3Blue1Brown - About',
-            'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
             'channel_follower_count': int,
             'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
-            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
             'channel': '3Blue1Brown',
-            'uploader': '3Blue1Brown',
             'view_count': int,
             'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+            'uploader_url': 'https://www.youtube.com/@3blue1brown',
+            'uploader_id': '@3blue1brown',
+            'uploader': '3Blue1Brown',
         },
         'playlist_count': 0,
     }]
@@ -6373,15 +6401,15 @@ class YoutubePlaylistIE(InfoExtractor):
             'title': '[OLD]Team Fortress 2 (Class-based LP)',
             'id': 'PLBB231211A4F62143',
             'uploader': 'Wickman',
-            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
+            'uploader_id': '@WickmanVT',
             'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
             'view_count': int,
-            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
+            'uploader_url': 'https://www.youtube.com/@WickmanVT',
             'modified_date': r're:\d{8}',
             'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
             'channel': 'Wickman',
             'tags': [],
-            'channel_url': 'https://www.youtube.com/c/WickmanVT',
+            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
             'availability': 'public',
         },
         'playlist_mincount': 29,
@@ -6401,7 +6429,7 @@ class YoutubePlaylistIE(InfoExtractor):
             'title': 'JODA15',
             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
             'uploader': 'milan',
-            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
+            'uploader_id': '@milan5503',
             'description': '',
             'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
             'tags': [],
@@ -6409,7 +6437,7 @@ class YoutubePlaylistIE(InfoExtractor):
             'view_count': int,
             'channel': 'milan',
             'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
-            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
+            'uploader_url': 'https://www.youtube.com/@milan5503',
             'availability': 'public',
         },
         'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
@@ -6420,13 +6448,13 @@ class YoutubePlaylistIE(InfoExtractor):
             'title': '2018 Chinese New Singles (11/6 updated)',
             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
             'uploader': 'LBK',
-            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
+            'uploader_id': '@music_king',
             'description': 'md5:da521864744d60a198e3a88af4db0d9d',
             'channel': 'LBK',
             'view_count': int,
-            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
+            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
             'tags': [],
-            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
+            'uploader_url': 'https://www.youtube.com/@music_king',
             'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
             'modified_date': r're:\d{8}',
             'availability': 'public',
@@ -6472,8 +6500,8 @@ class YoutubeYtBeIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Small Scale Baler and Braiding Rugs',
             'uploader': 'Backus-Page House Museum',
-            'uploader_id': 'backuspagemuseum',
-            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+            'uploader_id': '@backuspagemuseum',
+            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
             'upload_date': '20161008',
             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
             'categories': ['Nonprofits & Activism'],
@@ -6481,7 +6509,7 @@ class YoutubeYtBeIE(InfoExtractor):
             'like_count': int,
             'age_limit': 0,
             'playable_in_embed': True,
-            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
+            'thumbnail': r're:^https?://.*\.webp',
             'channel': 'Backus-Page House Museum',
             'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
             'live_status': 'not_live',
@@ -6597,7 +6625,7 @@ def _extract_notification_renderer(self, notification):
         if not video_id:
             browse_ep = traverse_obj(
                 notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
-            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
+            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
             post_id = self._search_regex(
                 r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                 'post id', default=None)
@@ -6627,6 +6655,7 @@ def _extract_notification_renderer(self, notification):
             'title': title,
             'channel_id': channel_id,
             'channel': channel,
+            'uploader': channel,
             'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
             'timestamp': timestamp,
         }
@@ -6731,7 +6760,10 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
                 'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                 'playlist_count': int,  # XXX: should have a way of saying > 1
                 'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
-                'thumbnails': list
+                'thumbnails': list,
+                'uploader_id': '@kurzgesagt',
+                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
+                'uploader': 'Kurzgesagt – In a Nutshell',
             }
         }],
         'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -6989,11 +7021,12 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
             'title': 'Mobile Games on Console - Scott The Woz',
             'upload_date': '20210920',
             'uploader': 'Scott The Woz',
-            'uploader_id': 'scottthewoz',
-            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
+            'uploader_id': '@ScottTheWoz',
+            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
             'view_count': int,
             'live_status': 'not_live',
-            'channel_follower_count': int
+            'channel_follower_count': int,
+            'chapters': 'count:20',
         }
     }]
 
@@ -7031,13 +7064,13 @@ class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
             'id': 'qVv6vCqciTM',
             'ext': 'mp4',
             'age_limit': 0,
-            'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
+            'uploader_id': '@sana_natori',
             'comment_count': int,
             'chapters': 'count:13',
             'upload_date': '20221223',
             'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
             'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
-            'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
+            'uploader_url': 'https://www.youtube.com/@sana_natori',
             'like_count': int,
             'release_date': '20221223',
             'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],

From 84ffeb7d5e72e3829319ba7720a8480fc4c7503b Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 16 Apr 2023 03:16:23 +0530
Subject: [PATCH 58/97] [extractor] Do not warn for invalid chapter data in
 description

Fixes https://github.com/yt-dlp/yt-dlp/issues/6811#issuecomment-1509876209
---
 yt_dlp/extractor/common.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 838899052..78288f809 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3658,18 +3658,22 @@ def _extract_chapters_helper(self, chapter_list, start_function, title_function,
             'start_time': start_function(chapter),
             'title': title_function(chapter),
         } for chapter in chapter_list or []]
-        if not strict:
+        if strict:
+            warn = self.report_warning
+        else:
+            warn = self.write_debug
             chapter_list.sort(key=lambda c: c['start_time'] or 0)
 
         chapters = [{'start_time': 0}]
         for idx, chapter in enumerate(chapter_list):
             if chapter['start_time'] is None:
-                self.report_warning(f'Incomplete chapter {idx}')
+                warn(f'Incomplete chapter {idx}')
             elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
                 chapters.append(chapter)
             elif chapter not in chapters:
-                self.report_warning(
-                    f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
+                issue = (f'{chapter["start_time"]} > {duration}' if chapter['start_time'] > duration
+                         else f'{chapter["start_time"]} < {chapters[-1]["start_time"]}')
+                warn(f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
         return chapters[1:]
 
     def _extract_chapters_from_description(self, description, duration):

From 9874e82b5a61582169300bea561b3e8899ad1ef7 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 16 Apr 2023 08:54:48 +0530
Subject: [PATCH 59/97] Do not translate newlines in `--print-to-file`

Fixes https://github.com/yt-dlp/yt-dlp/issues/6808#issuecomment-1509361107
---
 yt_dlp/YoutubeDL.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 7b6fef204..31f7645dc 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2890,8 +2890,8 @@ def format_tmpl(tmpl):
             tmpl = format_tmpl(tmpl)
             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
             if self._ensure_dir_exists(filename):
-                with open(filename, 'a', encoding='utf-8') as f:
-                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
+                with open(filename, 'a', encoding='utf-8', newline='') as f:
+                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
 
     def __forced_printings(self, info_dict, filename, incomplete):
         def print_mandatory(field, actual_field=None):

From ea0570820336a0fe9c3b530d1b0d1e59313274f4 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sun, 16 Apr 2023 12:01:19 -0500
Subject: [PATCH 60/97] [extractor/adobepass] Handle `Charter_Direct` MSO as
 `Spectrum` (#6824)

Authored by: bashonly
---
 yt_dlp/extractor/adobepass.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index e5944f714..68a970f68 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1573,7 +1573,7 @@ def extract_redirect_url(html, url=None, fatal=False):
                         }), headers={
                             'Content-Type': 'application/x-www-form-urlencoded'
                         })
-                elif mso_id == 'Spectrum':
+                elif mso_id in ('Spectrum', 'Charter_Direct'):
                     # Spectrum's login for is dynamically loaded via JS so we need to hardcode the flow
                     # as a one-off implementation.
                     provider_redirect_page, urlh = provider_redirect_page_res

From 7a6f6f24592a8065376f11a58e44878807732cf6 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sun, 16 Apr 2023 12:07:55 -0500
Subject: [PATCH 61/97] [extractor/reddit] Support cookies and short URLs
 (#6825)

Closes #6665, Closes #6753
Authored by: bashonly
---
 yt_dlp/extractor/reddit.py | 72 ++++++++++++++++++++++++++++++--------
 1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 9dba3eca8..3e458456c 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -1,4 +1,3 @@
-import random
 import urllib.parse
 
 from .common import InfoExtractor
@@ -14,7 +13,7 @@
 
 
 class RedditIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/(?P<slug>(?:r|user)/[^/]+/comments/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
         'info_dict': {
@@ -109,6 +108,46 @@ class RedditIE(InfoExtractor):
             'age_limit': 0,
             'channel_id': 'dumbfuckers_club',
         },
+    }, {
+        # post link without subreddit
+        'url': 'https://www.reddit.com/comments/124pp33',
+        'md5': '15eec9d828adcef4468b741a7e45a395',
+        'info_dict': {
+            'id': 'antsenjc2jqa1',
+            'ext': 'mp4',
+            'display_id': '124pp33',
+            'title': 'Harmless prank of some old friends',
+            'uploader': 'Dudezila',
+            'channel_id': 'ContagiousLaughter',
+            'duration': 17,
+            'upload_date': '20230328',
+            'timestamp': 1680012043,
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'age_limit': 0,
+            'comment_count': int,
+            'dislike_count': int,
+            'like_count': int,
+        },
+    }, {
+        # quarantined subreddit post
+        'url': 'https://old.reddit.com/r/GenZedong/comments/12fujy3/based_hasan/',
+        'md5': '3156ea69e3c1f1b6259683c5abd36e71',
+        'info_dict': {
+            'id': '8bwtclfggpsa1',
+            'ext': 'mp4',
+            'display_id': '12fujy3',
+            'title': 'Based Hasan?',
+            'uploader': 'KingNigelXLII',
+            'channel_id': 'GenZedong',
+            'duration': 16,
+            'upload_date': '20230408',
+            'timestamp': 1680979138,
+            'age_limit': 0,
+            'comment_count': int,
+            'dislike_count': int,
+            'like_count': int,
+        },
+        'skip': 'Requires account that has opted-in to the GenZedong subreddit',
     }, {
         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
         'only_matching': True,
@@ -137,21 +176,26 @@ class RedditIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    @staticmethod
-    def _gen_session_id():
-        id_length = 16
-        rand_max = 1 << (id_length * 4)
-        return '%0.*x' % (id_length, random.randrange(rand_max))
-
     def _real_extract(self, url):
-        subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id')
+        host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
 
-        self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id())
-        self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D')
-        data = self._download_json(f'https://{subdomain}reddit.com/{slug}/.json', video_id, fatal=False)
+        data = self._download_json(
+            f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403)
         if not data:
-            # Fall back to old.reddit.com in case the requested subdomain fails
-            data = self._download_json(f'https://old.reddit.com/{slug}/.json', video_id)
+            fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com'
+            self.to_screen(f'{host} request failed, retrying with {fallback_host}')
+            data = self._download_json(
+                f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403)
+
+        if traverse_obj(data, 'error') == 403:
+            reason = data.get('reason')
+            if reason == 'quarantined':
+                self.raise_login_required('Quarantined subreddit; an account that has opted in is required')
+            elif reason == 'private':
+                self.raise_login_required('Private subreddit; an account that has been approved is required')
+            else:
+                raise ExtractorError(f'HTTP Error 403 Forbidden; reason given: {reason}')
+
         data = data[0]['data']['children'][0]['data']
         video_url = data['url']
 

From 9c92b803fa24e48543ce969468d5404376e315b7 Mon Sep 17 00:00:00 2001
From: satan1st <satan1st@users.noreply.github.com>
Date: Sun, 16 Apr 2023 19:20:10 +0200
Subject: [PATCH 62/97] [extractor/gronkh] Extract duration and chapters
 (#6817)

Authored by: satan1st
---
 yt_dlp/extractor/gronkh.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py
index b9370e36c..1ae0a6893 100644
--- a/yt_dlp/extractor/gronkh.py
+++ b/yt_dlp/extractor/gronkh.py
@@ -3,6 +3,7 @@
 from .common import InfoExtractor
 from ..utils import (
     OnDemandPagedList,
+    float_or_none,
     traverse_obj,
     unified_strdate,
 )
@@ -19,7 +20,9 @@ class GronkhIE(InfoExtractor):
             'title': 'H.O.R.D.E. - DAS ZWEiTE ZEiTALTER 🎲 Session 1',
             'view_count': int,
             'thumbnail': 'https://01.cdn.vod.farm/preview/9e2555d3a23bf4e5c5b7c6b3b70a9d84.jpg',
-            'upload_date': '20221111'
+            'upload_date': '20221111',
+            'chapters': 'count:3',
+            'duration': 31463,
         },
         'params': {'skip_download': True}
     }, {
@@ -30,7 +33,8 @@ class GronkhIE(InfoExtractor):
             'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD  #FREiAB1830  !FF7 !horde !archiv',
             'view_count': int,
             'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg',
-            'upload_date': '20211001'
+            'upload_date': '20211001',
+            'duration': 32058,
         },
         'params': {'skip_download': True}
     }, {
@@ -56,6 +60,12 @@ def _real_extract(self, url):
             'upload_date': unified_strdate(data_json.get('created_at')),
             'formats': formats,
             'subtitles': subtitles,
+            'duration': float_or_none(data_json.get('source_length')),
+            'chapters': traverse_obj(data_json, (
+                'chapters', lambda _, v: float_or_none(v['offset']) is not None, {
+                    'title': 'title',
+                    'start_time': ('offset', {float_or_none}),
+                })) or None,
         }
 
 

From 2c566ed14101673c651c08c306c30fa5b4010b85 Mon Sep 17 00:00:00 2001
From: CoryTibbettsDev <70112527+CoryTibbettsDev@users.noreply.github.com>
Date: Sun, 16 Apr 2023 17:26:37 +0000
Subject: [PATCH 63/97] [extractor/whyp] Add extractor (#6803)

Authored by: CoryTibbettsDev
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/whyp.py        | 50 +++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 yt_dlp/extractor/whyp.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 09903423d..b08b3095e 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2295,6 +2295,7 @@
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
+from .whyp import WhypIE
 from .wikimedia import WikimediaIE
 from .willow import WillowIE
 from .wimtv import WimTVIE
diff --git a/yt_dlp/extractor/whyp.py b/yt_dlp/extractor/whyp.py
new file mode 100644
index 000000000..fef89c351
--- /dev/null
+++ b/yt_dlp/extractor/whyp.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    str_or_none,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class WhypIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
+        'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
+        'info_dict': {
+            'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
+            'id': '18337',
+            'title': 'Home Page Example Track',
+            'description': 'md5:bd758000fb93f3159339c852b5b9133c',
+            'ext': 'mp3',
+            'duration': 52.82,
+            'uploader': 'Brad',
+            'uploader_id': '1',
+            'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
+        },
+    }, {
+        'url': 'https://www.whyp.it/tracks/18337',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        unique_id = self._match_id(url)
+        webpage = self._download_webpage(url, unique_id)
+        data = self._search_nuxt_data(webpage, unique_id)['rawTrack']
+
+        return {
+            'url': data['audio_url'],
+            'id': unique_id,
+            **traverse_obj(data, {
+                'title': 'title',
+                'description': 'description',
+                'duration': ('duration', {float_or_none}),
+                'uploader': ('user', 'username'),
+                'uploader_id': ('user', 'id', {str_or_none}),
+                'thumbnail': ('artwork_url', {url_or_none}),
+            }),
+            'ext': 'mp3',
+            'vcodec': 'none',
+            'http_headers': {'Referer': 'https://whyp.it/'},
+        }

From cbdf9408e6f1e35e98fd6477b3d6902df5b8a47f Mon Sep 17 00:00:00 2001
From: zhgwn <130610452+zhgwn@users.noreply.github.com>
Date: Tue, 18 Apr 2023 04:18:29 +0200
Subject: [PATCH 64/97] [extractor/pornez] Support new URL formats (#6792)

Closes #6791, Closes #6298
Authored by: zhgwn
---
 yt_dlp/extractor/pornez.py | 64 ++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 23 deletions(-)

diff --git a/yt_dlp/extractor/pornez.py b/yt_dlp/extractor/pornez.py
index 3a22cb821..bc45f865e 100644
--- a/yt_dlp/extractor/pornez.py
+++ b/yt_dlp/extractor/pornez.py
@@ -1,42 +1,60 @@
 from .common import InfoExtractor
-from ..utils import int_or_none, urljoin
+from ..utils import (
+    clean_html,
+    int_or_none,
+    get_element_by_class,
+    urljoin,
+)
 
 
 class PornezIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornez\.net/video(?P<id>[0-9]+)/'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?pornez\.net/(?:video(?P<id>\w+)|watch)/'
+    _TESTS = [{
         'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/',
-        'md5': '2e19a0a1cff3a5dbea0ef1b9e80bcbbc',
         'info_dict': {
             'id': '344819',
             'ext': 'mp4',
-            'title': r'mistresst funny_penis_names wmv',
+            'title': 'mistresst funny_penis_names wmv',
             'thumbnail': r're:^https?://.*\.jpg$',
             'age_limit': 18,
-        }
-    }
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://pornez.net/watch/leana+lovings+stiff+for+stepdaughter/',
+        'info_dict': {
+            'id': '156161',
+            'ext': 'mp4',
+            'title': 'Watch leana lovings stiff for stepdaughter porn video.',
+            'age_limit': 18,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://pornez.net/videovzs27fj/tutor4k-e14-blue-wave-1080p-nbq-tutor4k-e14-blue-wave/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        iframe_src = self._html_search_regex(
-            r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe', fatal=True)
-        iframe_src = urljoin('https://pornez.net', iframe_src)
-        title = self._html_search_meta(['name', 'twitter:title', 'og:title'], webpage, 'title', default=None)
-        if title is None:
-            title = self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title', fatal=True)
-        thumbnail = self._html_search_meta(['thumbnailUrl'], webpage, 'title', default=None)
-        webpage = self._download_webpage(iframe_src, video_id)
-        entries = self._parse_html5_media_entries(iframe_src, webpage, video_id)[0]
-        for format in entries['formats']:
-            height = self._search_regex(r'_(\d+)\.m3u8', format['url'], 'height')
-            format['format_id'] = '%sp' % height
-            format['height'] = int_or_none(height)
+        if not video_id:  # "/watch/..." URLs carry no ID group; read it from the page's "?p=<id>" <link>
+            video_id = self._search_regex(
+                r'<link[^>]+\bhref=["\']https?://pornez\.net/\?p=(\w+)["\']', webpage, 'id')
+
+        # Resolve the iframe URL once: the same absolute URL is fetched and used as base URL for the media entries
+        iframe_src = urljoin('https://pornez.net', self._html_search_regex(r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe'))
+        iframe = self._download_webpage(iframe_src, video_id)
+        entries = self._parse_html5_media_entries(iframe_src, iframe, video_id)[0]
+        for fmt in entries['formats']:
+            height = self._search_regex(r'_(\d+)\.m3u8', fmt['url'], 'height')
+            fmt['format_id'] = '%sp' % height
+            fmt['height'] = int_or_none(height)
 
         entries.update({
             'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'age_limit': 18
+            'title': (clean_html(get_element_by_class('video-title', webpage))
+                      or self._html_search_meta(
+                      ['twitter:title', 'og:title', 'description'], webpage, 'title', default=None)),
+            'thumbnail': self._html_search_meta(['thumbnailUrl'], webpage, 'thumb', default=None),
+            'age_limit': 18,
         })
         return entries

From e5265dc6517478e589ee3c1ff0cb19bdf4e35ce1 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Mon, 17 Apr 2023 21:27:33 -0500
Subject: [PATCH 65/97] [extractor/stageplus] Add extractor (#6838)

Closes #6806
Authored by: bashonly
---
 yt_dlp/extractor/_extractors.py |   1 +
 yt_dlp/extractor/stageplus.py   | 518 ++++++++++++++++++++++++++++++++
 2 files changed, 519 insertions(+)
 create mode 100644 yt_dlp/extractor/stageplus.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index b08b3095e..deb92b5fc 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1786,6 +1786,7 @@
     BellatorIE,
     ParamountNetworkIE,
 )
+from .stageplus import StagePlusVODConcertIE
 from .startrek import StarTrekIE
 from .stitcher import (
     StitcherIE,
diff --git a/yt_dlp/extractor/stageplus.py b/yt_dlp/extractor/stageplus.py
new file mode 100644
index 000000000..adb4ebbc2
--- /dev/null
+++ b/yt_dlp/extractor/stageplus.py
@@ -0,0 +1,518 @@
+import json
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    traverse_obj,
+    try_call,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class StagePlusVODConcertIE(InfoExtractor):
+    _NETRC_MACHINE = 'stageplus'
+    _VALID_URL = r'https?://(?:www\.)?stage-plus\.com/video/(?P<id>vod_concert_\w+)'
+    _TESTS = [{
+        'url': 'https://www.stage-plus.com/video/vod_concert_APNM8GRFDPHMASJKBSPJACG',
+        'playlist_count': 6,
+        'info_dict': {
+            'id': 'vod_concert_APNM8GRFDPHMASJKBSPJACG',
+            'title': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
+            'description': 'md5:50f78ec180518c9bdb876bac550996fc',
+            'artist': ['Yuja Wang', 'Lorenzo Viotti'],
+            'upload_date': '20230331',
+            'timestamp': 1680249600,
+            'release_date': '20210709',
+            'release_timestamp': 1625788800,
+            'thumbnails': 'count:3',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': 'performance_work_A1IN4PJFE9MM2RJ3CLBMUSJBBSOJAD9O',
+                'ext': 'mp4',
+                'title': 'Piano Concerto No. 2 in C Minor, Op. 18',
+                'description': 'md5:50f78ec180518c9bdb876bac550996fc',
+                'upload_date': '20230331',
+                'timestamp': 1680249600,
+                'release_date': '20210709',
+                'release_timestamp': 1625788800,
+                'duration': 2207,
+                'chapters': 'count:5',
+                'artist': ['Yuja Wang'],
+                'composer': ['Sergei Rachmaninoff'],
+                'album': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
+                'album_artist': ['Yuja Wang', 'Lorenzo Viotti'],
+                'track': 'Piano Concerto No. 2 in C Minor, Op. 18',
+                'track_number': 1,
+                'genre': 'Instrumental Concerto',
+            },
+        }],
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    # TODO: Prune this after livestream and/or album extractors are added
+    _GRAPHQL_QUERY = '''query videoDetailPage($videoId: ID!, $sliderItemsFirst: Int = 24) {
+  node(id: $videoId) {
+    __typename
+    ...LiveConcertFields
+    ... on LiveConcert {
+      artists {
+        edges {
+          role {
+            ...RoleFields
+          }
+          node {
+            id
+            name
+            sortName
+          }
+        }
+      }
+      isAtmos
+      maxResolution
+      groups {
+        id
+        name
+        typeDisplayName
+      }
+      shortDescription
+      performanceWorks {
+        ...livePerformanceWorkFields
+      }
+      totalDuration
+      sliders {
+        ...contentContainerFields
+      }
+      vodConcert {
+        __typename
+        id
+      }
+    }
+    ...VideoFields
+    ... on Video {
+      artists {
+        edges {
+          role {
+            ...RoleFields
+          }
+          node {
+            id
+            name
+            sortName
+          }
+        }
+      }
+      isAtmos
+      maxResolution
+      isLossless
+      description
+      productionDate
+      takedownDate
+      sliders {
+        ...contentContainerFields
+      }
+    }
+    ...VodConcertFields
+    ... on VodConcert {
+      artists {
+        edges {
+          role {
+            ...RoleFields
+          }
+          node {
+            id
+            name
+            sortName
+          }
+        }
+      }
+      isAtmos
+      maxResolution
+      groups {
+        id
+        name
+        typeDisplayName
+      }
+      performanceWorks {
+        ...PerformanceWorkFields
+      }
+      shortDescription
+      productionDate
+      takedownDate
+      sliders {
+        ...contentContainerFields
+      }
+    }
+  }
+}
+
+fragment LiveConcertFields on LiveConcert {
+  endTime
+  id
+  pictures {
+    ...PictureFields
+  }
+  reruns {
+    ...liveConcertRerunFields
+  }
+  publicationLevel
+  startTime
+  streamStartTime
+  subtitle
+  title
+  typeDisplayName
+  stream {
+    ...liveStreamFields
+  }
+  trailerStream {
+    ...streamFields
+  }
+  geoAccessCountries
+  geoAccessMode
+}
+
+fragment PictureFields on Picture {
+  id
+  url
+  type
+}
+
+fragment liveConcertRerunFields on LiveConcertRerun {
+  streamStartTime
+  endTime
+  startTime
+  stream {
+    ...rerunStreamFields
+  }
+}
+
+fragment rerunStreamFields on RerunStream {
+  publicationLevel
+  streamType
+  url
+}
+
+fragment liveStreamFields on LiveStream {
+  publicationLevel
+  streamType
+  url
+}
+
+fragment streamFields on Stream {
+  publicationLevel
+  streamType
+  url
+}
+
+fragment RoleFields on Role {
+  __typename
+  id
+  type
+  displayName
+}
+
+fragment livePerformanceWorkFields on LivePerformanceWork {
+  __typename
+  id
+  artists {
+    ...artistWithRoleFields
+  }
+  groups {
+    edges {
+      node {
+        id
+        name
+        typeDisplayName
+      }
+    }
+  }
+  work {
+    ...workFields
+  }
+}
+
+fragment artistWithRoleFields on ArtistWithRoleConnection {
+  edges {
+    role {
+      ...RoleFields
+    }
+    node {
+      id
+      name
+      sortName
+    }
+  }
+}
+
+fragment workFields on Work {
+  id
+  title
+  movements {
+    id
+    title
+  }
+  composers {
+    id
+    name
+  }
+  genre {
+    id
+    title
+  }
+}
+
+fragment contentContainerFields on CuratedContentContainer {
+  __typename
+  ...SliderFields
+  ...BannerFields
+}
+
+fragment SliderFields on Slider {
+  id
+  headline
+  items(first: $sliderItemsFirst) {
+    edges {
+      node {
+        id
+        __typename
+        ...AlbumFields
+        ...ArtistFields
+        ...EpochFields
+        ...GenreFields
+        ...GroupFields
+        ...LiveConcertFields
+        ...PartnerFields
+        ...PerformanceWorkFields
+        ...VideoFields
+        ...VodConcertFields
+      }
+    }
+  }
+}
+
+fragment AlbumFields on Album {
+  artistAndGroupDisplayInfo
+  id
+  pictures {
+    ...PictureFields
+  }
+  title
+}
+
+fragment ArtistFields on Artist {
+  id
+  name
+  roles {
+    ...RoleFields
+  }
+  pictures {
+    ...PictureFields
+  }
+}
+
+fragment EpochFields on Epoch {
+  id
+  endYear
+  pictures {
+    ...PictureFields
+  }
+  startYear
+  title
+}
+
+fragment GenreFields on Genre {
+  id
+  pictures {
+    ...PictureFields
+  }
+  title
+}
+
+fragment GroupFields on Group {
+  id
+  name
+  typeDisplayName
+  pictures {
+    ...PictureFields
+  }
+}
+
+fragment PartnerFields on Partner {
+  id
+  name
+  typeDisplayName
+  subtypeDisplayName
+  pictures {
+    ...PictureFields
+  }
+}
+
+fragment PerformanceWorkFields on PerformanceWork {
+  __typename
+  id
+  artists {
+    ...artistWithRoleFields
+  }
+  groups {
+    edges {
+      node {
+        id
+        name
+        typeDisplayName
+      }
+    }
+  }
+  work {
+    ...workFields
+  }
+  stream {
+    ...streamFields
+  }
+  vodConcert {
+    __typename
+    id
+  }
+  duration
+  cuePoints {
+    mark
+    title
+  }
+}
+
+fragment VideoFields on Video {
+  id
+  archiveReleaseDate
+  title
+  subtitle
+  pictures {
+    ...PictureFields
+  }
+  stream {
+    ...streamFields
+  }
+  trailerStream {
+    ...streamFields
+  }
+  duration
+  typeDisplayName
+  duration
+  geoAccessCountries
+  geoAccessMode
+  publicationLevel
+  takedownDate
+}
+
+fragment VodConcertFields on VodConcert {
+  id
+  archiveReleaseDate
+  pictures {
+    ...PictureFields
+  }
+  subtitle
+  title
+  typeDisplayName
+  totalDuration
+  geoAccessCountries
+  geoAccessMode
+  trailerStream {
+   ...streamFields
+  }
+  publicationLevel
+  takedownDate
+}
+
+fragment BannerFields on Banner {
+  description
+  link
+  pictures {
+    ...PictureFields
+  }
+  title
+}'''
+
+    _TOKEN = None
+
+    def _perform_login(self, username, password):
+        auth = self._download_json('https://audience.api.stageplus.io/oauth/token', None, headers={
+            'Content-Type': 'application/json',
+            'Origin': 'https://www.stage-plus.com',
+        }, data=json.dumps({
+            'grant_type': 'password',
+            'username': username,
+            'password': password,
+            'device_info': 'Chrome (Windows)',
+            'client_device_id': str(uuid.uuid4()),
+        }, separators=(',', ':')).encode(), note='Logging in')
+
+        if auth.get('access_token'):
+            self._TOKEN = auth['access_token']
+
+    def _real_initialize(self):
+        if self._TOKEN:
+            return
+
+        self._TOKEN = try_call(
+            lambda: self._get_cookies('https://www.stage-plus.com/')['dgplus_access_token'].value)
+        if not self._TOKEN:
+            self.raise_login_required()
+
+    def _real_extract(self, url):  # returns a playlist with one entry per streamable performance work
+        concert_id = self._match_id(url)
+
+        data = self._download_json('https://audience.api.stageplus.io/graphql', concert_id, headers={
+            'authorization': f'Bearer {self._TOKEN}',
+            'content-type': 'application/json',
+            'Origin': 'https://www.stage-plus.com',
+        }, data=json.dumps({
+            'query': self._GRAPHQL_QUERY,
+            'variables': {'videoId': concert_id},
+            'operationName': 'videoDetailPage'
+        }, separators=(',', ':')).encode())['data']['node']
+
+        metadata = traverse_obj(data, {  # concert-level fields, shared by the playlist and every entry
+            'title': 'title',
+            'description': ('shortDescription', {str}),
+            'artist': ('artists', 'edges', ..., 'node', 'name'),
+            'timestamp': ('archiveReleaseDate', {unified_timestamp}),
+            'release_timestamp': ('productionDate', {unified_timestamp}),
+        })
+
+        thumbnails = traverse_obj(data, ('pictures', lambda _, v: url_or_none(v['url']), {
+            'id': 'id',  # the query's PictureFields fragment only selects id/url/type; there is no "name"
+            'url': 'url',
+        })) or None
+
+        m3u8_headers = {'jwt': self._TOKEN}  # sent with the manifest request and stored as http_headers
+
+        entries = []
+        for idx, video in enumerate(traverse_obj(data, (  # only works that have an id and a valid stream URL
+                'performanceWorks', lambda _, v: v['id'] and url_or_none(v['stream']['url']))), 1):
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+                video['stream']['url'], video['id'], 'mp4', m3u8_id='hls', headers=m3u8_headers)
+            entries.append({
+                'id': video['id'],
+                'formats': formats,
+                'subtitles': subtitles,
+                'http_headers': m3u8_headers,
+                'album': metadata.get('title'),
+                'album_artist': metadata.get('artist'),
+                'track_number': idx,
+                **metadata,  # concert-level defaults; per-work values below take precedence
+                **traverse_obj(video, {
+                    'title': ('work', 'title'),
+                    'track': ('work', 'title'),
+                    'duration': ('duration', {float_or_none}),
+                    'chapters': (
+                        'cuePoints', lambda _, v: float_or_none(v['mark']) is not None, {
+                            'title': 'title',
+                            'start_time': ('mark', {float_or_none}),
+                        }),
+                    'artist': ('artists', 'edges', ..., 'node', 'name'),
+                    'composer': ('work', 'composers', ..., 'name'),
+                    'genre': ('work', 'genre', 'title'),
+                }),
+            })
+
+        return self.playlist_result(entries, concert_id, thumbnails=thumbnails, **metadata)

From ab29e47029e2f5b48abbbab78e82faf7cf6e9506 Mon Sep 17 00:00:00 2001
From: qbnu <93988953+qbnu@users.noreply.github.com>
Date: Tue, 18 Apr 2023 02:37:37 +0000
Subject: [PATCH 66/97] [extractor/bilibili] Support festival videos (#6547)

Closes #6138
Authored by: qbnu
---
 yt_dlp/extractor/bilibili.py | 86 ++++++++++++++++++++++++++++++------
 1 file changed, 73 insertions(+), 13 deletions(-)

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 91d436dd8..faa2218ce 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -134,7 +134,7 @@ def _get_all_children(self, reply):
 
 
 class BiliBiliIE(BilibiliBaseIE):
-    _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
 
     _TESTS = [{
         'url': 'https://www.bilibili.com/video/BV13x41117TL',
@@ -282,19 +282,60 @@ class BiliBiliIE(BilibiliBaseIE):
             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
         },
         'params': {'skip_download': True},
+    }, {
+        'note': 'video redirects to festival page',
+        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
+        'info_dict': {
+            'id': 'BV1wP4y1P72h',
+            'ext': 'mp4',
+            'title': '牛虎年相交之际，一首传统民族打击乐《牛斗虎》祝大家新春快乐，虎年大吉！【bilibili音乐虎闹新春】',
+            'timestamp': 1643947497,
+            'upload_date': '20220204',
+            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
+            'uploader': '叨叨冯聊音乐',
+            'duration': 246.719,
+            'uploader_id': '528182630',
+            'view_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+        },
+        'params': {'skip_download': True},
+    }, {
+        'note': 'newer festival video',
+        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
+        'info_dict': {
+            'id': 'BV1ay4y1d77f',
+            'ext': 'mp4',
+            'title': '【崩坏3新春剧场】为特别的你送上祝福！',
+            'timestamp': 1674273600,
+            'upload_date': '20230121',
+            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
+            'uploader': '果蝇轰',
+            'duration': 1111.722,
+            'uploader_id': '8469526',
+            'view_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+        },
+        'params': {'skip_download': True},
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
-        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
 
-        video_data = initial_state['videoData']
+        is_festival = 'videoData' not in initial_state
+        if is_festival:
+            video_data = initial_state['videoInfo']
+        else:
+            play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
+            video_data = initial_state['videoData']
+
         video_id, title = video_data['bvid'], video_data.get('title')
 
         # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
-        page_list_json = traverse_obj(
+        page_list_json = not is_festival and traverse_obj(
             self._download_json(
                 'https://api.bilibili.com/x/player/pagelist', video_id,
                 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
@@ -317,20 +358,39 @@ def _real_extract(self, url):
 
         cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
 
+        festival_info = {}
+        if is_festival:
+            play_info = self._download_json(
+                'https://api.bilibili.com/x/player/playurl', video_id,
+                query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
+                note='Extracting festival video formats')['data']
+
+            festival_info = traverse_obj(initial_state, {
+                'uploader': ('videoInfo', 'upName'),
+                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
+                'like_count': ('videoStatus', 'like', {int_or_none}),
+                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
+            }, get_all=False)
+
         return {
+            **traverse_obj(initial_state, {
+                'uploader': ('upData', 'name'),
+                'uploader_id': ('upData', 'mid', {str_or_none}),
+                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
+                'tags': ('tags', ..., 'tag_name'),
+                'thumbnail': ('videoData', 'pic', {url_or_none}),
+            }),
+            **festival_info,
+            **traverse_obj(video_data, {
+                'description': 'desc',
+                'timestamp': ('pubdate', {int_or_none}),
+                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
+                'comment_count': ('stat', 'reply', {int_or_none}),
+            }, get_all=False),
             'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
             'formats': self.extract_formats(play_info),
             '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
             'title': title,
-            'description': traverse_obj(initial_state, ('videoData', 'desc')),
-            'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
-            'uploader': traverse_obj(initial_state, ('upData', 'name')),
-            'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
-            'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
-            'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
-            'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
-            'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
-            'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
             'duration': float_or_none(play_info.get('timelength'), scale=1000),
             'chapters': self._get_chapters(aid, cid),
             'subtitles': self.extract_subtitles(video_id, aid, cid),

From 6a765f135ccb654861336ea27a2c1c24ea8e286f Mon Sep 17 00:00:00 2001
From: vidiot720 <128325907+vidiot720@users.noreply.github.com>
Date: Wed, 19 Apr 2023 09:46:57 +1000
Subject: [PATCH 67/97] [extractor/sbs] Overhaul extractor for new API (#6839)

Closes #6543
Authored by: vidiot720, dirkf, bashonly
---
 yt_dlp/extractor/sbs.py | 109 ++++++++++++++++++++++++++++++----------
 yt_dlp/utils.py         |   4 ++
 2 files changed, 86 insertions(+), 27 deletions(-)

diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py
index 45320339d..ac0b6de20 100644
--- a/yt_dlp/extractor/sbs.py
+++ b/yt_dlp/extractor/sbs.py
@@ -1,7 +1,13 @@
 from .common import InfoExtractor
 from ..utils import (
-    smuggle_url,
-    ExtractorError,
+    HEADRequest,
+    float_or_none,
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    traverse_obj,
+    update_url_query,
+    url_or_none,
 )
 
 
@@ -11,7 +17,7 @@ class SBSIE(InfoExtractor):
         https?://(?:www\.)?sbs\.com\.au/(?:
             ondemand(?:
                 /video/(?:single/)?|
-                /movie/[^/]+/|
+                /(?:movie|tv-program)/[^/]+/|
                 /(?:tv|news)-series/(?:[^/]+/){3}|
                 .*?\bplay=|/watch/
             )|news/(?:embeds/)?video/
@@ -27,18 +33,21 @@ class SBSIE(InfoExtractor):
         # Original URL is handled by the generic IE which finds the iframe:
         # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
         'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
-        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
+        'md5': '31f84a7a19b53635db63c73f8ab0c4a7',
         'info_dict': {
-            'id': '_rFBPRPO4pMR',
+            'id': '320403011771',  # '_rFBPRPO4pMR',
             'ext': 'mp4',
             'title': 'Dingo Conservation (The Feed)',
             'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
-            'thumbnail': r're:http://.*\.jpg',
+            'thumbnail': r're:https?://.*\.jpg',
             'duration': 308,
             'timestamp': 1408613220,
             'upload_date': '20140821',
             'uploader': 'SBSC',
+            'tags': None,
+            'categories': None,
         },
+        'expected_warnings': ['Unable to download JSON metadata'],
     }, {
         'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
         'only_matching': True,
@@ -70,34 +79,80 @@ class SBSIE(InfoExtractor):
     }, {
         'url': 'https://www.sbs.com.au/ondemand/tv-series/the-handmaids-tale/season-5/the-handmaids-tale-s5-ep1/2065631811776',
         'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/tv-program/autun-romes-forgotten-sister/2116212803602',
+        'only_matching': True,
     }]
 
+    _GEO_COUNTRIES = ['AU']
+    _AUS_TV_PARENTAL_GUIDELINES = {
+        'P': 0,
+        'C': 7,
+        'G': 0,
+        'PG': 0,
+        'M': 14,
+        'MA15+': 15,
+        'MAV15+': 15,
+        'R18+': 18,
+    }
+    _PLAYER_API = 'https://www.sbs.com.au/api/v3'
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        player_params = self._download_json(
-            'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
+        formats, subtitles = self._extract_smil_formats_and_subtitles(
+            update_url_query(f'{self._PLAYER_API}/video_smil', {'id': video_id}), video_id)
 
-        error = player_params.get('error')
-        if error:
-            error_message = 'Sorry, The video you are looking for does not exist.'
-            video_data = error.get('results') or {}
-            error_code = error.get('errorCode')
-            if error_code == 'ComingSoon':
-                error_message = '%s is not yet available.' % video_data.get('title', '')
-            elif error_code in ('Forbidden', 'intranetAccessOnly'):
-                error_message = 'Sorry, This video cannot be accessed via this website'
-            elif error_code == 'Expired':
-                error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+        if not formats:  # the SMIL gave nothing playable; probe the media host to see if geo-blocking is why
+            urlh = self._request_webpage(
+                HEADRequest('https://sbs-vod-prod-01.akamaized.net/'), video_id,
+                note='Checking geo-restriction', fatal=False, expected_status=403)
+            if urlh:
+                error_reasons = urlh.headers.get_all('x-error-reason') or []
+                if 'geo-blocked' in error_reasons:
+                    self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+            self.raise_no_formats('No formats are available', video_id=video_id)
 
-        urls = player_params['releaseUrls']
-        theplatform_url = (urls.get('progressive') or urls.get('html')
-                           or urls.get('standard') or player_params['relatedItemsURL'])
+        media = traverse_obj(self._download_json(
+            f'{self._PLAYER_API}/video_stream', video_id, fatal=False,
+            query={'id': video_id, 'context': 'tv'}), ('video_object', {dict})) or {}
+
+        media.update(self._download_json(
+            f'https://catalogue.pr.sbsod.com/mpx-media/{video_id}',
+            video_id, fatal=not media) or {})  # only fatal when video_stream gave no metadata
+
+        # For named episodes, use the catalogue's title to set episode, rather than generic 'Episode N'.
+        if traverse_obj(media, ('partOfSeries', {dict})):
+            media['epName'] = traverse_obj(media, ('title', {str}))
 
         return {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
             'id': video_id,
-            'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),
-            'is_live': player_params.get('streamType') == 'live',
+            **traverse_obj(media, {
+                'title': ('name', {str}),
+                'description': ('description', {str}),
+                'channel': ('taxonomy', 'channel', 'name', {str}),
+                'series': ((('partOfSeries', 'name'), 'seriesTitle'), {str}),
+                'series_id': ((('partOfSeries', 'uuid'), 'seriesID'), {str}),
+                'season_number': ('seasonNumber', {int_or_none}),
+                'episode': ('epName', {str}),
+                'episode_number': ('episodeNumber', {int_or_none}),
+                'timestamp': (('datePublished', ('publication', 'startDate')), {parse_iso8601}),
+                'release_year': ('releaseYear', {int_or_none}),
+                'duration': ('duration', ({float_or_none}, {parse_duration})),
+                'is_live': ('liveStream', {bool}),
+                'age_limit': (
+                    ('classificationID', 'contentRating'), {str.upper}, {self._AUS_TV_PARENTAL_GUIDELINES.get}),
+            }, get_all=False),
+            **traverse_obj(media, {
+                'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}),
+                'tags': (('consumerAdviceTexts', ('sbsSubCertification', 'consumerAdvice')), ..., {str}),
+                'thumbnails': ('thumbnails', lambda _, v: url_or_none(v['contentUrl']), {
+                    'id': ('name', {str}),
+                    'url': 'contentUrl',
+                    'width': ('width', {int_or_none}),
+                    'height': ('height', {int_or_none}),
+                }),
+            }),
+            'formats': formats,
+            'subtitles': subtitles,
+            'uploader': 'SBSC',
         }
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 40533c2cb..746a2885d 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -4093,6 +4093,10 @@ def data(self, data):
         def close(self):
             return self._out.strip()
 
+    # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
+    # This will not trigger false positives since only UTF-8 text is being replaced
+    dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')
+
     def parse_node(node):
         target = TTMLPElementParser()
         parser = xml.etree.ElementTree.XMLParser(target=target)

From 8f0be90ecb3b8d862397177bb226f17b245ef933 Mon Sep 17 00:00:00 2001
From: garret <76261416+garret1317@users.noreply.github.com>
Date: Wed, 19 Apr 2023 05:21:24 +0100
Subject: [PATCH 68/97] [extractor/nhk] Add `NhkRadiru` extractor (#6819)

* Add `NhkRadioNewsPage` extractor

Authored by: garret1317
---
 yt_dlp/extractor/_extractors.py |   2 +
 yt_dlp/extractor/nhk.py         | 140 +++++++++++++++++++++++++++++++-
 2 files changed, 141 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index deb92b5fc..58137d7f6 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1232,6 +1232,8 @@
     NhkForSchoolBangumiIE,
     NhkForSchoolSubjectIE,
     NhkForSchoolProgramListIE,
+    NhkRadioNewsPageIE,
+    NhkRadiruIE,
 )
 from .nhl import NHLIE
 from .nick import (
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index 59702b247..1597962ac 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -6,7 +6,8 @@
     traverse_obj,
     unescapeHTML,
     unified_timestamp,
-    urljoin
+    urljoin,
+    url_or_none
 )
 
 
@@ -334,3 +335,140 @@ def _real_extract(self, url):
             for x in traverse_obj(bangumi_list, ('part', ..., 'part-video-dasid')) or []]
 
         return self.playlist_result(bangumis, program_id, title, description)
+
+
+class NhkRadiruIE(InfoExtractor):
+    _GEO_COUNTRIES = ['JP']
+    IE_DESC = 'NHK らじる (Radiru/Rajiru)'
+    _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
+    _TESTS = [{
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
+        'skip': 'Episode expired on 2023-04-16',
+        'info_dict': {
+            'channel': 'NHK-FM',
+            'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
+            'ext': 'm4a',
+            'id': '0449_01_3853544',
+            'series': 'ジャズ・トゥナイト',
+            'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
+            'timestamp': 1680969600,
+            'title': 'ジャズ・トゥナイト　ＮＥＷジャズ特集',
+            'upload_date': '20230408',
+            'release_timestamp': 1680962400,
+            'release_date': '20230408',
+            'was_live': True,
+        },
+    }, {
+        # playlist, airs every weekday so it should _hopefully_ be okay forever
+        'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
+        'info_dict': {
+            'id': '0458_01',
+            'title': 'ベストオブクラシック',
+            'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
+            'channel': 'NHK-FM',
+            'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
+        },
+        'playlist_mincount': 3,
+    }, {
+        # one with letters in the id
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470',
+        'note': 'Expires on 2024-03-31',
+        'info_dict': {
+            'id': 'F300_06_3738470',
+            'ext': 'm4a',
+            'title': '有島武郎「一房のぶどう」',
+            'description': '朗読：川野一宇（ラジオ深夜便アンカー）\r\n\r\n（2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より）',
+            'channel': 'NHKラジオ第1、NHK-FM',
+            'timestamp': 1635757200,
+            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
+            'release_date': '20161207',
+            'series': 'らじる文庫 by ラジオ深夜便 ',
+            'release_timestamp': 1481126700,
+            'upload_date': '20211101',
+        }
+    }, {
+        # news
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
+        'skip': 'Expires on 2023-04-17',
+        'info_dict': {
+            'id': 'F261_01_3855109',
+            'ext': 'm4a',
+            'channel': 'NHKラジオ第1',
+            'timestamp': 1681635900,
+            'release_date': '20230416',
+            'series': 'NHKラジオニュース',
+            'title': '午後６時のNHKニュース',
+            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
+            'upload_date': '20230416',
+            'release_timestamp': 1681635600,
+        },
+    }]
+
+    def _extract_episode_info(self, headline, programme_id, series_meta):
+        """Build the info dict for one 'detail_list' entry, layered on top of series_meta."""
+        episode_id = f'{programme_id}_{headline["headline_id"]}'
+        episode = traverse_obj(headline, ('file_list', 0, {dict})) or {}  # no file entry must not crash .get() below
+        return {
+            **series_meta,
+            'id': episode_id,
+            'formats': self._extract_m3u8_formats(episode.get('file_name'), episode_id, fatal=False),
+            'container': 'm4a_dash',  # force fixup, AAC-only HLS
+            'was_live': True,
+            'series': series_meta.get('title'),
+            'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
+            **traverse_obj(episode, {
+                'title': 'file_title',
+                'description': 'file_title_sub',
+                'timestamp': ('open_time', {unified_timestamp}),
+                'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
+            }),
+        }
+
+    def _real_extract(self, url):
+        site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
+        programme_id = f'{site_id}_{corner_id}'
+
+        if site_id == 'F261':
+            json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json'
+        else:
+            json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json'
+
+        meta = self._download_json(json_url, programme_id)['main']
+
+        series_meta = traverse_obj(meta, {
+            'title': 'program_name',
+            'channel': 'media_name',
+            'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
+        }, get_all=False)
+
+        if headline_id:
+            return self._extract_episode_info(
+                traverse_obj(meta, (
+                    'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False),
+                programme_id, series_meta)
+
+        def entries():
+            for headline in traverse_obj(meta, ('detail_list', ..., {dict})):
+                yield self._extract_episode_info(headline, programme_id, series_meta)
+
+        return self.playlist_result(
+            entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta)
+
+
+class NhkRadioNewsPageIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.nhk\.or\.jp/radionews/?(?:$|[?#])'
+    _TESTS = [{
+        # airs daily, on-the-hour most hours
+        'url': 'https://www.nhk.or.jp/radionews/',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'id': 'F261_01',
+            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
+            'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
+            'channel': 'NHKラジオ第1',
+            'title': 'NHKラジオニュース',
+        }
+    }]
+
+    def _real_extract(self, url):
+        return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)

From 1ea15603d852971ed7d92f4de12808b27b3d9370 Mon Sep 17 00:00:00 2001
From: truedread <truedread11@gmail.com>
Date: Fri, 21 Apr 2023 20:11:51 -0400
Subject: [PATCH 69/97] [extractor/wevidi] Add extractor (#6868)

Closes #6129
Authored by: truedread
---
 yt_dlp/extractor/_extractors.py |   1 +
 yt_dlp/extractor/wevidi.py      | 108 ++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 yt_dlp/extractor/wevidi.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 58137d7f6..a81682e43 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2298,6 +2298,7 @@
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
+from .wevidi import WeVidiIE
 from .whyp import WhypIE
 from .wikimedia import WikimediaIE
 from .willow import WillowIE
diff --git a/yt_dlp/extractor/wevidi.py b/yt_dlp/extractor/wevidi.py
new file mode 100644
index 000000000..3b6d03238
--- /dev/null
+++ b/yt_dlp/extractor/wevidi.py
@@ -0,0 +1,108 @@
+from .common import InfoExtractor
+from ..utils import clean_html, float_or_none, get_element_by_class, js_to_json, traverse_obj
+
+
+class WeVidiIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?wevidi\.net/watch/(?P<id>[\w-]{11})'
+    _TESTS = [{
+        'url': 'https://wevidi.net/watch/2th7UO5F4KV',
+        'md5': 'b913d1ff5bbad499e2c7ef4aa6d829d7',
+        'info_dict': {
+            'id': '2th7UO5F4KV',
+            'ext': 'mp4',
+            'title': 'YouTube Alternative: WeVidi - customizable channels & more',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:73a27d0a87d49fbcc5584566326ebeed',
+            'uploader': 'eclecRC',
+            'duration': 932.098,
+        }
+    }, {
+        'url': 'https://wevidi.net/watch/ievRuuQHbPS',
+        'md5': 'ce8a94989a959bff9003fa27ee572935',
+        'info_dict': {
+            'id': 'ievRuuQHbPS',
+            'ext': 'mp4',
+            'title': 'WeVidi Playlists',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:32cdfca272687390d9bd9b0c9c6153ee',
+            'uploader': 'WeVidi',
+            'duration': 36.1999,
+        }
+    }, {
+        'url': 'https://wevidi.net/watch/PcMzDWaQSWb',
+        'md5': '55ee0d3434be5d9e5cc76b83f2bb57ec',
+        'info_dict': {
+            'id': 'PcMzDWaQSWb',
+            'ext': 'mp4',
+            'title': 'Cat blep',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:e2c9e2b54b8bb424cc64937c8fdc068f',
+            'uploader': 'WeVidi',
+            'duration': 41.972,
+        }
+    }, {
+        'url': 'https://wevidi.net/watch/wJnRqDHNe_u',
+        'md5': 'c8f263dd47e66cc17546b3abf47b5a77',
+        'info_dict': {
+            'id': 'wJnRqDHNe_u',
+            'ext': 'mp4',
+            'title': 'Gissy Talks: YouTube Alternatives',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:e65036f0d4af80e0af191bd11af5195e',
+            'uploader': 'GissyEva',
+            'duration': 630.451,
+        }
+    }, {
+        'url': 'https://wevidi.net/watch/4m1c4yJR_yc',
+        'md5': 'c63ce5ca6990dce86855fc02ca5bc1ed',
+        'info_dict': {
+            'id': '4m1c4yJR_yc',
+            'ext': 'mp4',
+            'title': 'Enough of that! - Awesome Exilez Podcast',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:96af99dd63468b2dfab3020560e3e9b2',
+            'uploader': 'eclecRC',
+            'duration': 6.804,
+        }
+    }]
+
+    def _extract_formats(self, wvplayer_props):
+        # Taken from WeVidi player JS: https://wevidi.net/layouts/default/static/player.min.js
+        resolution_map = {
+            1: 144,
+            2: 240,
+            3: 360,
+            4: 480,
+            5: 720,
+            6: 1080
+        }
+
+        src_path = f'{wvplayer_props["srcVID"]}/{wvplayer_props["srcUID"]}/{wvplayer_props["srcNAME"]}'
+        for res in traverse_obj(wvplayer_props, ('resolutions', ..., {int}, {lambda x: x or None})):
+            format_id = str(-(res // -2) - 1)
+            yield {
+                'acodec': 'mp4a.40.2',
+                'ext': 'mp4',
+                'format_id': format_id,
+                'height': resolution_map.get(res),
+                'url': f'https://www.wevidi.net/videoplayback/{src_path}/{format_id}',
+                'vcodec': 'avc1.42E01E',
+            }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        wvplayer_props = self._search_json(
+            r'WVPlayer\(', webpage, 'player', video_id,
+            transform_source=lambda x: js_to_json(x.replace('||', '}')))
+
+        return {
+            'id': video_id,
+            'title': clean_html(get_element_by_class('video_title', webpage)),
+            'description': clean_html(get_element_by_class('descr_long', webpage)),
+            'uploader': clean_html(get_element_by_class('username', webpage)),
+            'formats': list(self._extract_formats(wvplayer_props)),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'duration': float_or_none(wvplayer_props.get('duration')),
+        }

From 80b732b7a9585b2a61e456dc0d2d014a439cbaee Mon Sep 17 00:00:00 2001
From: JC-Chung <52159296+JC-Chung@users.noreply.github.com>
Date: Sun, 23 Apr 2023 07:25:04 +0800
Subject: [PATCH 70/97] [extractor/twitch] Extract original size thumbnail
 (#6629)

Authored by: JC-Chung
---
 yt_dlp/extractor/twitch.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
index 6321297bb..9b333f6f6 100644
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -179,6 +179,14 @@ def _download_access_token(self, video_id, token_kind, param_name):
             video_id, ops,
             'Downloading %s access token GraphQL' % token_kind)['data'][method]
 
+    def _get_thumbnails(self, thumbnail):
+        """Return thumbnail entries for `thumbnail` (its '0x0' variant listed first), or None if it is empty."""
+        if not thumbnail:
+            return None
+        # Swap the WIDTHxHEIGHT token before the file extension for 0x0 and give that URL the higher preference
+        zero_sized = re.sub(r'\d+x\d+(\.\w+)($|(?=[?#]))', r'0x0\g<1>', thumbnail)
+        return [{'url': zero_sized, 'preference': 1}, {'url': thumbnail}]
+
 
 class TwitchVodIE(TwitchBaseIE):
     IE_NAME = 'twitch:vod'
@@ -460,15 +468,13 @@ def _extract_info_gql(self, info, item_id):
                 is_live, thumbnail = True, None
             else:
                 is_live = False
-                for p in ('width', 'height'):
-                    thumbnail = thumbnail.replace('{%s}' % p, '0')
 
         return {
             'id': vod_id,
             'title': info.get('title') or 'Untitled Broadcast',
             'description': info.get('description'),
             'duration': int_or_none(info.get('lengthSeconds')),
-            'thumbnail': thumbnail,
+            'thumbnails': self._get_thumbnails(thumbnail),
             'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
             'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
             'timestamp': unified_timestamp(info.get('publishedAt')),
@@ -1053,7 +1059,7 @@ def _real_extract(self, url):
             'display_id': channel_name,
             'title': title,
             'description': description,
-            'thumbnail': thumbnail,
+            'thumbnails': self._get_thumbnails(thumbnail),
             'uploader': uploader,
             'uploader_id': channel_name,
             'timestamp': timestamp,

From 78fde6e3398ff11e5d383a66b28664badeab5180 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 24 Apr 2023 17:21:20 +0530
Subject: [PATCH 71/97] [outtmpl] Allow `\n` in replacements and default.

Fixes: https://github.com/yt-dlp/yt-dlp/issues/6808#issuecomment-1510055357
Fixes: https://github.com/yt-dlp/yt-dlp/issues/6808#issuecomment-1510363645
---
 test/test_YoutubeDL.py | 1 +
 yt_dlp/YoutubeDL.py    | 2 +-
 yt_dlp/options.py      | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 8da1e5e4b..49ae9e2b1 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -822,6 +822,7 @@ def expect_same_infodict(out):
         test('%(title&foo|baz)s.bar', 'baz.bar')
         test('%(x,id&foo|baz)s.bar', 'foo.bar')
         test('%(x,title&foo|baz)s.bar', 'baz.bar')
+        test('%(title&\n|)s', '\n')
 
         # Laziness
         def gen():
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 31f7645dc..61c149e47 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1156,7 +1156,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
         }
         MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
-        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
+        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
             (?P<negate>-)?
             (?P<fields>{FIELD_RE})
             (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 84aeda7f1..d334a9caa 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -243,7 +243,7 @@ def _dict_from_options_callback(
         if multiple_keys:
             allowed_keys = fr'({allowed_keys})(,({allowed_keys}))*'
         mobj = re.match(
-            fr'(?i)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
+            fr'(?is)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
             value[0] if multiple_args else value)
         if mobj is not None:
             keys, val = mobj.group('keys').split(','), mobj.group('val')

From ec9311c41b111110bc52cfbd6ea682c6fb23f77a Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 24 Apr 2023 18:31:36 +0530
Subject: [PATCH 72/97] [outtmpl] Support `str.format` syntax inside
 replacements

Closes #6843
---
 README.md              |  2 +-
 test/test_YoutubeDL.py |  5 ++++-
 yt_dlp/YoutubeDL.py    | 18 ++++++++++++++++--
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 35229f728..efb490ab1 100644
--- a/README.md
+++ b/README.md
@@ -1246,7 +1246,7 @@ # OUTPUT TEMPLATE
 
 1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`
 
-1. **Replacement**: A replacement value can be specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty.
+1. **Replacement**: A replacement value can be specified using a `&` separator according to the [`str.format` mini-language](https://docs.python.org/3/library/string.html#format-specification-mini-language). If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty. E.g. `%(chapters&has chapters|no chapters)s`, `%(title&TITLE={:>20}|NO TITLE)s`
 
 1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`
 
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 49ae9e2b1..3c26bd7c6 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -822,7 +822,10 @@ def expect_same_infodict(out):
         test('%(title&foo|baz)s.bar', 'baz.bar')
         test('%(x,id&foo|baz)s.bar', 'foo.bar')
         test('%(x,title&foo|baz)s.bar', 'baz.bar')
-        test('%(title&\n|)s', '\n')
+        test('%(id&a\nb|)s', ('a\nb', 'a b'))
+        test('%(id&hi {:>10} {}|)s', 'hi       1234 1234')
+        test(R'%(id&{0} {}|)s', 'NA')
+        test(R'%(id&{0.1}|)s', 'NA')
 
         # Laziness
         def gen():
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 61c149e47..dce6cf928 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -21,7 +21,7 @@
 import traceback
 import unicodedata
 import urllib.request
-from string import ascii_letters
+from string import Formatter, ascii_letters
 
 from .cache import Cache
 from .compat import compat_os_name, compat_shlex_quote
@@ -1237,6 +1237,14 @@ def _dumpjson_default(obj):
                 return list(obj)
             return repr(obj)
 
+        class _ReplacementFormatter(Formatter):
+            def get_field(self, field_name, args, kwargs):  # purely numeric names all resolve to args[0]
+                if not field_name.isdigit():
+                    raise ValueError('Unsupported field')
+                return args[0], -1
+
+        replacement_formatter = _ReplacementFormatter()
+
         def create_key(outer_mobj):
             if not outer_mobj.group('has_key'):
                 return outer_mobj.group(0)
@@ -1258,7 +1266,13 @@ def create_key(outer_mobj):
             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                 fmt = f'0{field_size_compat_map[key]:d}d'
 
-            value = default if value is None else value if replacement is None else replacement
+            if value is None:
+                value = default
+            elif replacement is not None:
+                try:
+                    value = replacement_formatter.format(replacement, value)
+                except ValueError:
+                    value = na
 
             flags = outer_mobj.group('conversion') or ''
             str_fmt = f'{fmt[:-1]}s'

From d669772c65e8630162fd6555d0a578b246591921 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 24 Apr 2023 18:52:09 +0530
Subject: [PATCH 73/97] Add `--no-quiet`

Closes #6796
---
 README.md          | 1 +
 yt_dlp/__init__.py | 3 ++-
 yt_dlp/options.py  | 6 +++++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index efb490ab1..ef0c236b5 100644
--- a/README.md
+++ b/README.md
@@ -752,6 +752,7 @@ ## Internet Shortcut Options:
 ## Verbosity and Simulation Options:
     -q, --quiet                     Activate quiet mode. If used with --verbose,
                                     print the log to stderr
+    --no-quiet                      Deactivate quiet mode. (Default)
     --no-warnings                   Ignore warnings
     -s, --simulate                  Do not download the video and do not write
                                     anything to disk
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index bdac1212c..79b9a7679 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -704,7 +704,8 @@ def parse_options(argv=None):
         'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
         'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
     ))
-    opts.quiet = opts.quiet or any_getting or opts.print_json or bool(opts.forceprint)
+    if opts.quiet is None:
+        opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
 
     playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
     write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index d334a9caa..a2f508552 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1079,8 +1079,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
     verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
     verbosity.add_option(
         '-q', '--quiet',
-        action='store_true', dest='quiet', default=False,
+        action='store_true', dest='quiet', default=None,
         help='Activate quiet mode. If used with --verbose, print the log to stderr')
+    verbosity.add_option(
+        '--no-quiet',
+        action='store_false', dest='quiet',
+        help='Deactivate quiet mode. (Default)')
     verbosity.add_option(
         '--no-warnings',
         dest='no_warnings', action='store_true', default=False,

From 04f8018a0544736a18494bc3899d06b05b78fae6 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 24 Apr 2023 18:59:07 +0530
Subject: [PATCH 74/97] [extractor/hentaistigma] Remove extractor

Piracy site

Closes #6870
---
 yt_dlp/extractor/_extractors.py  |  1 -
 yt_dlp/extractor/hentaistigma.py | 37 --------------------------------
 2 files changed, 38 deletions(-)
 delete mode 100644 yt_dlp/extractor/hentaistigma.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index a81682e43..750708d77 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -721,7 +721,6 @@
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
-from .hentaistigma import HentaiStigmaIE
 from .hgtv import HGTVComShowIE
 from .hketv import HKETVIE
 from .hidive import HiDiveIE
diff --git a/yt_dlp/extractor/hentaistigma.py b/yt_dlp/extractor/hentaistigma.py
deleted file mode 100644
index ca5ffc2ae..000000000
--- a/yt_dlp/extractor/hentaistigma.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from .common import InfoExtractor
-
-
-class HentaiStigmaIE(InfoExtractor):
-    _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)'
-    _TEST = {
-        'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
-        'md5': '4e3d07422a68a4cc363d8f57c8bf0d23',
-        'info_dict': {
-            'id': 'inyouchuu-etsu-bonus',
-            'ext': 'mp4',
-            'title': 'Inyouchuu Etsu Bonus',
-            'age_limit': 18,
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_regex(
-            r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
-            webpage, 'title')
-        wrap_url = self._html_search_regex(
-            r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
-        wrap_webpage = self._download_webpage(wrap_url, video_id)
-
-        video_url = self._html_search_regex(
-            r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
-
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-            'age_limit': 18,
-        }

From c16644642b08e2bf4130a6c5fa01395d8718c990 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 24 Apr 2023 19:38:58 +0530
Subject: [PATCH 75/97] Add option `--xff`

Deprecates `--geo-bypass`, `--no-geo-bypass`, `--geo-bypass-country`, `--geo-bypass-ip-block`
---
 README.md          | 18 +++++++++---------
 yt_dlp/__init__.py | 13 +++++++++----
 yt_dlp/options.py  | 25 +++++++++++++++----------
 3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index ef0c236b5..47da19011 100644
--- a/README.md
+++ b/README.md
@@ -463,15 +463,11 @@ ## Geo-restriction:
                                     specified by --proxy (or none, if the option
                                     is not present) is used for the actual
                                     downloading
-    --geo-bypass                    Bypass geographic restriction via faking
-                                    X-Forwarded-For HTTP header (default)
-    --no-geo-bypass                 Do not bypass geographic restriction via
-                                    faking X-Forwarded-For HTTP header
-    --geo-bypass-country CODE       Force bypass geographic restriction with
-                                    explicitly provided two-letter ISO 3166-2
-                                    country code
-    --geo-bypass-ip-block IP_BLOCK  Force bypass geographic restriction with
-                                    explicitly provided IP block in CIDR notation
+    --xff VALUE                     How to fake X-Forwarded-For HTTP header to
+                                    try bypassing geographic restriction. One of
+                                    "default" (Only when known to be useful),
+                                    "never", a two-letter ISO 3166-2 country
+                                    code, or an IP block in CIDR notation
 
 ## Video Selection:
     -I, --playlist-items ITEM_SPEC  Comma separated playlist_index of the items
@@ -2168,6 +2164,10 @@ #### Not recommended
     --youtube-skip-hls-manifest      --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
     --youtube-include-dash-manifest  Default (Alias: --no-youtube-skip-dash-manifest)
     --youtube-include-hls-manifest   Default (Alias: --no-youtube-skip-hls-manifest)
+    --geo-bypass                     --xff "default"
+    --no-geo-bypass                  --xff "never"
+    --geo-bypass-country CODE        --xff CODE
+    --geo-bypass-ip-block IP_BLOCK   --xff IP_BLOCK
 
 
 #### Developer options
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 79b9a7679..47ee3cc02 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -396,12 +396,17 @@ def metadataparser_actions(f):
         except Exception as err:
             raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
 
-    geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
-    if geo_bypass_code is not None:
+    opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None
+    if opts.geo_bypass.lower() not in ('default', 'never'):
         try:
-            GeoUtils.random_ipv4(geo_bypass_code)
+            GeoUtils.random_ipv4(opts.geo_bypass)
         except Exception:
-            raise ValueError('unsupported geo-bypass country or ip-block')
+            raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"')
+        if len(opts.geo_bypass) == 2:
+            opts.geo_bypass_country = opts.geo_bypass
+        else:
+            opts.geo_bypass_ip_block = opts.geo_bypass
+    opts.geo_bypass = opts.geo_bypass.lower() != 'never'
 
     opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)
 
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index a2f508552..362a648cd 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -519,22 +519,27 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
         '--cn-verification-proxy',
         dest='cn_verification_proxy', default=None, metavar='URL',
         help=optparse.SUPPRESS_HELP)
+    geo.add_option(
+        '--xff', metavar='VALUE',
+        dest='geo_bypass', default="default",
+        help=(
+            'How to fake X-Forwarded-For HTTP header to try bypassing geographic restriction. '
+            'One of "default" (Only when known to be useful), "never", '
+            'a two-letter ISO 3166-2 country code, or an IP block in CIDR notation'))
     geo.add_option(
         '--geo-bypass',
-        action='store_true', dest='geo_bypass', default=True,
-        help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (default)')
+        action='store_const', dest='geo_bypass', const='default',
+        help=optparse.SUPPRESS_HELP)
     geo.add_option(
         '--no-geo-bypass',
-        action='store_false', dest='geo_bypass',
-        help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
+        action='store_const', dest='geo_bypass', const='never',
+        help=optparse.SUPPRESS_HELP)
     geo.add_option(
-        '--geo-bypass-country', metavar='CODE',
-        dest='geo_bypass_country', default=None,
-        help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code')
+        '--geo-bypass-country', metavar='CODE', dest='geo_bypass',
+        help=optparse.SUPPRESS_HELP)
     geo.add_option(
-        '--geo-bypass-ip-block', metavar='IP_BLOCK',
-        dest='geo_bypass_ip_block', default=None,
-        help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
+        '--geo-bypass-ip-block', metavar='IP_BLOCK', dest='geo_bypass',
+        help=optparse.SUPPRESS_HELP)
 
     selection = optparse.OptionGroup(parser, 'Video Selection')
     selection.add_option(

From 21b5ec86c2c37d10c5bb97edd7051d3aac16bb3e Mon Sep 17 00:00:00 2001
From: Simon Sawicki <contact@grub4k.xyz>
Date: Mon, 24 Apr 2023 19:56:35 +0200
Subject: [PATCH 76/97] [utils] `traverse_obj`: Allow iterables in traversal
 (#6902)

Authored by: Grub4K
---
 test/test_utils.py | 4 ++++
 yt_dlp/utils.py    | 7 +++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index d4a301583..f2f3b8170 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -2016,6 +2016,8 @@ def test_traverse_obj(self):
                          msg='nested `...` queries should work')
         self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
                               msg='`...` query result should be flattened')
+        self.assertEqual(traverse_obj(range(4), ...), list(range(4)),
+                         msg='`...` should accept iterables')
 
         # Test function as key
         self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
@@ -2023,6 +2025,8 @@ def test_traverse_obj(self):
                          msg='function as query key should perform a filter based on (key, value)')
         self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
                               msg='exceptions in the query function should be catched')
+        self.assertEqual(traverse_obj(range(4), lambda _, x: x % 2 == 0), [0, 2],
+                         msg='function key should accept iterables')
         if __debug__:
             with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
                 traverse_obj(_TEST_DATA, lambda a: ...)
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 746a2885d..f69311462 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -5528,7 +5528,6 @@ def traverse_obj(
                             If no `default` is given and the last path branches, a `list` of results
                             is always returned. If a path ends on a `dict` that result will always be a `dict`.
     """
-    is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
     casefold = lambda k: k.casefold() if isinstance(k, str) else k
 
     if isinstance(expected_type, type):
@@ -5564,7 +5563,7 @@ def apply_key(key, obj, is_last):
             branching = True
             if isinstance(obj, collections.abc.Mapping):
                 result = obj.values()
-            elif is_sequence(obj):
+            elif isinstance(obj, collections.abc.Iterable) and not isinstance(obj, (str, bytes)):
                 result = obj
             elif isinstance(obj, re.Match):
                 result = obj.groups()
@@ -5578,7 +5577,7 @@ def apply_key(key, obj, is_last):
             branching = True
             if isinstance(obj, collections.abc.Mapping):
                 iter_obj = obj.items()
-            elif is_sequence(obj):
+            elif isinstance(obj, collections.abc.Iterable) and not isinstance(obj, (str, bytes)):
                 iter_obj = enumerate(obj)
             elif isinstance(obj, re.Match):
                 iter_obj = itertools.chain(
@@ -5614,7 +5613,7 @@ def apply_key(key, obj, is_last):
                 result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
 
         elif isinstance(key, (int, slice)):
-            if is_sequence(obj):
+            if isinstance(obj, collections.abc.Sequence) and not isinstance(obj, (str, bytes)):
                 branching = isinstance(key, slice)
                 with contextlib.suppress(IndexError):
                     result = obj[key]

From 9b30cd3dfce83c2f0201b28a7a3ef44ab9722664 Mon Sep 17 00:00:00 2001
From: Elyse <26639800+elyse0@users.noreply.github.com>
Date: Mon, 24 Apr 2023 13:16:22 -0600
Subject: [PATCH 77/97] [extractors/rtvc] Add extractors (#6578)

* Add `RTVCPlay` extractor
* Add `RTVCPlayEmbed` extractor
* Add `RTVCKaltura` extractor
* Add `SenalColombiaLive` extractor

Closes #6457
Authored by: elyse0
---
 yt_dlp/extractor/_extractors.py   |   6 +
 yt_dlp/extractor/rtvcplay.py      | 285 ++++++++++++++++++++++++++++++
 yt_dlp/extractor/senalcolombia.py |  31 ++++
 3 files changed, 322 insertions(+)
 create mode 100644 yt_dlp/extractor/rtvcplay.py
 create mode 100644 yt_dlp/extractor/senalcolombia.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 750708d77..b82f52bca 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1621,6 +1621,11 @@
 from .rtp import RTPIE
 from .rtrfm import RTRFMIE
 from .rts import RTSIE
+from .rtvcplay import (
+    RTVCPlayIE,
+    RTVCPlayEmbedIE,
+    RTVCKalturaIE,
+)
 from .rtve import (
     RTVEALaCartaIE,
     RTVEAudioIE,
@@ -1690,6 +1695,7 @@
 )
 from .scrolller import ScrolllerIE
 from .seeker import SeekerIE
+from .senalcolombia import SenalColombiaLiveIE
 from .senategov import SenateISVPIE, SenateGovIE
 from .sendtonews import SendtoNewsIE
 from .servus import ServusIE
diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py
new file mode 100644
index 000000000..741c47262
--- /dev/null
+++ b/yt_dlp/extractor/rtvcplay.py
@@ -0,0 +1,285 @@
+import re
+
+from .common import InfoExtractor, ExtractorError
+from ..utils import (
+    clean_html,
+    determine_ext,
+    int_or_none,
+    float_or_none,
+    js_to_json,
+    mimetype2ext,
+    traverse_obj,
+    urljoin,
+    url_or_none,
+)
+
+
+class RTVCPlayBaseIE(InfoExtractor):
+    _BASE_VALID_URL = r'https?://(?:www\.)?rtvcplay\.co'
+
+    def _extract_player_config(self, webpage, video_id):
+        return self._search_json(
+            r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*=', re.sub(r'"\s*\+\s*"', '', webpage),
+            'player_config', video_id, transform_source=js_to_json)
+
+    def _extract_formats_and_subtitles_player_config(self, player_config, video_id):
+        formats, subtitles = [], {}
+        for source in traverse_obj(player_config, ('sources', ..., lambda _, v: url_or_none(v['url']))):
+            ext = mimetype2ext(source.get('mimetype'), default=determine_ext(source['url']))
+            if ext == 'm3u8':
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    source['url'], video_id, 'mp4', fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            else:
+                formats.append({
+                    'url': source['url'],
+                    'ext': ext,
+                })
+
+        return formats, subtitles
+
+
+class RTVCPlayIE(RTVCPlayBaseIE):
+    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)'
+
+    _TESTS = [{
+        'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional',
+        'info_dict': {
+            'id': 'canal-institucional',
+            'title': r're:^Canal Institucional',
+            'description': 'md5:eff9e548394175928059320c006031ea',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'live_status': 'is_live',
+            'ext': 'mp4',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia',
+        'info_dict': {
+            'id': 'senal-colombia',
+            'title': r're:^Señal Colombia',
+            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'live_status': 'is_live',
+            'ext': 'mp4',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional',
+        'info_dict': {
+            'id': 'radio-nacional',
+            'title': r're:^Radio Nacional',
+            'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'live_status': 'is_live',
+            'ext': 'mp4',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas',
+        'md5': '1288ee6f6d1330d880f98bff2ed710a3',
+        'info_dict': {
+            'id': 'senoritas',
+            'title': 'Señoritas',
+            'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'ext': 'mp4',
+        },
+    }, {
+        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022',
+        'md5': 'f040a7380a269ad633cf837384d5e9fc',
+        'info_dict': {
+            'id': 'james-regresa-clases-28022022',
+            'title': 'James regresa a clases - 28/02/2022',
+            'description': 'md5:c5dcdf757c7ab29305e8763c6007e675',
+            'ext': 'mp4',
+        },
+    }, {
+        'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo',
+        'info_dict': {
+            'id': 'llinas-el-cerebro-y-el-universo',
+            'title': 'Llinás, el cerebro y el universo',
+            'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa',
+        'info_dict': {
+            'id': 'profe-en-tu-casa',
+            'title': 'Profe en tu casa',
+            'description': 'md5:47dbe20e263194413b1db2a2805a4f2e',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'playlist_mincount': 537,
+    }, {
+        'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
+        'info_dict': {
+            'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
+            'title': 'Relato de un náufrago: una travesía del periodismo a la literatura',
+            'description': 'md5:6da28fdca4a5a568ea47ef65ef775603',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'playlist_mincount': 5,
+    }, {
+        'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones',
+        'info_dict': {
+            'id': 'diez-versiones',
+            'title': 'Diez versiones',
+            'description': 'md5:997471ed971cb3fd8e41969457675306',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'playlist_mincount': 20,
+    }]
+
+    def _real_extract(self, url):
+        video_id, category = self._match_valid_url(url).group('id', 'category')
+        webpage = self._download_webpage(url, video_id)
+
+        hydration = self._search_json(
+            r'window\.__RTVCPLAY_STATE__\s*=', webpage, 'hydration',
+            video_id, transform_source=js_to_json)['content']['currentContent']
+
+        asset_id = traverse_obj(hydration, ('video', 'assetid'))
+        if asset_id:
+            hls_url = hydration['base_url_hls'].replace('[node:field_asset_id]', asset_id)
+        else:
+            hls_url = traverse_obj(hydration, ('channel', 'hls'))
+
+        metadata = traverse_obj(hydration, {
+            'title': 'title',
+            'description': 'description',
+            'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'),
+        }, get_all=False)
+
+        # Probably it's a program's page
+        if not hls_url:
+            seasons = traverse_obj(
+                hydration, ('widgets', lambda _, y: y['type'] == 'seasonList', 'contents'),
+                get_all=False)
+            if not seasons:
+                podcast_episodes = hydration.get('audios')
+                if not podcast_episodes:
+                    raise ExtractorError('Could not find asset_id nor program playlist nor podcast episodes')
+
+                return self.playlist_result([
+                    self.url_result(episode['file'], url_transparent=True, **traverse_obj(episode, {
+                        'title': 'title',
+                        'description': ('description', {clean_html}),
+                        'episode_number': ('chapter_number', {float_or_none}, {int_or_none}),
+                        'season_number': ('season', {int_or_none}),
+                    })) for episode in podcast_episodes], video_id, **metadata)
+
+            entries = [self.url_result(
+                urljoin(url, episode['slug']), url_transparent=True,
+                **traverse_obj(season, {
+                    'season': 'title',
+                    'season_number': ('season', {int_or_none}),
+                }), **traverse_obj(episode, {
+                    'title': 'title',
+                    'thumbnail': ('image', 'cover', 'path'),
+                    'episode_number': ('chapter_number', {int_or_none}),
+                })) for season in seasons for episode in traverse_obj(season, ('contents', ...))]
+
+            return self.playlist_result(entries, video_id, **metadata)
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'is_live': category == 'en-vivo',
+            **metadata,
+        }
+
+
+class RTVCPlayEmbedIE(RTVCPlayBaseIE):
+    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/embed/(?P<id>[\w-]+)'
+
+    _TESTS = [{
+        'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9',
+        'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8',
+        'info_dict': {
+            'id': '72b0e699-248b-4929-a4a8-3782702fa7f9',
+            'title': 'Tráiler: Señoritas',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'ext': 'mp4',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        player_config = self._extract_player_config(webpage, video_id)
+        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)
+
+        asset_id = traverse_obj(player_config, ('rtvcplay', 'assetid'))
+        metadata = {} if not asset_id else self._download_json(
+            f'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id, fatal=False)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(metadata, {
+                'title': 'title',
+                'description': 'description',
+                'thumbnail': ('image', ..., 'thumbnail', 'path'),
+            }, get_all=False)
+        }
+
+
+class RTVCKalturaIE(RTVCPlayBaseIE):
+    _VALID_URL = r'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)'
+
+    _TESTS = [{
+        'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html',
+        'info_dict': {
+            'id': 'indexSC',
+            'title': r're:^Señal Colombia',
+            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'live_status': 'is_live',
+            'ext': 'mp4',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        player_config = self._extract_player_config(webpage, video_id)
+        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)
+
+        channel_id = traverse_obj(player_config, ('rtvcplay', 'channelId'))
+        metadata = {} if not channel_id else self._download_json(
+            f'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id, fatal=False)
+
+        fmts, subs = self._extract_m3u8_formats_and_subtitles(
+            traverse_obj(metadata, ('channel', 'hls')), video_id, 'mp4', fatal=False)
+        formats.extend(fmts)
+        self._merge_subtitles(subs, target=subtitles)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'is_live': True,
+            **traverse_obj(metadata, {
+                'title': 'title',
+                'description': 'description',
+                'thumbnail': ('channel', 'image', 'logo', 'path'),
+            })
+        }
diff --git a/yt_dlp/extractor/senalcolombia.py b/yt_dlp/extractor/senalcolombia.py
new file mode 100644
index 000000000..f3c066da7
--- /dev/null
+++ b/yt_dlp/extractor/senalcolombia.py
@@ -0,0 +1,31 @@
+from .common import InfoExtractor
+from .rtvcplay import RTVCKalturaIE
+
+
+class SenalColombiaLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?senalcolombia\.tv/(?P<id>senal-en-vivo)'
+
+    _TESTS = [{
+        'url': 'https://www.senalcolombia.tv/senal-en-vivo',
+        'info_dict': {
+            'id': 'indexSC',
+            'title': 're:^Señal Colombia',
+            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'live_status': 'is_live',
+            'ext': 'mp4',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        hydration = self._search_json(
+            r'<script\b[^>]*data-drupal-selector\s*=\s*"[^"]*drupal-settings-json[^"]*"[^>]*>',
+            webpage, 'hydration', display_id)
+
+        return self.url_result(hydration['envivosrc'], RTVCKalturaIE, display_id)

From c86e433c35fe5da6cb29f3539eef97497f84ed38 Mon Sep 17 00:00:00 2001
From: sqrtNOT <77981959+sqrtNOT@users.noreply.github.com>
Date: Tue, 25 Apr 2023 10:21:06 +0000
Subject: [PATCH 78/97] [extractor/NiconicoSeries] Fix extraction (#6898)

Authored by: sqrtNOT
---
 yt_dlp/extractor/niconico.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index cacefeb42..30b4d7216 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -660,10 +660,10 @@ def _real_extract(self, url):
 
 class NiconicoSeriesIE(InfoExtractor):
     IE_NAME = 'niconico:series'
-    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp(?:/user/\d+)?|nico\.ms)/series/(?P<id>\d+)'
 
     _TESTS = [{
-        'url': 'https://www.nicovideo.jp/series/110226',
+        'url': 'https://www.nicovideo.jp/user/44113208/series/110226',
         'info_dict': {
             'id': '110226',
             'title': 'ご立派ァ！のシリーズ',
@@ -683,7 +683,7 @@ class NiconicoSeriesIE(InfoExtractor):
 
     def _real_extract(self, url):
         list_id = self._match_id(url)
-        webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id)
+        webpage = self._download_webpage(url, list_id)
 
         title = self._search_regex(
             (r'<title>「(.+)（全',
@@ -691,10 +691,9 @@ def _real_extract(self, url):
             webpage, 'title', fatal=False)
         if title:
             title = unescapeHTML(title)
-        playlist = [
-            self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
-            for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)]
-        return self.playlist_result(playlist, list_id, title)
+        json_data = next(self._yield_json_ld(webpage, None, fatal=False))
+        return self.playlist_from_matches(
+            traverse_obj(json_data, ('itemListElement', ..., 'url')), list_id, title, ie=NiconicoIE)
 
 
 class NiconicoHistoryIE(NiconicoPlaylistBaseIE):

From 0c4e0fbcade0fc92d14c2a6d63e360fe067f6192 Mon Sep 17 00:00:00 2001
From: Neurognostic <donovan@tremura.email>
Date: Tue, 25 Apr 2023 12:13:54 -0400
Subject: [PATCH 79/97] [extractor/bitchute] Add more fallback subdomains
 (#6907)

Authored by: Neurognostic
---
 yt_dlp/extractor/bitchute.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py
index 10e7b0b2b..a6779505e 100644
--- a/yt_dlp/extractor/bitchute.py
+++ b/yt_dlp/extractor/bitchute.py
@@ -77,7 +77,10 @@ class BitChuteIE(InfoExtractor):
     def _check_format(self, video_url, video_id):
         urls = orderedSet(
             re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
-            for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
+            for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128',
+                         'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
+                         'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
+                         'seedp29xb', 'zb10-7gsop1v78'))
         for url in urls:
             try:
                 response = self._request_webpage(

From 62beefa818c75c20b6941389bb197051554a5d41 Mon Sep 17 00:00:00 2001
From: Noah <nkempers@outlook.de>
Date: Tue, 25 Apr 2023 22:46:14 +0200
Subject: [PATCH 80/97] [extractor/pornhub] Set access cookies to fix
 extraction (#6685)

Closes #4299
Authored by: Schmoaaaaah, arobase-che

Co-authored-by: Noah <nkempers@outlook.de>
Co-authored-by: ache <ache@ache.one>
---
 yt_dlp/extractor/pornhub.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 5d8d7c100..2f5a572a5 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -58,6 +58,11 @@ def dl(*args, **kwargs):
     def _real_initialize(self):
         self._logged_in = False
 
+    def _set_age_cookies(self, host):
+        self._set_cookie(host, 'age_verified', '1')
+        self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
+        self._set_cookie(host, 'accessPH', '1')
+
     def _login(self, host):
         if self._logged_in:
             return
@@ -267,8 +272,7 @@ def _real_extract(self, url):
         video_id = mobj.group('id')
 
         self._login(host)
-
-        self._set_cookie(host, 'age_verified', '1')
+        self._set_age_cookies(host)
 
         def dl_webpage(platform):
             self._set_cookie(host, 'platform', platform)
@@ -569,6 +573,7 @@ def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         user_id = mobj.group('id')
         videos_url = '%s/videos' % mobj.group('url')
+        self._set_age_cookies(mobj.group('host'))
         page = self._extract_page(url)
         if page:
             videos_url = update_url_query(videos_url, {'page': page})
@@ -633,6 +638,7 @@ def _real_extract(self, url):
         item_id = mobj.group('id')
 
         self._login(host)
+        self._set_age_cookies(host)
 
         return self.playlist_result(self._entries(url, host, item_id), item_id)
 
@@ -812,5 +818,6 @@ def _real_extract(self, url):
         item_id = mobj.group('id')
 
         self._login(host)
+        self._set_age_cookies(host)
 
         return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)

From ed81b74802b4247ee8d9dc0ef87eb52baefede1c Mon Sep 17 00:00:00 2001
From: Alex Klapheke <alexklapheke@gmail.com>
Date: Wed, 26 Apr 2023 02:53:07 -0400
Subject: [PATCH 81/97] [extractor/aeonco] Support YouTube embeds (#6591)

Authored by: alexklapheke
---
 yt_dlp/extractor/aeonco.py | 52 +++++++++++++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/yt_dlp/extractor/aeonco.py b/yt_dlp/extractor/aeonco.py
index 4655862e3..390eae32b 100644
--- a/yt_dlp/extractor/aeonco.py
+++ b/yt_dlp/extractor/aeonco.py
@@ -1,5 +1,6 @@
 from .common import InfoExtractor
 from .vimeo import VimeoIE
+from ..utils import ExtractorError, traverse_obj, url_or_none
 
 
 class AeonCoIE(InfoExtractor):
@@ -19,22 +20,55 @@ class AeonCoIE(InfoExtractor):
         }
     }, {
         'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
-        'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
+        'md5': '03582d795382e49f2fd0b427b55de409',
         'info_dict': {
-            'id': '728595228',
+            'id': '759576926',
             'ext': 'mp4',
             'title': 'Wrought',
-            'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
-            'uploader': 'Biofilm Productions',
-            'uploader_id': 'user140352216',
-            'uploader_url': 'https://vimeo.com/user140352216',
+            'thumbnail': 'https://i.vimeocdn.com/video/1525599692-84614af88e446612f49ca966cf8f80eab2c73376bedd80555741c521c26f9a3e-d_1280',
+            'uploader': 'Aeon Video',
+            'uploader_id': 'aeonvideo',
+            'uploader_url': 'https://vimeo.com/aeonvideo',
             'duration': 1344
         }
+    }, {
+        'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
+        'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
+        'info_dict': {
+            'id': 'emyi4z-O0ls',
+            'ext': 'mp4',
+            'title': 'How to outsmart the Prisoner’s Dilemma - Lucas Husted',
+            'thumbnail': 'https://i.ytimg.com/vi_webp/emyi4z-O0ls/maxresdefault.webp',
+            'uploader': 'TED-Ed',
+            'uploader_id': '@TEDEd',
+            'uploader_url': 'https://www.youtube.com/@TEDEd',
+            'duration': 344,
+            'upload_date': '20200827',
+            'channel_id': 'UCsooa4yRKGN_zEE8iknghZA',
+            'playable_in_embed': True,
+            'description': 'md5:c0959524f08cb60f96fd010f3dfb17f3',
+            'categories': ['Education'],
+            'like_count': int,
+            'channel': 'TED-Ed',
+            'chapters': 'count:7',
+            'channel_url': 'https://www.youtube.com/channel/UCsooa4yRKGN_zEE8iknghZA',
+            'tags': 'count:26',
+            'availability': 'public',
+            'channel_follower_count': int,
+            'view_count': int,
+            'age_limit': 0,
+            'live_status': 'not_live',
+            'comment_count': int,
+        },
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
-        vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
-        return self.url_result(vimeo_url, VimeoIE)
+        embed_url = traverse_obj(self._yield_json_ld(webpage, video_id), (
+            lambda _, v: v['@type'] == 'VideoObject', 'embedUrl', {url_or_none}), get_all=False)
+        if not embed_url:
+            raise ExtractorError('No embed URL found in webpage')
+        if 'player.vimeo.com' in embed_url:
+            embed_url = VimeoIE._smuggle_referrer(embed_url, 'https://aeon.co/')
+        return self.url_result(embed_url)

From 30647668a92a0ca5cd108776804baac0996bd9f7 Mon Sep 17 00:00:00 2001
From: garret <76261416+garret1317@users.noreply.github.com>
Date: Thu, 27 Apr 2023 00:42:07 +0100
Subject: [PATCH 82/97] [extractor/globalplayer] Add extractors (#6903)

Authored by: garret1317
---
 yt_dlp/extractor/_extractors.py  |   7 +
 yt_dlp/extractor/globalplayer.py | 254 +++++++++++++++++++++++++++++++
 2 files changed, 261 insertions(+)
 create mode 100755 yt_dlp/extractor/globalplayer.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index b82f52bca..3b5ae63b1 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -685,6 +685,13 @@
 from .giantbomb import GiantBombIE
 from .giga import GigaIE
 from .glide import GlideIE
+from .globalplayer import (
+    GlobalPlayerLiveIE,
+    GlobalPlayerLivePlaylistIE,
+    GlobalPlayerAudioIE,
+    GlobalPlayerAudioEpisodeIE,
+    GlobalPlayerVideoIE
+)
 from .globo import (
     GloboIE,
     GloboArticleIE,
diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py
new file mode 100755
index 000000000..e0c0d58fd
--- /dev/null
+++ b/yt_dlp/extractor/globalplayer.py
@@ -0,0 +1,254 @@
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    join_nonempty,
+    parse_duration,
+    str_or_none,
+    traverse_obj,
+    unified_strdate,
+    unified_timestamp,
+    urlhandle_detect_ext,
+)
+
+
+class GlobalPlayerBaseIE(InfoExtractor):
+    def _get_page_props(self, url, video_id):
+        webpage = self._download_webpage(url, video_id)
+        return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
+
+    def _request_ext(self, url, video_id):
+        return urlhandle_detect_ext(self._request_webpage(  # Server rejects HEAD requests
+            url, video_id, note='Determining source extension'))
+
+    def _extract_audio(self, episode, series):
+        return {
+            'vcodec': 'none',
+            **traverse_obj(series, {
+                'series': 'title',
+                'series_id': 'id',
+                'thumbnail': 'imageUrl',
+                'uploader': 'itunesAuthor',  # podcasts only
+            }),
+            **traverse_obj(episode, {
+                'id': 'id',
+                'description': ('description', {clean_html}),
+                'duration': ('duration', {parse_duration}),
+                'thumbnail': 'imageUrl',
+                'url': 'streamUrl',
+                'timestamp': (('pubDate', 'startDate'), {unified_timestamp}),
+                'title': 'title',
+            }, get_all=False)
+        }
+
+
+class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
+    _TESTS = [{
+        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
+        'info_dict': {
+            'id': '2mx1E',
+            'ext': 'aac',
+            'display_id': 'smoothchill-uk',
+            'title': 're:^Smooth Chill.+$',
+            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
+            'description': 'Music To Chill To',
+            'live_status': 'is_live',
+        },
+    }, {
+        # national station
+        'url': 'https://www.globalplayer.com/live/heart/uk/',
+        'info_dict': {
+            'id': '2mwx4',
+            'ext': 'aac',
+            'description': 'turn up the feel good!',
+            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+            'live_status': 'is_live',
+            'title': 're:^Heart UK.+$',
+            'display_id': 'heart-uk',
+        },
+    }, {
+        # regional variation
+        'url': 'https://www.globalplayer.com/live/heart/london/',
+        'info_dict': {
+            'id': 'AMqg',
+            'ext': 'aac',
+            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+            'title': 're:^Heart London.+$',
+            'live_status': 'is_live',
+            'display_id': 'heart-london',
+            'description': 'turn up the feel good!',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        station = self._get_page_props(url, video_id)['station']
+        stream_url = station['streamUrl']
+
+        return {
+            'id': station['id'],
+            'display_id': join_nonempty('brandSlug', 'slug', from_dict=station) or station.get('legacyStationPrefix'),
+            'url': stream_url,
+            'ext': self._request_ext(stream_url, video_id),
+            'vcodec': 'none',
+            'is_live': True,
+            **traverse_obj(station, {
+                'title': (('name', 'brandName'), {str_or_none}),
+                'description': 'tagline',
+                'thumbnail': 'brandLogo',
+            }, get_all=False),
+        }
+
+
+class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
+    _TESTS = [{
+        # "live playlist"
+        'url': 'https://www.globalplayer.com/playlists/8bLk/',
+        'info_dict': {
+            'id': '8bLk',
+            'ext': 'aac',
+            'live_status': 'is_live',
+            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
+            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
+            'title': 're:^Classic FM Hall of Fame.+$'
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        station = self._get_page_props(url, video_id)['playlistData']
+        stream_url = station['streamUrl']
+
+        return {
+            'id': video_id,
+            'url': stream_url,
+            'ext': self._request_ext(stream_url, video_id),
+            'vcodec': 'none',
+            'is_live': True,
+            **traverse_obj(station, {
+                'title': 'title',
+                'description': 'description',
+                'thumbnail': 'image',
+            }),
+        }
+
+
+class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
+    _TESTS = [{
+        # podcast
+        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'id': '42KuaM',
+            'title': 'Filthy Ritual',
+            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+            'categories': ['Society & Culture', 'True Crime'],
+            'uploader': 'Global',
+            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
+        },
+    }, {
+        # radio catchup
+        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
+        'playlist_mincount': 3,
+        'info_dict': {
+            'id': '46vyD7z',
+            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+            'title': 'Nick Ferrari',
+            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+        props = self._get_page_props(url, video_id)
+        series = props['podcastInfo'] if podcast else props['catchupInfo']
+
+        return {
+            '_type': 'playlist',
+            'id': video_id,
+            'entries': [self._extract_audio(ep, series) for ep in traverse_obj(
+                        series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
+            'categories': traverse_obj(series, ('categories', ..., 'name')) or None,
+            **traverse_obj(series, {
+                'description': 'description',
+                'thumbnail': 'imageUrl',
+                'title': 'title',
+                'uploader': 'itunesAuthor',  # podcasts only
+            }),
+        }
+
+
+class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
+    _TESTS = [{
+        # podcast
+        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
+        'info_dict': {
+            'id': '7DrfNnE',
+            'ext': 'mp3',
+            'title': 'Filthy Ritual - Trailer',
+            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
+            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+            'duration': 225.0,
+            'timestamp': 1681254900,
+            'series': 'Filthy Ritual',
+            'series_id': '42KuaM',
+            'upload_date': '20230411',
+            'uploader': 'Global',
+        },
+    }, {
+        # radio catchup
+        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
+        'info_dict': {
+            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
+            'ext': 'm4a',
+            'timestamp': 1682056800,
+            'series': 'Nick Ferrari',
+            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+            'upload_date': '20230421',
+            'series_id': '46vyD7z',
+            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+            'title': 'Nick Ferrari',
+            'duration': 10800.0,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+        props = self._get_page_props(url, video_id)
+        episode = props['podcastEpisode'] if podcast else props['catchupEpisode']
+
+        return self._extract_audio(
+            episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {})
+
+
+class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
+        'info_dict': {
+            'id': '2JsSZ7Gm2uP',
+            'ext': 'mp4',
+            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
+            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
+            'upload_date': '20230420',
+            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        meta = self._get_page_props(url, video_id)['videoData']
+
+        return {
+            'id': video_id,
+            **traverse_obj(meta, {
+                'url': 'url',
+                'thumbnail': ('image', 'url'),
+                'title': 'title',
+                'upload_date': ('publish_date', {unified_strdate}),
+                'description': 'description',
+            }),
+        }

From 170605840ea9d5ad75da6576485ea7d125b428ee Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 27 Apr 2023 05:52:22 +0530
Subject: [PATCH 83/97] Populate `filename` and `urls` fields at all stages of
 `--print`

Closes https://github.com/yt-dlp/yt-dlp/issues/6920
---
 yt_dlp/YoutubeDL.py | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index dce6cf928..482b1a49e 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1677,7 +1677,7 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
                 self.add_extra_info(info_copy, extra_info)
                 info_copy, _ = self.pre_process(info_copy)
                 self._fill_common_fields(info_copy, False)
-                self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
+                self.__forced_printings(info_copy)
                 self._raise_pending_errors(info_copy)
                 if self.params.get('force_write_download_archive', False):
                     self.record_download_archive(info_copy)
@@ -2719,7 +2719,7 @@ def is_wellformed(f):
             self.list_formats(info_dict)
         if list_only:
             # Without this printing, -F --print-json will not work
-            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
+            self.__forced_printings(info_dict)
             return info_dict
 
         format_selector = self.format_selector
@@ -2879,6 +2879,12 @@ def _forceprint(self, key, info_dict):
         if info_dict is None:
             return
         info_copy = info_dict.copy()
+        info_copy.setdefault('filename', self.prepare_filename(info_dict))
+        if info_dict.get('requested_formats') is not None:
+            # For RTMP URLs, also include the playpath
+            info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
+        elif info_dict.get('url'):
+            info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
         info_copy['formats_table'] = self.render_formats_table(info_dict)
         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
@@ -2907,7 +2913,9 @@ def format_tmpl(tmpl):
                 with open(filename, 'a', encoding='utf-8', newline='') as f:
                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
 
-    def __forced_printings(self, info_dict, filename, incomplete):
+        return info_copy
+
+    def __forced_printings(self, info_dict, filename=None, incomplete=True):
         def print_mandatory(field, actual_field=None):
             if actual_field is None:
                 actual_field = field
@@ -2920,20 +2928,14 @@ def print_optional(field):
                     and info_dict.get(field) is not None):
                 self.to_stdout(info_dict[field])
 
-        info_dict = info_dict.copy()
-        if filename is not None:
-            info_dict['filename'] = filename
-        if info_dict.get('requested_formats') is not None:
-            # For RTMP URLs, also include the playpath
-            info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
-        elif info_dict.get('url'):
-            info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
-
         if (self.params.get('forcejson')
                 or self.params['forceprint'].get('video')
                 or self.params['print_to_file'].get('video')):
             self.post_extract(info_dict)
-        self._forceprint('video', info_dict)
+
+        if filename:
+            info_dict['filename'] = filename
+        info_dict = self._forceprint('video', info_dict)
 
         print_mandatory('title')
         print_mandatory('id')
@@ -3493,10 +3495,10 @@ def run_pp(self, pp, infodict):
         return infodict
 
     def run_all_pps(self, key, info, *, additional_pps=None):
-        if key != 'video':
-            self._forceprint(key, info)
         for pp in (additional_pps or []) + self._pps[key]:
             info = self.run_pp(pp, info)
+        if key != 'video':
+            self._forceprint(key, info)
         return info
 
     def pre_process(self, ie_info, key='pre_process', files_to_move=None):

From 7cf51f21916292cd80bdeceb37489f5322f166dd Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 27 Apr 2023 07:42:17 +0530
Subject: [PATCH 84/97] [jsinterp] Handle negative numbers better

Closes #6131
---
 test/test_jsinterp.py          | 16 ++++++++++++++++
 test/test_youtube_signature.py |  4 ++++
 yt_dlp/jsinterp.py             |  8 +++++---
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index e090dc791..3283657d7 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -445,6 +445,22 @@ def test_bitwise_operators_overflow(self):
         jsi = JSInterpreter('function x(){return 1236566549 << 5}')
         self.assertEqual(jsi.call_function('x'), 915423904)
 
+    def test_negative(self):
+        jsi = JSInterpreter("function f(){return 2    *    -2.0;}")
+        self.assertEqual(jsi.call_function('f'), -4)
+
+        jsi = JSInterpreter('function f(){return 2    -    - -2;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+
+        jsi = JSInterpreter('function f(){return 2    -    - - -2;}')
+        self.assertEqual(jsi.call_function('f'), 4)
+
+        jsi = JSInterpreter('function f(){return 2    -    + + - -2;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+
+        jsi = JSInterpreter('function f(){return 2    +    - + - -2;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 336e80291..e2b3f0870 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -142,6 +142,10 @@
         'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
         'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
     ),
+    (
+        'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
+        'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
+    ),
 ]
 
 
diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py
index db6526009..5571ecfeb 100644
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@@ -243,7 +243,7 @@ def _separate(expr, delim=',', max_split=None):
             return
         counters = {k: 0 for k in _MATCHING_PARENS.values()}
         start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
-        in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
+        in_quote, escaping, after_op, in_regex_char_group, in_unary_op = None, False, True, False, False
         for idx, char in enumerate(expr):
             if not in_quote and char in _MATCHING_PARENS:
                 counters[_MATCHING_PARENS[char]] += 1
@@ -258,9 +258,11 @@ def _separate(expr, delim=',', max_split=None):
                 elif in_quote == '/' and char in '[]':
                     in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and char in OP_CHARS or (char.isspace() and after_op)
+            in_unary_op = (not in_quote and not in_regex_char_group
+                           and after_op not in (True, False) and char in '-+')
+            after_op = char if (not in_quote and char in OP_CHARS) else (char.isspace() and after_op)
 
-            if char != delim[pos] or any(counters.values()) or in_quote:
+            if char != delim[pos] or any(counters.values()) or in_quote or in_unary_op:
                 pos = 0
                 continue
             elif pos != delim_len:

From b5f61b69d4561b81fc98c226b176f0c15493e688 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 27 Apr 2023 19:35:28 +0530
Subject: [PATCH 85/97] Fix bug in 170605840ea9d5ad75da6576485ea7d125b428ee

and related refactor
---
 yt_dlp/YoutubeDL.py | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 482b1a49e..a8b4a650e 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2916,36 +2916,30 @@ def format_tmpl(tmpl):
         return info_copy
 
     def __forced_printings(self, info_dict, filename=None, incomplete=True):
-        def print_mandatory(field, actual_field=None):
-            if actual_field is None:
-                actual_field = field
-            if (self.params.get('force%s' % field, False)
-                    and (not incomplete or info_dict.get(actual_field) is not None)):
-                self.to_stdout(info_dict[actual_field])
-
-        def print_optional(field):
-            if (self.params.get('force%s' % field, False)
-                    and info_dict.get(field) is not None):
-                self.to_stdout(info_dict[field])
-
         if (self.params.get('forcejson')
                 or self.params['forceprint'].get('video')
                 or self.params['print_to_file'].get('video')):
             self.post_extract(info_dict)
-
         if filename:
             info_dict['filename'] = filename
-        info_dict = self._forceprint('video', info_dict)
+        info_copy = self._forceprint('video', info_dict)
 
-        print_mandatory('title')
-        print_mandatory('id')
-        print_mandatory('url', 'urls')
-        print_optional('thumbnail')
-        print_optional('description')
-        print_optional('filename')
-        if self.params.get('forceduration') and info_dict.get('duration') is not None:
-            self.to_stdout(formatSeconds(info_dict['duration']))
-        print_mandatory('format')
+        def print_field(field, actual_field=None, optional=False):
+            if actual_field is None:
+                actual_field = field
+            if self.params.get(f'force{field}') and (
+                    info_copy.get(field) is not None or (not optional and not incomplete)):
+                self.to_stdout(info_copy[actual_field])
+
+        print_field('title')
+        print_field('id')
+        print_field('url', 'urls')
+        print_field('thumbnail', optional=True)
+        print_field('description', optional=True)
+        print_field('filename', optional=True)
+        if self.params.get('forceduration') and info_copy.get('duration') is not None:
+            self.to_stdout(formatSeconds(info_copy['duration']))
+        print_field('format')
 
         if self.params.get('forcejson'):
             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

From 7a7b1376fbce0067cf37566bb47131bc0022638d Mon Sep 17 00:00:00 2001
From: makeworld <25111343+makew0rld@users.noreply.github.com>
Date: Thu, 27 Apr 2023 22:42:25 -0400
Subject: [PATCH 86/97] [extractor/cbc] Fix live extractor, playlist
 `_VALID_URL` (#6625)

Authored by: makew0rld
---
 yt_dlp/extractor/cbc.py | 120 +++++++++++++++++++++++++++-------------
 1 file changed, 83 insertions(+), 37 deletions(-)

diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index eadb3f8c0..e42f06246 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -8,14 +8,16 @@
     compat_str,
 )
 from ..utils import (
+    ExtractorError,
     int_or_none,
     join_nonempty,
     js_to_json,
     orderedSet,
+    parse_iso8601,
     smuggle_url,
     strip_or_none,
+    traverse_obj,
     try_get,
-    ExtractorError,
 )
 
 
@@ -404,7 +406,7 @@ def _real_extract(self, url):
 
 class CBCGemPlaylistIE(InfoExtractor):
     IE_NAME = 'gem.cbc.ca:playlist'
-    _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
+    _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
     _TESTS = [{
         # TV show playlist, all public videos
         'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
@@ -414,6 +416,9 @@ class CBCGemPlaylistIE(InfoExtractor):
             'title': 'Season 6',
             'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
         },
+    }, {
+        'url': 'https://gem.cbc.ca/schitts-creek/s06',
+        'only_matching': True,
     }]
     _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
 
@@ -473,49 +478,90 @@ def _real_extract(self, url):
 
 class CBCGemLiveIE(InfoExtractor):
     IE_NAME = 'gem.cbc.ca:live'
-    _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://gem.cbc.ca/live/920604739687',
-        'info_dict': {
-            'title': 'Ottawa',
-            'description': 'The live TV channel and local programming from Ottawa',
-            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
-            'is_live': True,
-            'id': 'AyqZwxRqh8EH',
-            'ext': 'mp4',
-            'timestamp': 1492106160,
-            'upload_date': '20170413',
-            'uploader': 'CBCC-NEW',
+    _VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'https://gem.cbc.ca/live/920604739687',
+            'info_dict': {
+                'title': 'Ottawa',
+                'description': 'The live TV channel and local programming from Ottawa',
+                'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
+                'is_live': True,
+                'id': 'AyqZwxRqh8EH',
+                'ext': 'mp4',
+                'timestamp': 1492106160,
+                'upload_date': '20170413',
+                'uploader': 'CBCC-NEW',
+            },
+            'skip': 'Live might have ended',
         },
-        'skip': 'Live might have ended',
-    }
-
-    # It's unclear where the chars at the end come from, but they appear to be
-    # constant. Might need updating in the future.
-    # There are two URLs, some livestreams are in one, and some
-    # in the other. The JSON schema is the same for both.
-    _API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT']
+        {
+            'url': 'https://gem.cbc.ca/live/44',
+            'info_dict': {
+                'id': '44',
+                'ext': 'mp4',
+                'is_live': True,
+                'title': r're:^Ottawa [0-9\-: ]+',
+                'description': 'The live TV channel and local programming from Ottawa',
+                'live_status': 'is_live',
+                'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*'
+            },
+            'params': {'skip_download': True},
+            'skip': 'Live might have ended',
+        },
+        {
+            'url': 'https://gem.cbc.ca/live-event/10835',
+            'info_dict': {
+                'id': '10835',
+                'ext': 'mp4',
+                'is_live': True,
+                'title': r're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+',
+                'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
+                'live_status': 'is_live',
+                'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
+                'timestamp': 1679706000,
+                'upload_date': '20230325',
+            },
+            'params': {'skip_download': True},
+            'skip': 'Live might have ended',
+        }
+    ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
 
-        for api_url in self._API_URLS:
-            video_info = next((
-                stream for stream in self._download_json(api_url, video_id)['entries']
-                if stream.get('guid') == video_id), None)
-            if video_info:
-                break
-        else:
+        # Two types of metadata JSON
+        if not video_info.get('formattedIdMedia'):
+            video_info = traverse_obj(
+                video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}),
+                get_all=False, default={})
+
+        video_stream_id = video_info.get('formattedIdMedia')
+        if not video_stream_id:
             raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
 
+        stream_data = self._download_json(
+            'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
+                'appCode': 'mpx',
+                'connectionType': 'hd',
+                'deviceType': 'ipad',
+                'idMedia': video_stream_id,
+                'multibitrate': 'true',
+                'output': 'json',
+                'tech': 'hls',
+                'manifestType': 'desktop',
+            })
+
         return {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': video_info['content'][0]['url'],
             'id': video_id,
-            'title': video_info.get('title'),
-            'description': video_info.get('description'),
-            'tags': try_get(video_info, lambda x: x['keywords'].split(', ')),
-            'thumbnail': video_info.get('cbc$staticImage'),
+            'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True),
             'is_live': True,
+            **traverse_obj(video_info, {
+                'title': 'title',
+                'description': 'description',
+                'thumbnail': ('images', 'card', 'url'),
+                'timestamp': ('airDate', {parse_iso8601}),
+            })
         }

From f005a35aa7e4f67a0c603a946c0dd714c151b2d6 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sat, 29 Apr 2023 00:58:48 +0530
Subject: [PATCH 87/97] Ensure pre-processor errors do not block `--print`

Closes #6937
---
 yt_dlp/YoutubeDL.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index a8b4a650e..857b7ea37 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3489,10 +3489,12 @@ def run_pp(self, pp, infodict):
         return infodict
 
     def run_all_pps(self, key, info, *, additional_pps=None):
-        for pp in (additional_pps or []) + self._pps[key]:
-            info = self.run_pp(pp, info)
-        if key != 'video':
-            self._forceprint(key, info)
+        try:
+            for pp in (additional_pps or []) + self._pps[key]:
+                info = self.run_pp(pp, info)
+        finally:
+            if key != 'video':
+                self._forceprint(key, info)
         return info
 
     def pre_process(self, ie_info, key='pre_process', files_to_move=None):

From 17ba4343cf99701692a7f4798fd42b50f644faba Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sat, 29 Apr 2023 02:57:50 +0530
Subject: [PATCH 88/97] Fix f005a35aa7e4f67a0c603a946c0dd714c151b2d6

Printing inside `finally` causes the order of logging to change
when there is an error, which is undesirable. So this is reverted.

The issue of `--print` being blocked by pre-processors was an
unintentional side-effect of changing the order of operations in
170605840ea9d5ad75da6576485ea7d125b428ee, so that change is also
partially reverted.
---
 yt_dlp/YoutubeDL.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 857b7ea37..8ee42b86a 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3488,13 +3488,11 @@ def run_pp(self, pp, infodict):
                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
         return infodict
 
-    def run_all_pps(self, key, info, *, additional_pps=None):
-        try:
-            for pp in (additional_pps or []) + self._pps[key]:
-                info = self.run_pp(pp, info)
-        finally:
-            if key != 'video':
-                self._forceprint(key, info)
+    def run_all_pps(self, key, info, *, additional_pps=None, fatal=True):
+        if key != 'video':
+            self._forceprint(key, info)
+        for pp in (additional_pps or []) + self._pps[key]:
+            info = self.run_pp(pp, info)
         return info
 
     def pre_process(self, ie_info, key='pre_process', files_to_move=None):

From 4d9280c9c853733534dda60486fa949bcca36c9e Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sat, 29 Apr 2023 13:19:35 -0500
Subject: [PATCH 89/97] [extractor/reddit] Add login support (#6950)

Closes #6949
Authored by: bashonly
---
 yt_dlp/extractor/reddit.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 3e458456c..13615e82f 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -8,11 +8,13 @@
     traverse_obj,
     try_get,
     unescapeHTML,
+    urlencode_postdata,
     url_or_none,
 )
 
 
 class RedditIE(InfoExtractor):
+    _NETRC_MACHINE = 'reddit'
     _VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
@@ -176,6 +178,25 @@ class RedditIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _perform_login(self, username, password):
+        captcha = self._download_json(
+            'https://www.reddit.com/api/requires_captcha/login.json', None,
+            'Checking login requirement')['required']
+        if captcha:
+            raise ExtractorError('Reddit is requiring captcha before login', expected=True)
+        login = self._download_json(
+            f'https://www.reddit.com/api/login/{username}', None, data=urlencode_postdata({
+                'op': 'login-main',
+                'user': username,
+                'passwd': password,
+                'api_type': 'json',
+            }), note='Logging in', errnote='Login request failed')
+        errors = '; '.join(traverse_obj(login, ('json', 'errors', ..., 1)))
+        if errors:
+            raise ExtractorError(f'Unable to login, Reddit API says {errors}', expected=True)
+        elif not traverse_obj(login, ('json', 'data', 'cookie', {str})):
+            raise ExtractorError('Unable to login, no cookie was returned')
+
     def _real_extract(self, url):
         host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
 

From b079c26f0af8085bccdadc72c61c8164ca5ab0f8 Mon Sep 17 00:00:00 2001
From: Simon Sawicki <contact@grub4k.xyz>
Date: Sun, 30 Apr 2023 19:50:22 +0200
Subject: [PATCH 90/97] [utils] `traverse_obj`: More fixes (#6959)

- Fix result when branching with `traverse_string`
- Fix `slice` path on `dict`s
- Fix tests and docstrings from 21b5ec86c2c37d10c5bb97edd7051d3aac16bb3e
- Add `is_iterable_like` helper function

Authored by: Grub4K
---
 test/test_utils.py | 21 +++++++++++++++++++--
 yt_dlp/utils.py    | 28 ++++++++++++++++++----------
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index f2f3b8170..e1bf6ac20 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -2016,7 +2016,7 @@ def test_traverse_obj(self):
                          msg='nested `...` queries should work')
         self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
                               msg='`...` query result should be flattened')
-        self.assertEqual(traverse_obj(range(4), ...), list(range(4)),
+        self.assertEqual(traverse_obj(iter(range(4)), ...), list(range(4)),
                          msg='`...` should accept iterables')
 
         # Test function as key
@@ -2025,7 +2025,7 @@ def test_traverse_obj(self):
                          msg='function as query key should perform a filter based on (key, value)')
         self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
                               msg='exceptions in the query function should be catched')
-        self.assertEqual(traverse_obj(range(4), lambda _, x: x % 2 == 0), [0, 2],
+        self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
                          msg='function key should accept iterables')
         if __debug__:
             with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
@@ -2051,6 +2051,17 @@ def test_traverse_obj(self):
             with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
                 traverse_obj(_TEST_DATA, {str.upper, str})
 
+        # Test `slice` as a key
+        _SLICE_DATA = [0, 1, 2, 3, 4]
+        self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
+                         msg='slice on a dictionary should not throw')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
+                         msg='slice key should apply slice to sequence')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
+                         msg='slice key should apply slice to sequence')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
+                         msg='slice key should apply slice to sequence')
+
         # Test alternative paths
         self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
                          msg='multiple `paths` should be treated as alternative paths')
@@ -2234,6 +2245,12 @@ def test_traverse_obj(self):
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
                                       traverse_string=True), ['s', 'r'],
                          msg='branching should result in list if `traverse_string`')
+        self.assertEqual(traverse_obj({}, (0, ...), traverse_string=True), [],
+                         msg='branching should result in list if `traverse_string`')
+        self.assertEqual(traverse_obj({}, (0, lambda x, y: True), traverse_string=True), [],
+                         msg='branching should result in list if `traverse_string`')
+        self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
+                         msg='branching should result in list if `traverse_string`')
 
         # Test is_user_input behavior
         _IS_USER_INPUT_DATA = {'range8': list(range(8))}
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index f69311462..2f5e66720 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3273,8 +3273,14 @@ def multipart_encode(data, boundary=None):
     return out, content_type
 
 
-def variadic(x, allowed_types=(str, bytes, dict)):
-    return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
+def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
+    if blocked_types is NO_DEFAULT:
+        blocked_types = (str, bytes, collections.abc.Mapping)
+    return isinstance(x, allowed_types) and not isinstance(x, blocked_types)
+
+
+def variadic(x, allowed_types=NO_DEFAULT):
+    return x if is_iterable_like(x, blocked_types=allowed_types) else (x,)
 
 
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
@@ -5467,7 +5473,7 @@ def traverse_obj(
         obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
         casesense=True, is_user_input=False, traverse_string=False):
     """
-    Safely traverse nested `dict`s and `Sequence`s
+    Safely traverse nested `dict`s and `Iterable`s
 
     >>> obj = [{}, {"key": "value"}]
     >>> traverse_obj(obj, (1, "key"))
@@ -5475,7 +5481,7 @@ def traverse_obj(
 
     Each of the provided `paths` is tested and the first producing a valid result will be returned.
     The next path will also be tested if the path branched but no results could be found.
-    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
+    Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
     Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
 
     The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
@@ -5492,7 +5498,7 @@ def traverse_obj(
                             Read as: `[traverse_obj(obj, branch) for branch in branches]`.
         - `function`:       Branch out and return values filtered by the function.
                             Read as: `[value for key, value in obj if function(key, value)]`.
-                            For `Sequence`s, `key` is the index of the value.
+                            For `Iterable`s, `key` is the index of the value.
                             For `re.Match`es, `key` is the group number (0 = full match)
                             as well as additionally any group names, if given.
         - `dict`            Transform the current object and return a matching dict.
@@ -5540,7 +5546,9 @@ def apply_key(key, obj, is_last):
         result = None
 
         if obj is None and traverse_string:
-            pass
+            if key is ... or callable(key) or isinstance(key, slice):
+                branching = True
+                result = ()
 
         elif key is None:
             result = obj
@@ -5563,7 +5571,7 @@ def apply_key(key, obj, is_last):
             branching = True
             if isinstance(obj, collections.abc.Mapping):
                 result = obj.values()
-            elif isinstance(obj, collections.abc.Iterable) and not isinstance(obj, (str, bytes)):
+            elif is_iterable_like(obj):
                 result = obj
             elif isinstance(obj, re.Match):
                 result = obj.groups()
@@ -5577,7 +5585,7 @@ def apply_key(key, obj, is_last):
             branching = True
             if isinstance(obj, collections.abc.Mapping):
                 iter_obj = obj.items()
-            elif isinstance(obj, collections.abc.Iterable) and not isinstance(obj, (str, bytes)):
+            elif is_iterable_like(obj):
                 iter_obj = enumerate(obj)
             elif isinstance(obj, re.Match):
                 iter_obj = itertools.chain(
@@ -5601,7 +5609,7 @@ def apply_key(key, obj, is_last):
             } or None
 
         elif isinstance(obj, collections.abc.Mapping):
-            result = (obj.get(key) if casesense or (key in obj) else
+            result = (try_call(obj.get, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,)) else
                       next((v for k, v in obj.items() if casefold(k) == key), None))
 
         elif isinstance(obj, re.Match):
@@ -5613,7 +5621,7 @@ def apply_key(key, obj, is_last):
                 result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
 
         elif isinstance(key, (int, slice)):
-            if isinstance(obj, collections.abc.Sequence) and not isinstance(obj, (str, bytes)):
+            if is_iterable_like(obj, collections.abc.Sequence):
                 branching = isinstance(key, slice)
                 with contextlib.suppress(IndexError):
                     result = obj[key]

From 147e62fc584c3ea6fdb09bb7a47905df68553a22 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Mon, 1 May 2023 18:55:28 -0500
Subject: [PATCH 91/97] [extractor/twitter] Default to GraphQL, handle auth
 errors (#6957)

Closes #6763
Authored by: bashonly
---
 README.md                   |   2 +-
 yt_dlp/extractor/twitter.py | 132 ++++++++++++++++--------------------
 2 files changed, 60 insertions(+), 74 deletions(-)

diff --git a/README.md b/README.md
index 47da19011..c1f34235d 100644
--- a/README.md
+++ b/README.md
@@ -1833,7 +1833,7 @@ #### rokfinchannel
 * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
 
 #### twitter
-* `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided
+* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
 
 **Note**: These options may be changed/removed in the future without concern for backward compatibility
 
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index 3f1899e96..d9a89c44b 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -1,6 +1,5 @@
 import json
 import re
-import urllib.error
 
 from .common import InfoExtractor
 from .periscope import PeriscopeBaseIE, PeriscopeIE
@@ -17,6 +16,7 @@
     format_field,
     int_or_none,
     make_archive_id,
+    remove_end,
     str_or_none,
     strip_or_none,
     traverse_obj,
@@ -32,11 +32,9 @@
 class TwitterBaseIE(InfoExtractor):
     _API_BASE = 'https://api.twitter.com/1.1/'
     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
-    _TOKENS = {
-        'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
-        'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
-    }
     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
+    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
+    _guest_token = None
 
     def _extract_variant_formats(self, variant, video_id):
         variant_url = variant.get('url')
@@ -94,7 +92,7 @@ def is_logged_in(self):
 
     def _call_api(self, path, video_id, query={}, graphql=False):
         cookies = self._get_cookies(self._API_BASE)
-        headers = {}
+        headers = self._AUTH.copy()
 
         csrf_cookie = cookies.get('ct0')
         if csrf_cookie:
@@ -107,54 +105,34 @@ def _call_api(self, path, video_id, query={}, graphql=False):
                 'x-twitter-active-user': 'yes',
             })
 
-        last_error = None
-        for bearer_token in self._TOKENS:
-            for first_attempt in (True, False):
-                headers['Authorization'] = f'Bearer {bearer_token}'
+        for first_attempt in (True, False):
+            if not self.is_logged_in and not self._guest_token:
+                headers.pop('x-guest-token', None)
+                self._guest_token = traverse_obj(self._download_json(
+                    f'{self._API_BASE}guest/activate.json', video_id,
+                    'Downloading guest token', data=b'', headers=headers), 'guest_token')
+            if self._guest_token:
+                headers['x-guest-token'] = self._guest_token
+            elif not self.is_logged_in:
+                raise ExtractorError('Could not retrieve guest token')
 
-                if not self.is_logged_in:
-                    if not self._TOKENS[bearer_token]:
-                        headers.pop('x-guest-token', None)
-                        guest_token_response = self._download_json(
-                            self._API_BASE + 'guest/activate.json', video_id,
-                            'Downloading guest token', data=b'', headers=headers)
+            allowed_status = {400, 401, 403, 404} if graphql else {403}
+            result = self._download_json(
+                (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
+                video_id, headers=headers, query=query, expected_status=allowed_status,
+                note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
 
-                        self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
-                        if not self._TOKENS[bearer_token]:
-                            raise ExtractorError('Could not retrieve guest token')
+            if result.get('errors'):
+                errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
+                if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
+                    self.to_screen('Guest token has expired. Refreshing guest token')
+                    self._guest_token = None
+                    continue
 
-                    headers['x-guest-token'] = self._TOKENS[bearer_token]
+                raise ExtractorError(
+                    f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
 
-                try:
-                    allowed_status = {400, 403, 404} if graphql else {403}
-                    result = self._download_json(
-                        (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
-                        video_id, headers=headers, query=query, expected_status=allowed_status)
-
-                except ExtractorError as e:
-                    if last_error:
-                        raise last_error
-
-                    if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
-                        raise
-
-                    last_error = e
-                    self.report_warning(
-                        'Twitter API gave 404 response, retrying with deprecated auth token. '
-                        'Only one media item can be extracted')
-                    break  # continue outer loop with next bearer_token
-
-                if result.get('errors'):
-                    errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
-                    if first_attempt and any('bad guest token' in error.lower() for error in errors):
-                        self.to_screen('Guest token has expired. Refreshing guest token')
-                        self._TOKENS[bearer_token] = None
-                        continue
-
-                    error_message = ', '.join(set(errors)) or 'Unknown error'
-                    raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)
-
-                return result
+            return result
 
     def _build_graphql_query(self, media_id):
         raise NotImplementedError('Method must be implemented to support GraphQL')
@@ -313,6 +291,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 18,
         },
@@ -391,6 +370,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': ['Damndaniel'],
             'age_limit': 0,
         },
@@ -431,6 +411,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 0,
         },
@@ -480,6 +461,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': ['Maria'],
             'age_limit': 0,
         },
@@ -505,6 +487,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 0,
         },
@@ -529,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 0,
         },
@@ -589,6 +573,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 0,
         },
@@ -630,12 +615,12 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': ['HurricaneIan'],
             'age_limit': 0,
         },
     }, {
-        # Adult content, uses old token
-        # Fails if not logged in (GraphQL)
+        # Adult content, fails if not logged in (GraphQL)
         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
         'info_dict': {
             'id': '1575199163847000068',
@@ -655,9 +640,8 @@ class TwitterIE(TwitterBaseIE):
             'age_limit': 18,
             'tags': []
         },
-        'expected_warnings': ['404'],
+        'skip': 'Requires authentication',
     }, {
-        # Description is missing one https://t.co url (GraphQL)
         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
         'playlist_mincount': 2,
         'info_dict': {
@@ -669,14 +653,13 @@ class TwitterIE(TwitterBaseIE):
             'upload_date': '20210519',
             'age_limit': 0,
             'repost_count': int,
-            'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
+            'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
             'uploader_id': 'Srirachachau',
             'comment_count': int,
             'uploader_url': 'https://twitter.com/Srirachachau',
             'timestamp': 1621447860,
         },
     }, {
-        # Description is missing one https://t.co url (GraphQL)
         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
         'playlist_mincount': 2,
         'info_dict': {
@@ -688,7 +671,7 @@ class TwitterIE(TwitterBaseIE):
             'uploader': str,
             'timestamp': 1665143744,
             'uploader_url': 'https://twitter.com/DavidToons_',
-            'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
+            'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
             'tags': [],
             'comment_count': int,
             'upload_date': '20221007',
@@ -752,7 +735,7 @@ class TwitterIE(TwitterBaseIE):
         'info_dict': {
             'id': '1600649511827013632',
             'ext': 'mp4',
-            'title': 'md5:dac4f4d4c591fcc4e88a253eba472dc3',
+            'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
             'thumbnail': r're:^https?://.+\.jpg',
             'timestamp': 1670459604.0,
             'uploader_id': 'CTVJLaidlaw',
@@ -764,6 +747,7 @@ class TwitterIE(TwitterBaseIE):
             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
             'display_id': '1600649710662213632',
             'like_count': int,
+            'view_count': int,
             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
             'upload_date': '20221208',
             'age_limit': 0,
@@ -791,6 +775,7 @@ class TwitterIE(TwitterBaseIE):
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
+            'view_count': int,
         },
     }, {
         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
@@ -806,6 +791,7 @@ class TwitterIE(TwitterBaseIE):
             'repost_count': int,
             'duration': 9.531,
             'comment_count': int,
+            'view_count': int,
             'upload_date': '20221203',
             'age_limit': 0,
             'timestamp': 1670092210.0,
@@ -815,7 +801,6 @@ class TwitterIE(TwitterBaseIE):
         },
         'params': {'noplaylist': True},
     }, {
-        # Media view count is GraphQL only, force in test
         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
         'info_dict': {
             'id': '1600009362759733248',
@@ -826,10 +811,10 @@ class TwitterIE(TwitterBaseIE):
             'view_count': int,
             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
             'age_limit': 0,
-            'uploader': 'Mün The Shinobi | BlaqBoi\'s Therapist',
+            'uploader': 'Mün The Shinobi',
             'repost_count': int,
             'upload_date': '20221206',
-            'title': 'Mün The Shinobi | BlaqBoi\'s Therapist - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
+            'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
             'comment_count': int,
             'like_count': int,
             'tags': [],
@@ -837,9 +822,8 @@ class TwitterIE(TwitterBaseIE):
             'duration': 139.987,
             'timestamp': 1670306984.0,
         },
-        'params': {'extractor_args': {'twitter': {'force_graphql': ['']}}},
     }, {
-        # url to retweet id
+        # url to retweet id, legacy API
         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
         'info_dict': {
             'id': '1623274794488659969',
@@ -860,6 +844,7 @@ class TwitterIE(TwitterBaseIE):
             'repost_count': int,
             'comment_count': int,
         },
+        'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
     }, {
         # onion route
         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@@ -905,11 +890,13 @@ def _graphql_to_legacy(self, data, twid):
             'tweet_results', 'result', ('tweet', None),
         ), expected_type=dict, default={}, get_all=False)
 
-        if result.get('__typename') not in ('Tweet', None):
+        if result.get('__typename') not in ('Tweet', 'TweetTombstone', None):
             self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
 
         if 'tombstone' in result:
-            cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
+            cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
+            if cause and 'adult content' in cause:
+                self.raise_login_required(cause)
             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
 
         status = result.get('legacy', {})
@@ -922,7 +909,7 @@ def _graphql_to_legacy(self, data, twid):
         # extra transformation is needed since result does not match legacy format
         binding_values = {
             binding_value.get('key'): binding_value.get('value')
-            for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
+            for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
         }
         if binding_values:
             status['card']['binding_values'] = binding_values
@@ -965,12 +952,7 @@ def _build_graphql_query(self, media_id):
 
     def _real_extract(self, url):
         twid, selected_index = self._match_valid_url(url).group('id', 'index')
-        if self.is_logged_in or self._configuration_arg('force_graphql'):
-            self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
-            result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
-            status = self._graphql_to_legacy(result, twid)
-
-        else:
+        if self._configuration_arg('legacy_api') and not self.is_logged_in:
             status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
                 'cards_platform': 'Web-12',
                 'include_cards': 1,
@@ -978,6 +960,9 @@ def _real_extract(self, url):
                 'include_user_entities': 0,
                 'tweet_mode': 'extended',
             }), 'retweeted_status', None)
+        else:
+            result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
+            status = self._graphql_to_legacy(result, twid)
 
         title = description = status['full_text'].replace('\n', ' ')
         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
@@ -1142,7 +1127,8 @@ def get_binding_value(k):
         if not entries:
             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
             if not expanded_url or expanded_url == url:
-                raise ExtractorError('No video could be found in this tweet', expected=True)
+                self.raise_no_formats('No video could be found in this tweet', expected=True)
+                return info
 
             return self.url_result(expanded_url, display_id=twid, **info)
 

From b423b6a48e0b19260bc95ab7d72d2138d7f124dc Mon Sep 17 00:00:00 2001
From: Nicholas Defranco <39540565+nick-cd@users.noreply.github.com>
Date: Mon, 1 May 2023 20:03:27 -0400
Subject: [PATCH 92/97] [extractor/dlf] Add extractors (#6697)

Closes #6430
Authored by: nick-cd
---
 yt_dlp/extractor/_extractors.py |   4 +
 yt_dlp/extractor/dlf.py         | 192 ++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+)
 create mode 100644 yt_dlp/extractor/dlf.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 3b5ae63b1..2d582f67f 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -452,6 +452,10 @@
 )
 from .democracynow import DemocracynowIE
 from .detik import DetikEmbedIE
+from .dlf import (
+    DLFIE,
+    DLFCorpusIE,
+)
 from .dfb import DFBIE
 from .dhm import DHMIE
 from .digg import DiggIE
diff --git a/yt_dlp/extractor/dlf.py b/yt_dlp/extractor/dlf.py
new file mode 100644
index 000000000..88a4149b5
--- /dev/null
+++ b/yt_dlp/extractor/dlf.py
@@ -0,0 +1,192 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    extract_attributes,
+    int_or_none,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class DLFBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
+    _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'
+
+    def _parse_button_attrs(self, button, audio_id=None):
+        attrs = extract_attributes(button)
+        audio_id = audio_id or attrs['data-audio-diraid']
+
+        url = traverse_obj(
+            attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
+            'data-audio-src', expected_type=url_or_none)
+        ext = determine_ext(url)
+
+        return {
+            'id': audio_id,
+            'extractor_key': DLFIE.ie_key(),
+            'extractor': DLFIE.IE_NAME,
+            **traverse_obj(attrs, {
+                'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), {str}),
+                'duration': (('data-audioduration', 'data-audio-duration'), {int_or_none}),
+                'thumbnail': ('data-audioimage', {url_or_none}),
+                'uploader': 'data-audio-producer',
+                'series': 'data-audio-series',
+                'channel': 'data-audio-origin-site-name',
+                'webpage_url': ('data-audio-download-tracking-path', {url_or_none}),
+            }, get_all=False),
+            'formats': (self._extract_m3u8_formats(url, audio_id, fatal=False)
+                        if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}])
+        }
+
+
+class DLFIE(DLFBaseIE):
+    IE_NAME = 'dlf'
+    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
+    _TESTS = [
+        # Audio as an HLS stream
+        {
+            'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
+            'info_dict': {
+                'id': '03a3eb19',
+                'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
+                'ext': 'm4a',
+                'duration': 3298,
+                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+                'uploader': 'Deutschlandfunk',
+                'series': 'On Stage',
+                'channel': 'deutschlandfunk'
+            },
+            'params': {
+                'skip_download': 'm3u8'
+            },
+            'skip': 'This webpage no longer exists'
+        }, {
+            'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
+            'info_dict': {
+                'id': 'd9cc1856',
+                'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
+                'ext': 'mp3',
+                'duration': 291,
+                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+                'uploader': 'Deutschlandfunk',
+                'series': 'Kommentare und Themen der Woche',
+                'channel': 'deutschlandfunk'
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+        webpage = self._download_webpage(url, audio_id)
+
+        return self._parse_button_attrs(
+            self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id)
+
+
+class DLFCorpusIE(DLFBaseIE):
+    IE_NAME = 'dlf:corpus'
+    IE_DESC = 'DLF Multi-feed Archives'
+    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
+    _TESTS = [
+        # Recorded news broadcast with referrals to related broadcasts
+        {
+            'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
+            'info_dict': {
+                'id': 'fechten-russland-belarus-ukraine-protest-100',
+                'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+                'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad'
+            },
+            'playlist_mincount': 5,
+            'playlist': [{
+                'info_dict': {
+                    'id': '1fc5d64a',
+                    'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+                    'ext': 'mp3',
+                    'duration': 252,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '2ada145f',
+                    'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
+                    'ext': 'mp3',
+                    'duration': 336,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Deutschlandfunk Nova',
+                    'channel': 'deutschlandfunk-nova'
+                }
+            }, {
+                'info_dict': {
+                    'id': '5e55e8c9',
+                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+                    'ext': 'mp3',
+                    'duration': 187,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '47e1a096',
+                    'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
+                    'ext': 'mp3',
+                    'duration': 602,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '5e55e8c9',
+                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+                    'ext': 'mp3',
+                    'duration': 187,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }]
+        },
+        # Podcast feed with tag buttons, playlist count fluctuates
+        {
+            'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
+            'info_dict': {
+                'id': 'kommentare-und-themen-der-woche-100',
+                'title': 'Meinung - Kommentare und Themen der Woche',
+                'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
+            },
+            'playlist_mincount': 10,
+        },
+        # Podcast feed with no description
+        {
+            'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
+            'info_dict': {
+                'id': 'podcast-tolle-idee-100',
+                'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
+            },
+            'playlist_mincount': 11,
+        },
+    ]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'description': self._html_search_meta(
+                ['description', 'og:description', 'twitter:description'], webpage, default=None),
+            'title': self._html_search_meta(
+                ['og:title', 'twitter:title'], webpage, default=None),
+            'entries': map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)),
+        }

From 2f07c4c1da4361af213e5791279b9d152d2e4ce3 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Wed, 3 May 2023 15:46:37 -0500
Subject: [PATCH 93/97] [extractor/clipchamp] Add extractor (#6978)

Closes #6973
Authored by: bashonly
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/clipchamp.py   | 61 +++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 yt_dlp/extractor/clipchamp.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 2d582f67f..974c8a254 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -356,6 +356,7 @@
 )
 from .ciscowebex import CiscoWebexIE
 from .cjsw import CJSWIE
+from .clipchamp import ClipchampIE
 from .cliphunter import CliphunterIE
 from .clippit import ClippitIE
 from .cliprs import ClipRsIE
diff --git a/yt_dlp/extractor/clipchamp.py b/yt_dlp/extractor/clipchamp.py
new file mode 100644
index 000000000..a8bdf7e50
--- /dev/null
+++ b/yt_dlp/extractor/clipchamp.py
@@ -0,0 +1,61 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    traverse_obj,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class ClipchampIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
+        'info_dict': {
+            'id': 'gRXZ4ZhdDaU',
+            'ext': 'mp4',
+            'title': 'Untitled video',
+            'uploader': 'Alexander Schwartz',
+            'timestamp': 1680805580,
+            'upload_date': '20230406',
+            'thumbnail': r're:^https?://.+\.jpg',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
+    _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
+
+        storage_location = data.get('storage_location')
+        if storage_location != 'cf_stream':
+            raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')
+
+        path = data['download_url']
+        iframe = self._download_webpage(
+            f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
+        subdomain = self._search_regex(
+            r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe,
+            'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
+
+        formats = self._extract_mpd_formats(
+            self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
+            query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
+        formats.extend(self._extract_m3u8_formats(
+            self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
+            query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), {str}))) or None,
+            **traverse_obj(data, {
+                'title': ('project', 'project_name', {str}),
+                'timestamp': ('created_at', {unified_timestamp}),
+                'thumbnail': ('thumbnail_url', {url_or_none}),
+            }),
+        }

From 45998b3e371b819ce0dbe50da703809a048cc2fe Mon Sep 17 00:00:00 2001
From: Eveldee <eveldee0680@live.fr>
Date: Fri, 5 May 2023 07:31:41 +0200
Subject: [PATCH 94/97] [utils] `locked_file`: Fix for virtiofs (#6840)

Authored by: brandon-dacrib
Closes #6823
---
 yt_dlp/utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 2f5e66720..47aa75c47 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2187,10 +2187,11 @@ def _lock_file(f, exclusive, block):
                 fcntl.lockf(f, flags)
 
         def _unlock_file(f):
-            try:
-                fcntl.flock(f, fcntl.LOCK_UN)
-            except OSError:
-                fcntl.lockf(f, fcntl.LOCK_UN)
+            with contextlib.suppress(OSError):
+                return fcntl.flock(f, fcntl.LOCK_UN)
+            with contextlib.suppress(OSError):
+                return fcntl.lockf(f, fcntl.LOCK_UN)  # AOSP does not have flock()
+            return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB)  # virtiofs needs LOCK_NB on unlocking
 
     except ImportError:
 

From ddae33754ae1f32dd9c64cf895c47d20f6b5f336 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 5 May 2023 09:41:56 +0530
Subject: [PATCH 95/97] [extractor/youporn] Extract m3u8 formats

Closes #6977
---
 yt_dlp/extractor/youporn.py | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py
index 8f1b9911b..6ee0abcae 100644
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@@ -6,6 +6,7 @@
     int_or_none,
     merge_dicts,
     str_to_int,
+    traverse_obj,
     unified_strdate,
     url_or_none,
 )
@@ -86,32 +87,31 @@ class YouPornIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id') or video_id
-
+        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
         definitions = self._download_json(
-            'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
-            display_id)
+            f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
+
+        def get_format_data(data, f):
+            return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
 
         formats = []
-        for definition in definitions:
-            if not isinstance(definition, dict):
-                continue
-            video_url = url_or_none(definition.get('videoUrl'))
-            if not video_url:
-                continue
-            f = {
-                'url': video_url,
-                'filesize': int_or_none(definition.get('videoSize')),
-            }
+        # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
+        for hls_url in traverse_obj(get_format_data(definitions, 'hls'), (
+                lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'), (..., 'videoUrl')):
+            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
+
+        for definition in get_format_data(definitions, 'mp4'):
+            f = traverse_obj(definition, {
+                'url': 'videoUrl',
+                'filesize': ('videoSize', {int_or_none})
+            })
             height = int_or_none(definition.get('quality'))
             # Video URL's path looks like this:
             #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
             #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
             #  /videos/201703/11/109285532/1080P_4000K_109285532.mp4
             # We will benefit from it by extracting some metadata
-            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', definition['videoUrl'])
             if mobj:
                 if not height:
                     height = int(mobj.group('height'))
@@ -179,6 +179,7 @@ def extract_tag_box(regex, title):
             'tags')
 
         data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False)
+        data.pop('url', None)
         return merge_dicts(data, {
             'id': video_id,
             'display_id': display_id,

From 0c7ce146e4d2a84e656d78f6857952bfd25ab389 Mon Sep 17 00:00:00 2001
From: "lauren n. liberda" <lauren@selfisekai.rocks>
Date: Sat, 6 May 2023 02:09:49 +0200
Subject: [PATCH 96/97] [extractor/tvp] Use new API (#6989)

Authored by: selfisekai
Closes #6987
---
 yt_dlp/extractor/tvp.py | 94 ++++++++++++++++++++++++++++++-----------
 1 file changed, 70 insertions(+), 24 deletions(-)

diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py
index f8ded2646..2aa0dd870 100644
--- a/yt_dlp/extractor/tvp.py
+++ b/yt_dlp/extractor/tvp.py
@@ -482,21 +482,34 @@ def _real_extract(self, url):
 class TVPVODBaseIE(InfoExtractor):
     _API_BASE_URL = 'https://vod.tvp.pl/api/products'
 
-    def _call_api(self, resource, video_id, **kwargs):
-        return self._download_json(
+    def _call_api(self, resource, video_id, query={}, **kwargs):
+        is_valid = lambda x: 200 <= x < 300
+        document, urlh = self._download_json_handle(
             f'{self._API_BASE_URL}/{resource}', video_id,
-            query={'lang': 'pl', 'platform': 'BROWSER'}, **kwargs)
+            query={'lang': 'pl', 'platform': 'BROWSER', **query},
+            expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
+        if is_valid(urlh.status):
+            return document
+        raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})')
 
-    def _parse_video(self, video):
-        return {
-            '_type': 'url',
-            'url': 'tvp:' + video['externalUid'],
-            'ie_key': TVPEmbedIE.ie_key(),
-            'title': video.get('title'),
-            'description': traverse_obj(video, ('lead', 'description')),
-            'age_limit': int_or_none(video.get('rating')),
-            'duration': int_or_none(video.get('duration')),
-        }
+    def _parse_video(self, video, with_url=True):
+        info_dict = traverse_obj(video, {
+            'id': ('id', {str_or_none}),
+            'title': 'title',
+            'age_limit': ('rating', {int_or_none}),
+            'duration': ('duration', {int_or_none}),
+            'episode_number': ('number', {int_or_none}),
+            'series': ('season', 'serial', 'title', {str_or_none}),
+            'thumbnails': ('images', ..., ..., {'url': ('url', {url_or_none})}),
+        })
+        info_dict['description'] = clean_html(dict_get(video, ('lead', 'description')))
+        if with_url:
+            info_dict.update({
+                '_type': 'url',
+                'url': video['webUrl'],
+                'ie_key': TVPVODVideoIE.ie_key(),
+            })
+        return info_dict
 
 
 class TVPVODVideoIE(TVPVODBaseIE):
@@ -506,37 +519,70 @@ class TVPVODVideoIE(TVPVODBaseIE):
     _TESTS = [{
         'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
         'info_dict': {
-            'id': '60468609',
+            'id': '311357',
             'ext': 'mp4',
-            'title': 'Laboratorium alchemika, Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
+            'title': 'Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
             'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
             'duration': 300,
             'episode_number': 24,
             'episode': 'Episode 24',
             'age_limit': 0,
             'series': 'Laboratorium alchemika',
-            'thumbnail': 're:https://.+',
+            'thumbnail': 're:https?://.+',
         },
+        'params': {'skip_download': 'm3u8'},
     }, {
         'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
         'info_dict': {
-            'id': '51640077',
+            'id': '339667',
             'ext': 'mp4',
-            'title': 'Ukraiński sługa narodu, Ukraiński sługa narodu',
-            'series': 'Ukraiński sługa narodu',
+            'title': 'Ukraiński sługa narodu',
             'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
             'age_limit': 12,
-            'episode': 'Episode 0',
-            'episode_number': 0,
             'duration': 3051,
-            'thumbnail': 're:https://.+',
+            'thumbnail': 're:https?://.+',
+            'subtitles': 'count:2',
         },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'note': 'embed fails with "payment required"',
+        'url': 'https://vod.tvp.pl/seriale,18/polowanie-na-cmy-odcinki,390116/odcinek-7,S01E07,398869',
+        'info_dict': {
+            'id': '398869',
+            'ext': 'mp4',
+            'title': 'odc. 7',
+            'description': 'md5:dd2bb33f023dc5c2fbaddfbe4cb5dba0',
+            'duration': 2750,
+            'age_limit': 16,
+            'series': 'Polowanie na ćmy',
+            'episode_number': 7,
+            'episode': 'Episode 7',
+            'thumbnail': 're:https?://.+',
+        },
+        'params': {'skip_download': 'm3u8'},
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        return self._parse_video(self._call_api(f'vods/{video_id}', video_id))
+        info_dict = self._parse_video(self._call_api(f'vods/{video_id}', video_id), with_url=False)
+
+        playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'})
+
+        info_dict['formats'] = []
+        for manifest_url in traverse_obj(playlist, ('sources', 'HLS', ..., 'src')):
+            info_dict['formats'].extend(self._extract_m3u8_formats(manifest_url, video_id, fatal=False))
+        for manifest_url in traverse_obj(playlist, ('sources', 'DASH', ..., 'src')):
+            info_dict['formats'].extend(self._extract_mpd_formats(manifest_url, video_id, fatal=False))
+
+        info_dict['subtitles'] = {}
+        for sub in playlist.get('subtitles') or []:
+            info_dict['subtitles'].setdefault(sub.get('language') or 'und', []).append({
+                'url': sub['url'],
+                'ext': 'ttml',
+            })
+
+        return info_dict
 
 
 class TVPVODSeriesIE(TVPVODBaseIE):
@@ -551,7 +597,7 @@ class TVPVODSeriesIE(TVPVODBaseIE):
             'age_limit': 12,
             'categories': ['seriale'],
         },
-        'playlist_count': 129,
+        'playlist_count': 130,
     }, {
         'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
         'only_matching': True,

From c449c0655d7c8549e6e1389c26b628053b253d39 Mon Sep 17 00:00:00 2001
From: Lesmiscore <nao20010128@gmail.com>
Date: Sat, 6 May 2023 18:14:40 +0900
Subject: [PATCH 97/97] [extractor/abematv] Add fallback for title and
 description extraction and extract more metadata (#6994)

Authored by: Lesmiscore
---
 yt_dlp/extractor/abematv.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index f611c1f2c..c9166b6b8 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -436,6 +436,16 @@ def _real_extract(self, url):
             if 3 not in ondemand_types:
                 # cannot acquire decryption key for these streams
                 self.report_warning('This is a premium-only stream')
+            info.update(traverse_obj(api_response, {
+                'series': ('series', 'title'),
+                'season': ('season', 'title'),
+                'season_number': ('season', 'sequence'),
+                'episode_number': ('episode', 'number'),
+            }))
+            if not title:
+                title = traverse_obj(api_response, ('episode', 'title'))
+            if not description:
+                description = traverse_obj(api_response, ('episode', 'content'))
 
             m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
         elif video_type == 'slots':