From 2d5cae9636714ff922d28c548c349d5f2b48f317 Mon Sep 17 00:00:00 2001 From: D0LLYNH0 <67797325+D0LLYNH0@users.noreply.github.com> Date: Thu, 9 Mar 2023 04:18:14 -0300 Subject: [PATCH 01/13] [extractor/iq] Set more language codes (#6476) Authored by: D0LLYNH0 --- yt_dlp/extractor/iqiyi.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 4443b1991..ebf49e835 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -440,12 +440,14 @@ class IqIE(InfoExtractor): '1': 'zh_CN', '2': 'zh_TW', '3': 'en', - '4': 'kor', + '4': 'ko', + '5': 'ja', '18': 'th', '21': 'my', '23': 'vi', '24': 'id', '26': 'es', + '27': 'pt', '28': 'ar', } From 3588be59cee429a0ab5c4ceb2f162298bb44147d Mon Sep 17 00:00:00 2001 From: Daniel Vogt Date: Thu, 9 Mar 2023 17:21:39 +0100 Subject: [PATCH 02/13] [extractor/opencast] Add ltitools to `_VALID_URL` (#6371) Authored by: C0D3D3V --- yt_dlp/extractor/opencast.py | 37 +++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py index fa46757f7..235ca341c 100644 --- a/yt_dlp/extractor/opencast.py +++ b/yt_dlp/extractor/opencast.py @@ -105,10 +105,9 @@ def _parse_mediapackage(self, video): class OpencastIE(OpencastBaseIE): - _VALID_URL = r'''(?x) - https?://(?P<host>%s)/paella/ui/watch.html\?.*? - id=(?P<id>%s) - ''' % (OpencastBaseIE._INSTANCES_RE, OpencastBaseIE._UUID_RE) + _VALID_URL = rf'''(?x) + https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})/paella/ui/watch\.html\? 
+ (?:[^#]+&)?id=(?P<id>{OpencastBaseIE._UUID_RE})''' _API_BASE = 'https://%s/search/episode.json?id=%s' @@ -123,6 +122,9 @@ class OpencastIE(OpencastBaseIE): 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1606208400, 'upload_date': '20201124', + 'season_id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0', + 'series': 'Kryptographie - WiSe 15/16', + 'creator': 'Alexander May', }, } ] @@ -134,10 +136,11 @@ def _real_extract(self, url): class OpencastPlaylistIE(OpencastBaseIE): - _VALID_URL = r'''(?x) - https?://(?P<host>%s)/engage/ui/index.html\?.*? - epFrom=(?P<id>%s) - ''' % (OpencastBaseIE._INSTANCES_RE, OpencastBaseIE._UUID_RE) + _VALID_URL = rf'''(?x) + https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})(?: + /engage/ui/index\.html\?(?:[^#]+&)?epFrom=| + /ltitools/index\.html\?(?:[^#]+&)?series= + )(?P<id>{OpencastBaseIE._UUID_RE})''' _API_BASE = 'https://%s/search/episode.json?sid=%s' @@ -148,15 +151,23 @@ class OpencastPlaylistIE(OpencastBaseIE): 'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0', 'title': 'Kryptographie - WiSe 15/16', }, - 'playlist_mincount': 28, + 'playlist_mincount': 29, }, { - 'url': 'https://oc-video.ruhr-uni-bochum.de/engage/ui/index.html?e=1&p=1&epFrom=b1a54262-3684-403f-9731-8e77c3766f9a', + 'url': 'https://oc-video1.ruhr-uni-bochum.de/ltitools/index.html?subtool=series&series=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0&lng=de', 'info_dict': { - 'id': 'b1a54262-3684-403f-9731-8e77c3766f9a', - 'title': 'inSTUDIES-Social movements and prefigurative politics in a global perspective', + 'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0', + 'title': 'Kryptographie - WiSe 15/16', }, - 'playlist_mincount': 6, + 'playlist_mincount': 29, + }, + { + 'url': 'https://electures.uni-muenster.de/engage/ui/index.html?e=1&p=1&epFrom=39391d10-a711-4d23-b21d-afd2ed7d758c', + 'info_dict': { + 'id': '39391d10-a711-4d23-b21d-afd2ed7d758c', + 'title': '021670 Theologische Themen bei Hans Blumenberg WiSe 2017/18', + }, + 'playlist_mincount': 13, }, ] From 66aeaac9aa30b5959069ba84e53a5508232deb38 Mon Sep 
17 00:00:00 2001 From: pukkandan Date: Thu, 9 Mar 2023 21:57:44 +0530 Subject: [PATCH 03/13] [downloader/curl] Fix progress reporting Bug in 8c53322cda75394a8d551dde20b2529ee5ad6e89 Closes #6490 --- yt_dlp/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 5f54017a8..ee130c827 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -176,7 +176,7 @@ def _call_downloader(self, tmpfilename, info_dict): return 0 def _call_process(self, cmd, info_dict): - return Popen.run(cmd, text=True, stderr=subprocess.PIPE) + return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) class CurlFD(ExternalFD): From c9abebb851e6188cb34b9eb744c1863dd46af919 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 9 Mar 2023 22:09:23 +0530 Subject: [PATCH 04/13] [extractor/youtube] Bypass throttling for `-f17` and related cleanup Thanks @AudricV for the finding --- yt_dlp/extractor/youtube.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index acd4077f4..6e6abd65b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3745,13 +3745,11 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l if mime_mobj: dct['ext'] = mimetype2ext(mime_mobj.group(1)) dct.update(parse_codecs(mime_mobj.group(2))) - no_audio = dct.get('acodec') == 'none' - no_video = dct.get('vcodec') == 'none' - if no_audio: - dct['vbr'] = tbr - if no_video: - dct['abr'] = tbr - if no_audio or no_video: + + single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec')) + if single_stream and dct.get('ext'): + dct['container'] = dct['ext'] + '_dash' + if single_stream or itag == '17': CHUNK_SIZE = 10 << 20 dct.update({ 'protocol': 'http_dash_segments', @@ -3760,13 +3758,10 @@ def _extract_formats_and_subtitles(self, 
streaming_data, video_id, player_url, l 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, dct["filesize"])}' }) } for range_start in range(0, dct['filesize'], CHUNK_SIZE)] - } if dct['filesize'] else { - 'downloader_options': {'http_chunk_size': CHUNK_SIZE} # No longer useful? + } if itag != '17' and dct['filesize'] else { + 'downloader_options': {'http_chunk_size': CHUNK_SIZE} }) - if dct.get('ext'): - dct['container'] = dct['ext'] + '_dash' - if itag: itags[itag].add(('https', dct.get('language'))) stream_ids.append(stream_id) From 0551511b45f7847f40e4314aa9e624e80d086539 Mon Sep 17 00:00:00 2001 From: Elyse <26639800+elyse0@users.noreply.github.com> Date: Fri, 10 Mar 2023 01:12:38 -0600 Subject: [PATCH 05/13] [extractor/twitch] Fix `is_live` (#6500) Closes #6494 Authored by: elyse0 --- yt_dlp/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index efc7db2c9..6321297bb 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -456,7 +456,7 @@ def _extract_info_gql(self, info, item_id): thumbnail = url_or_none(info.get('previewThumbnailURL')) is_live = None if thumbnail: - if thumbnail.endswith('/404_processing_{width}x{height}.png'): + if re.findall(r'/404_processing_[^.?#]+\.png', thumbnail): is_live, thumbnail = True, None else: is_live = False From 871c907454693940cb56906ed9ea49fcb7154829 Mon Sep 17 00:00:00 2001 From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com> Date: Fri, 10 Mar 2023 02:53:19 -0500 Subject: [PATCH 06/13] [extractor/cbc:gem] Update `_VALID_URL` (#6499) Authored by: makeworld-the-better-one Closes #6395 --- yt_dlp/extractor/cbc.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 210f5f8ee..eadb3f8c0 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -202,7 +202,7 @@ def _real_extract(self, url): class 
CBCGemIE(InfoExtractor): IE_NAME = 'gem.cbc.ca' - _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)' + _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)' _TESTS = [{ # This is a normal, public, TV show video 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', @@ -245,6 +245,9 @@ class CBCGemIE(InfoExtractor): }, 'params': {'format': 'bv'}, 'skip': 'Geo-restricted to Canada', + }, { + 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01', + 'only_matching': True, }] _GEO_COUNTRIES = ['CA'] From ab1de9cb1e39cf421c2b7dc6756c6ff1955bb313 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 10 Mar 2023 14:12:08 +0530 Subject: [PATCH 07/13] Support loading info.json with a list at its root --- yt_dlp/YoutubeDL.py | 23 ++++++++++++----------- yt_dlp/__init__.py | 2 ++ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index f701738c9..a7dced8e8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3376,18 +3376,19 @@ def download_with_info_file(self, info_filename): [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load - info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True)) - try: - self.__download_wrapper(self.process_ie_result)(info, download=True) - except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: - if not isinstance(e, EntryNotInPlaylist): - self.to_stderr('\r') - webpage_url = info.get('webpage_url') - if webpage_url is not None: + infos = [self.sanitize_info(info, self.params.get('clean_infojson', True)) + for info in variadic(json.loads('\n'.join(f)))] + for info in infos: + try: + self.__download_wrapper(self.process_ie_result)(info, download=True) + except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: + if not isinstance(e, EntryNotInPlaylist): + self.to_stderr('\r') + webpage_url = 
info.get('webpage_url') + if webpage_url is None: + raise self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') - return self.download([webpage_url]) - else: - raise + self.download([webpage_url]) return self._download_retcode @staticmethod diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 9ef31601c..bdac1212c 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -952,6 +952,8 @@ def _real_main(argv=None): parser.destroy() try: if opts.load_info_filename is not None: + if all_urls: + ydl.report_warning('URLs are ignored due to --load-info-json') return ydl.download_with_info_file(expand_path(opts.load_info_filename)) else: return ydl.download(all_urls) From e6ab678e36c40ded0aae305bbb866cdab554d417 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 10 Mar 2023 17:27:43 +0530 Subject: [PATCH 08/13] [extractor/hidive] Fix login Fixes https://github.com/yt-dlp/yt-dlp/issues/6493#issuecomment-1462906556 --- yt_dlp/extractor/hidive.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 8a8749859..df6868df6 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -47,15 +47,16 @@ def _perform_login(self, username, password): login_webpage = self._download_webpage( self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data)) # If the user has multiple profiles on their account, select one. For now pick the first profile. - profile_id = self._search_regex(r'