diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 12ec5b0d8..63bc8713a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -468,7 +468,7 @@ jobs: - name: Install Requirements run: | python devscripts/install_deps.py -o --include build - python devscripts/install_deps.py + python devscripts/install_deps.py --include curl-cffi python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl" - name: Prepare diff --git a/README.md b/README.md index 96ce739f8..f13562ae9 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,7 @@ #### Impersonation * [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE) * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"` - * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds + * Currently included in `yt-dlp.exe`, `yt-dlp_x86.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds ### Metadata diff --git a/bundle/docker/static/entrypoint.sh b/bundle/docker/static/entrypoint.sh index 93d84fa9b..220275974 100755 --- a/bundle/docker/static/entrypoint.sh +++ b/bundle/docker/static/entrypoint.sh @@ -2,7 +2,7 @@ set -e source ~/.local/share/pipx/venvs/pyinstaller/bin/activate -python -m devscripts.install_deps --include secretstorage +python -m devscripts.install_deps --include secretstorage --include curl-cffi python -m devscripts.make_lazy_extractors python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}" python -m bundle.pyinstaller diff --git a/pyproject.toml b/pyproject.toml index 39986a355..4561abaf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,9 @@ dependencies = [ [project.optional-dependencies] default = [] -curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"] +curl-cffi = [ + "curl-cffi>=0.5.10,!=0.6.*,<0.8; implementation_name=='cpython'", +] secretstorage = [ "cffi", "secretstorage", diff --git a/test/test_networking.py b/test/test_networking.py index af3ece3b4..983c89e2e 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -914,7 +914,6 @@ def mock_close(*args, **kwargs): class TestCurlCFFIRequestHandler(TestRequestHandlerBase): @pytest.mark.parametrize('params,extensions', [ - ({}, {'impersonate': ImpersonateTarget('chrome')}), ({'impersonate': ImpersonateTarget('chrome', '110')}, {}), ({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}), ]) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index c3505b14f..aa1dcecf6 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1458,9 +1458,11 @@ def _real_extract(self, url): if webpage: data = self._get_sigi_state(webpage, uploader or room_id) - room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False) - or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None) - or room_id) + room_id = ( + traverse_obj(data, (( + ('LiveRoom', 'liveRoomUserInfo', 'user'), + ('UserModule', 'users', ...)), 'roomId', {str}, any)) + or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=room_id)) uploader = uploader or traverse_obj( data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'), ('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1c0a70d35..dd98c34b6 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3142,7 +3142,7 @@ def _extract_n_function_name(self, jscode): def _extract_n_function_code(self, video_id, player_url): player_id = self._extract_player_info(player_url) - func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1') + func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09') jscode = func_code or self._load_player(video_id, player_url) jsi = JSInterpreter(jscode) diff --git a/yt_dlp/networking/_curlcffi.py b/yt_dlp/networking/_curlcffi.py index b1f0fb82e..45b25cefb 100644 --- a/yt_dlp/networking/_curlcffi.py +++ b/yt_dlp/networking/_curlcffi.py @@ -2,6 +2,7 @@ import io import math +import re import urllib.parse from ._helper import InstanceStoreMixin, select_proxy @@ -27,11 +28,12 @@ if curl_cffi is None: raise ImportError('curl_cffi is not installed') -curl_cffi_version = tuple(int_or_none(x, default=0) for x in curl_cffi.__version__.split('.')) -if curl_cffi_version != (0, 5, 10): +curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3])) + +if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 8, 0)): curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)' - raise ImportError('Only curl_cffi 0.5.10 is supported') + raise ImportError('Only curl_cffi versions 0.5.10, 0.7.X are supported') import curl_cffi.requests from curl_cffi.const import CurlECode, CurlOpt @@ -110,6 +112,13 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY) _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h') _SUPPORTED_IMPERSONATE_TARGET_MAP = { + **({ + ImpersonateTarget('chrome', '124', 'macos', '14'): curl_cffi.requests.BrowserType.chrome124, + ImpersonateTarget('chrome', '123', 'macos', '14'): curl_cffi.requests.BrowserType.chrome123, + ImpersonateTarget('chrome', '120', 'macos', '14'): curl_cffi.requests.BrowserType.chrome120, + ImpersonateTarget('chrome', '119', 'macos', '14'): curl_cffi.requests.BrowserType.chrome119, + ImpersonateTarget('chrome', '116', 'windows', '10'): curl_cffi.requests.BrowserType.chrome116, + } if curl_cffi_version >= (0, 7, 0) else {}), ImpersonateTarget('chrome', '110', 'windows', '10'): curl_cffi.requests.BrowserType.chrome110, ImpersonateTarget('chrome', '107', 'windows', '10'): curl_cffi.requests.BrowserType.chrome107, ImpersonateTarget('chrome', '104', 'windows', '10'): curl_cffi.requests.BrowserType.chrome104, @@ -118,9 +127,15 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): ImpersonateTarget('chrome', '99', 'windows', '10'): curl_cffi.requests.BrowserType.chrome99, ImpersonateTarget('edge', '101', 'windows', '10'): curl_cffi.requests.BrowserType.edge101, ImpersonateTarget('edge', '99', 'windows', '10'): curl_cffi.requests.BrowserType.edge99, + **({ + ImpersonateTarget('safari', '17.0', 'macos', '14'): curl_cffi.requests.BrowserType.safari17_0, + } if curl_cffi_version >= (0, 7, 0) else {}), ImpersonateTarget('safari', '15.5', 'macos', '12'): curl_cffi.requests.BrowserType.safari15_5, ImpersonateTarget('safari', '15.3', 'macos', '11'): curl_cffi.requests.BrowserType.safari15_3, ImpersonateTarget('chrome', '99', 'android', '12'): curl_cffi.requests.BrowserType.chrome99_android, + **({ + ImpersonateTarget('safari', '17.2', 'ios', '17.2'): curl_cffi.requests.BrowserType.safari17_2_ios, + } if curl_cffi_version >= (0, 7, 0) else {}), } def _create_instance(self, cookiejar=None): @@ -187,7 +202,7 @@ def _send(self, request: Request): timeout = self._calculate_timeout(request) # set CURLOPT_LOW_SPEED_LIMIT and CURLOPT_LOW_SPEED_TIME to act as a read timeout. [1] - # curl_cffi does not currently do this. [2] + # This is required only for 0.5.10 [2] # Note: CURLOPT_LOW_SPEED_TIME is in seconds, so we need to round up to the nearest second. [3] # [1] https://unix.stackexchange.com/a/305311 # [2] https://github.com/yifeikong/curl_cffi/issues/156 @@ -203,7 +218,7 @@ def _send(self, request: Request): data=request.data, verify=self.verify, max_redirects=5, - timeout=timeout, + timeout=(timeout, timeout), impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get( self._get_request_target(request)), interface=self.source_address, @@ -222,7 +237,7 @@ def _send(self, request: Request): elif ( e.code == CurlECode.PROXY - or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e)) + or (e.code == CurlECode.RECV_ERROR and 'CONNECT' in str(e)) ): raise ProxyError(cause=e) from e else: