')
uploader_name = extract_attributes(uploader).get('aria-label')
- video_json = try_get(data_json, lambda x: x[serverstate]['exportData']['video'], dict)
- stream_urls = try_get(video_json, lambda x: x['video']['streams'])
+ item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
+ video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
+
formats, subtitles = [], {}
- for s_url in stream_urls:
+ for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})):
ext = determine_ext(s_url)
if ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py
index 6ee0abcae..6d4e31bf3 100644
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
'id': '16290308',
'age_limit': 18,
'categories': [],
- 'description': 'md5:00ea70f642f431c379763c17c2f396bc',
+ 'description': str, # TODO: detect/remove SEO spam description in ytdl backport
'display_id': 'tinderspecial-trailer1',
'duration': 298.0,
'ext': 'mp4',
'upload_date': '20201123',
'uploader': 'Ersties',
'tags': [],
- 'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
- 'timestamp': 1606089600,
+ 'thumbnail': r're:https://.+\.jpg',
+ 'timestamp': 1606147564,
'title': 'Tinder In Real Life',
'view_count': int,
}
@@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
- definitions = self._download_json(
- f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
+ self._set_cookie('.youporn.com', 'age_verified', '1')
+ webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
+ definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']
- def get_format_data(data, f):
- return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
+ def get_format_data(data, stream_type):
+ info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))  # URL of the info JSON, read from a definition whose 'format' equals stream_type
+ if not info_url:
+ return []  # no usable URL for this stream type, so there is nothing to download
+ return traverse_obj(
+ self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),  # NOTE(review): fatal=False presumably makes a failed download non-fatal - confirm
+ lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))  # keep only entries of this format that carry a valid videoUrl
formats = []
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@@ -123,10 +129,6 @@ def get_format_data(data, f):
f['height'] = height
formats.append(f)
- webpage = self._download_webpage(
- 'http://www.youporn.com/watch/%s' % video_id, display_id,
- headers={'Cookie': 'age_verified=1'})
-
title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title(
diff --git a/yt_dlp/networking/_curlcffi.py b/yt_dlp/networking/_curlcffi.py
index 39d1f70fb..10751a105 100644
--- a/yt_dlp/networking/_curlcffi.py
+++ b/yt_dlp/networking/_curlcffi.py
@@ -132,6 +132,16 @@ def _check_extensions(self, extensions):
extensions.pop('cookiejar', None)
extensions.pop('timeout', None)
+ def send(self, request: Request) -> Response:
+ target = self._get_request_target(request)  # resolved up front so it can be attached on both the success and the error path
+ try:
+ response = super().send(request)
+ except HTTPError as e:
+ e.response.extensions['impersonate'] = target  # tag the response carried by the error before re-raising it unchanged
+ raise
+ response.extensions['impersonate'] = target  # record which target was used on the successful response
+ return response
+
def _send(self, request: Request):
max_redirects_exceeded = False
session: curl_cffi.requests.Session = self._get_instance(
diff --git a/yt_dlp/networking/common.py b/yt_dlp/networking/common.py
index 4c66ba66a..a2217034c 100644
--- a/yt_dlp/networking/common.py
+++ b/yt_dlp/networking/common.py
@@ -497,6 +497,7 @@ class Response(io.IOBase):
@param headers: response headers.
@param status: Response HTTP status code. Default is 200 OK.
@param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
+ @param extensions: Dictionary of handler-specific response extensions.
"""
def __init__(
@@ -505,7 +506,9 @@ def __init__(
url: str,
headers: Mapping[str, str],
status: int = 200,
- reason: str = None):
+ reason: str = None,
+ extensions: dict = None
+ ):
self.fp = fp
self.headers = Message()
@@ -517,6 +520,7 @@ def __init__(
self.reason = reason or HTTPStatus(status).phrase
except ValueError:
self.reason = None
+ self.extensions = extensions or {}
def readable(self):
return self.fp.readable()
diff --git a/yt_dlp/update.py b/yt_dlp/update.py
index f47cbc5b2..ca70f69a7 100644
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@@ -69,6 +69,10 @@ def _get_variant_and_executable_path():
# Ref: https://en.wikipedia.org/wiki/Uname#Examples
if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
machine = '_x86' if platform.architecture()[0][:2] == '32' else ''
+ # sys.executable returns a /tmp/ path for staticx builds (linux_static)
+ # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information
+ if static_exe_path := os.getenv('STATICX_PROG_PATH'):
+ path = static_exe_path
return f'{remove_end(sys.platform, "32")}{machine}_exe', path
path = os.path.dirname(__file__)
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 262788c4b..01d54b846 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -1638,16 +1638,14 @@ def get_filesystem_encoding():
return encoding if encoding is not None else 'utf-8'
-_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'})
+_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
_CMD_QUOTE_TRANS = str.maketrans({
# Keep quotes balanced by replacing them with `""` instead of `\\"`
'"': '""',
- # Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`)
+ # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
# `=` should be unique since variables containing `=` cannot be set using cmd
'\n': '%=%',
- # While we are only required to escape backslashes immediately before quotes,
- # we instead escape all of 'em anyways to be consistent
- '\\': '\\\\',
+ '\r': '%=%',
# Use zero length variable replacement so `%` doesn't get expanded
# `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
'%': '%%cd:~,%',
@@ -1656,19 +1654,14 @@ def get_filesystem_encoding():
def shell_quote(args, *, shell=False):
args = list(variadic(args))
- if any(isinstance(item, bytes) for item in args):
- deprecation_warning('Passing bytes to utils.shell_quote is deprecated')
- encoding = get_filesystem_encoding()
- for index, item in enumerate(args):
- if isinstance(item, bytes):
- args[index] = item.decode(encoding)
if compat_os_name != 'nt':
return shlex.join(args)
trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
return ' '.join(
- s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""')
+ s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)  # args consisting solely of these ASCII characters are emitted unquoted
+ else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')  # otherwise: double each backslash run that precedes a quote or the end of the arg, apply the translation table, wrap in double quotes
for s in args)