mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-02 17:22:31 +01:00
Merge pull request #6463 from jaimeMF/format_spec_fix_dashes
[YoutubeDL] format spec: correctly handle dashes and other unused operators
This commit is contained in:
commit
47f53ad958
@ -105,6 +105,7 @@ def test_prefer_free_formats(self):
|
|||||||
def test_format_selection(self):
|
def test_format_selection(self):
|
||||||
formats = [
|
formats = [
|
||||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
||||||
|
{'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
|
||||||
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
|
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
|
||||||
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
|
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
|
||||||
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
|
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
|
||||||
@ -136,6 +137,11 @@ def test_format_selection(self):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], '35')
|
self.assertEqual(downloaded['format_id'], '35')
|
||||||
|
|
||||||
|
ydl = YDL({'format': 'example-with-dashes'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'example-with-dashes')
|
||||||
|
|
||||||
def test_format_selection_audio(self):
|
def test_format_selection_audio(self):
|
||||||
formats = [
|
formats = [
|
||||||
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||||
|
@ -933,6 +933,37 @@ def _parse_filter(tokens):
|
|||||||
else:
|
else:
|
||||||
filter_parts.append(string)
|
filter_parts.append(string)
|
||||||
|
|
||||||
|
def _remove_unused_ops(tokens):
    """Merge operators the format-spec grammar does not use into adjacent names.

    Remove operators that we don't use and join them with the surrounding
    strings, for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to
    'mp4-baseline-16x9'.

    ``tokens`` is an iterable of 5-tuples ``(type, string, start, end, line)``
    as produced by the ``tokenize`` module. The generator yields tuples of the
    same shape:

    * operators listed in ``ALLOWED_OPS`` are passed through unchanged;
    * everything from '[' up to and including the matching ']' is passed
      through unchanged (it will be handled by _parse_filter);
    * any other run of NAME / NUMBER / OP tokens is concatenated and emitted
      as a single NAME token spanning from the start of the first fragment to
      the end of the last one;
    * tokens of any other type outside brackets are dropped.
    """
    ALLOWED_OPS = ('/', '+', ',', '(', ')')
    # The nested loop below has to continue where the outer loop stopped, so
    # both must draw from the same one-shot iterator even if a list was given.
    tokens = iter(tokens)
    last_string, last_start, last_end, last_line = None, None, None, None
    for tok_type, string, start, end, line in tokens:
        if tok_type == tokenize.OP and string == '[':
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                last_string = None
            yield tok_type, string, start, end, line
            # everything inside brackets will be handled by _parse_filter
            for tok_type, string, start, end, line in tokens:
                yield tok_type, string, start, end, line
                if tok_type == tokenize.OP and string == ']':
                    break
        elif tok_type == tokenize.OP and string in ALLOWED_OPS:
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                last_string = None
            yield tok_type, string, start, end, line
        elif tok_type in (tokenize.NAME, tokenize.NUMBER, tokenize.OP):
            if not last_string:
                # First fragment of a new merged name: remember where it began.
                last_string = string
                last_start = start
                last_line = line
            else:
                last_string += string
            # The merged token always ends where its latest fragment ends.
            last_end = end
    if last_string:
        yield tokenize.NAME, last_string, last_start, last_end, last_line
|
||||||
|
|
||||||
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
|
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
|
||||||
selectors = []
|
selectors = []
|
||||||
current_selector = None
|
current_selector = None
|
||||||
@ -1111,7 +1142,7 @@ def final_selector(formats):
|
|||||||
|
|
||||||
stream = io.BytesIO(format_spec.encode('utf-8'))
|
stream = io.BytesIO(format_spec.encode('utf-8'))
|
||||||
try:
|
try:
|
||||||
tokens = list(compat_tokenize_tokenize(stream.readline))
|
tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
|
||||||
except tokenize.TokenError:
|
except tokenize.TokenError:
|
||||||
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
|
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user