1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 10:42:34 +01:00

[formatter] implement slicing strings as bytes (#4087)

prefixing a slice '[10:30]' with a lowercase b '[b10:30]' encodes
the string to bytes in filesystem encoding before applying the slice
This commit is contained in:
Mike Fährmann 2023-05-22 18:30:45 +02:00
parent 56b8b8cd36
commit 69865dcc05
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 66 additions and 16 deletions

View File

@ -11,14 +11,15 @@ Field names select the metadata value to use in a replacement field.
While simple names are usually enough, more complex forms like accessing values by attribute, element index, or slicing are also supported. While simple names are usually enough, more complex forms like accessing values by attribute, element index, or slicing are also supported.
| | Example | Result | | | Example | Result |
| -------------------- | ----------------- | ---------------------- | | -------------------- | ------------------- | ---------------------- |
| Name | `{title}` | `Hello World` | | Name | `{title}` | `Hello World` |
| Element Index | `{title[6]}` | `W` | | Element Index | `{title[6]}` | `W` |
| Slicing | `{title[3:8]}` | `lo Wo` | | Slicing | `{title[3:8]}` | `lo Wo` |
| Alternatives | `{empty\|title}` | `Hello World` | | Slicing (Bytes) | `{title_ja[b3:18]}` | `ロー・ワー` |
| Element Access | `{user[name]}` | `John Doe` | | Alternatives | `{empty\|title}` | `Hello World` |
| Attribute Access | `{extractor.url}` | `https://example.org/` | | Element Access | `{user[name]}` | `John Doe` |
| Attribute Access | `{extractor.url}` | `https://example.org/` |
All of these methods can be combined as needed. All of these methods can be combined as needed.
For example `{title[24]|empty|extractor.url[15:-1]}` would result in `.org`. For example `{title[24]|empty|extractor.url[15:-1]}` would result in `.org`.
@ -150,6 +151,12 @@ Format specifiers can be used for advanced formatting by using the options provi
<td><code>{foo:[1:-1]}</code></td> <td><code>{foo:[1:-1]}</code></td>
<td><code>oo&nbsp;Ba</code></td> <td><code>oo&nbsp;Ba</code></td>
</tr> </tr>
<tr>
<td><code>[b&lt;start&gt;:&lt;stop&gt;]</code></td>
<td>Same as above, but applies to the <a href="https://docs.python.org/3/library/stdtypes.html#bytes"><code>bytes()</code></a> representation of a string in <a href="https://docs.python.org/3/library/sys.html#sys.getfilesystemencoding">filesystem encoding</a></td>
<td><code>{foo_ja:[b3:-1]}</code></td>
<td><code>ー・バ</code></td>
</tr>
<tr> <tr>
<td rowspan="2"><code>L&lt;maxlen&gt;/&lt;repl&gt;/</code></td> <td rowspan="2"><code>L&lt;maxlen&gt;/&lt;repl&gt;/</code></td>
<td rowspan="2">Replaces the entire output with <code>&lt;repl&gt;</code> if its length exceeds <code>&lt;maxlen&gt;</code></td> <td rowspan="2">Replaces the entire output with <code>&lt;repl&gt;</code> if its length exceeds <code>&lt;maxlen&gt;</code></td>

View File

@ -9,6 +9,7 @@
"""String formatters""" """String formatters"""
import os import os
import sys
import time import time
import string import string
import _string import _string
@ -255,7 +256,11 @@ def parse_field_name(field_name):
func = operator.itemgetter func = operator.itemgetter
try: try:
if ":" in key: if ":" in key:
key = _slice(key) if key[0] == "b":
func = _bytesgetter
key = _slice(key[1:])
else:
key = _slice(key)
else: else:
key = key.strip("\"'") key = key.strip("\"'")
except TypeError: except TypeError:
@ -276,6 +281,14 @@ def _slice(indices):
) )
def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
    """Return a callable that slices a string by byte offsets.

    The returned function encodes its argument with 'encoding', applies
    'slice' to the resulting bytes, and decodes the remainder back into
    a string.

    'encoding' defaults to the filesystem encoding, looked up once when
    this function is defined.
    """
    def apply_slice_bytes(obj):
        encoded = obj.encode(encoding)
        # a slice boundary can fall inside a multi-byte sequence;
        # "ignore" drops such undecodable leftovers instead of raising
        return encoded[slice].decode(encoding, "ignore")

    return apply_slice_bytes
def _build_format_func(format_spec, default): def _build_format_func(format_spec, default):
if format_spec: if format_spec:
return _FORMAT_SPECIFIERS.get( return _FORMAT_SPECIFIERS.get(
@ -295,11 +308,20 @@ def _parse_optional(format_spec, default):
def _parse_slice(format_spec, default): def _parse_slice(format_spec, default):
indices, _, format_spec = format_spec.partition("]") indices, _, format_spec = format_spec.partition("]")
slice = _slice(indices[1:])
fmt = _build_format_func(format_spec, default) fmt = _build_format_func(format_spec, default)
def apply_slice(obj): if indices[1] == "b":
return fmt(obj[slice]) slice_bytes = _bytesgetter(_slice(indices[2:]))
def apply_slice(obj):
return fmt(slice_bytes(obj))
else:
slice = _slice(indices[1:])
def apply_slice(obj):
return fmt(obj[slice])
return apply_slice return apply_slice

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2021-2022 Mike Fährmann # Copyright 2021-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@ -23,6 +23,7 @@ class TestFormatter(unittest.TestCase):
kwdict = { kwdict = {
"a": "hElLo wOrLd", "a": "hElLo wOrLd",
"b": "äöü", "b": "äöü",
"j": "げんそうきょう",
"d": {"a": "foo", "b": 0, "c": None}, "d": {"a": "foo", "b": 0, "c": None},
"l": ["a", "b", "c"], "l": ["a", "b", "c"],
"n": None, "n": None,
@ -133,7 +134,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{d['a']}", "foo") self._run_test("{d['a']}", "foo")
self._run_test('{d["a"]}', "foo") self._run_test('{d["a"]}', "foo")
def test_slicing(self): def test_slice_str(self):
v = self.kwdict["a"] v = self.kwdict["a"]
self._run_test("{a[1:10]}" , v[1:10]) self._run_test("{a[1:10]}" , v[1:10])
self._run_test("{a[-10:-1]}", v[-10:-1]) self._run_test("{a[-10:-1]}", v[-10:-1])
@ -165,6 +166,26 @@ class TestFormatter(unittest.TestCase):
self._run_test("{a:[:50:2]}", v[:50:2]) self._run_test("{a:[:50:2]}", v[:50:2])
self._run_test("{a:[::]}" , v) self._run_test("{a:[::]}" , v)
def test_slice_bytes(self):
    """Check '[b<start>:<stop>]' slicing in field names and format specs"""
    v = self.kwdict["j"]

    # byte-slice indices paired with the character slice they should yield
    # (expected values assume 3 bytes per character, as in UTF-8)
    cases = (
        ("b1:10"  , v[1:3]),
        ("b-10:-1", v[-3:-1]),
        ("b5:"    , v[2:]),
        ("b50:"   , v[50:]),
        ("b:5"    , v[:1]),
        ("b:50"   , v[:50]),
        ("b:"     , v),
        ("b::"    , v),
    )

    # field-name form '{j[...]}' first, then format-spec form '{j:[...]}'
    for template in ("{{j[{}]}}", "{{j:[{}]}}"):
        for indices, expected in cases:
            self._run_test(template.format(indices), expected)
def test_maxlen(self): def test_maxlen(self):
v = self.kwdict["a"] v = self.kwdict["a"]
self._run_test("{a:L5/foo/}" , "foo") self._run_test("{a:L5/foo/}" , "foo")
@ -413,10 +434,10 @@ def noarg():
fmt4 = formatter.parse("\fM " + path + ":lengths") fmt4 = formatter.parse("\fM " + path + ":lengths")
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name") self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
self.assertEqual(fmt2.format_map(self.kwdict), "89") self.assertEqual(fmt2.format_map(self.kwdict), "96")
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name") self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
self.assertEqual(fmt4.format_map(self.kwdict), "89") self.assertEqual(fmt4.format_map(self.kwdict), "96")
with self.assertRaises(TypeError): with self.assertRaises(TypeError):
self.assertEqual(fmt0.format_map(self.kwdict), "") self.assertEqual(fmt0.format_map(self.kwdict), "")