1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 10:42:34 +01:00

[formatter] implement slicing strings as bytes (#4087)

Prefixing a slice '[10:30]' with a lowercase 'b', as in '[b10:30]', encodes
the string to bytes in the filesystem encoding before applying the slice.
This commit is contained in:
Mike Fährmann 2023-05-22 18:30:45 +02:00
parent 56b8b8cd36
commit 69865dcc05
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 66 additions and 16 deletions

View File

@ -12,10 +12,11 @@ Field names select the metadata value to use in a replacement field.
While simple names are usually enough, more complex forms like accessing values by attribute, element index, or slicing are also supported.
| | Example | Result |
| -------------------- | ----------------- | ---------------------- |
| -------------------- | ------------------- | ---------------------- |
| Name | `{title}` | `Hello World` |
| Element Index | `{title[6]}` | `W` |
| Slicing | `{title[3:8]}` | `lo Wo` |
| Slicing (Bytes) | `{title_ja[b3:18]}` | `ロー・ワー` |
| Alternatives | `{empty\|title}` | `Hello World` |
| Element Access | `{user[name]}` | `John Doe` |
| Attribute Access | `{extractor.url}` | `https://example.org/` |
@ -150,6 +151,12 @@ Format specifiers can be used for advanced formatting by using the options provi
<td><code>{foo:[1:-1]}</code></td>
<td><code>oo&nbsp;Ba</code></td>
</tr>
<tr>
<td><code>[b&lt;start&gt;:&lt;stop&gt;]</code></td>
<td>Same as above, but applies to the <a href="https://docs.python.org/3/library/stdtypes.html#bytes"><code>bytes()</code></a> representation of a string in <a href="https://docs.python.org/3/library/sys.html#sys.getfilesystemencoding">filesystem encoding</a></td>
<td><code>{foo_ja:[b3:-1]}</code></td>
<td><code>ー・バ</code></td>
</tr>
<tr>
<td rowspan="2"><code>L&lt;maxlen&gt;/&lt;repl&gt;/</code></td>
<td rowspan="2">Replaces the entire output with <code>&lt;repl&gt;</code> if its length exceeds <code>&lt;maxlen&gt;</code></td>

View File

@ -9,6 +9,7 @@
"""String formatters"""
import os
import sys
import time
import string
import _string
@ -255,6 +256,10 @@ def parse_field_name(field_name):
func = operator.itemgetter
try:
if ":" in key:
if key[0] == "b":
func = _bytesgetter
key = _slice(key[1:])
else:
key = _slice(key)
else:
key = key.strip("\"'")
@ -276,6 +281,14 @@ def _slice(indices):
)
def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
    """Return a function that slices a string by its encoded bytes.

    The returned callable encodes its argument with 'encoding', applies
    'slice' to the resulting bytes, and decodes what is left back into a
    string. Bytes that cannot be decoded after slicing (for example the
    remains of a multi-byte character cut in half) are dropped by the
    "ignore" error handler.

    The default for 'encoding' is evaluated once, when this function is
    defined.
    """
    def apply_slice_bytes(obj):
        encoded = obj.encode(encoding)
        return encoded[slice].decode(encoding, "ignore")

    return apply_slice_bytes
def _build_format_func(format_spec, default):
if format_spec:
return _FORMAT_SPECIFIERS.get(
@ -295,11 +308,20 @@ def _parse_optional(format_spec, default):
def _parse_slice(format_spec, default):
    """Build a function applying a '[start:stop]' or '[bstart:stop]' slice.

    Everything in 'format_spec' up to the first "]" is taken as the slice
    expression; the remainder is handed to _build_format_func() together
    with 'default' to produce the formatter applied to the sliced value.

    A lowercase "b" directly in front of the indices selects byte-wise
    slicing through _bytesgetter(); otherwise the object is sliced directly.

    Returns a one-argument callable that slices and then formats its input.
    """
    indices, _, format_spec = format_spec.partition("]")
    fmt = _build_format_func(format_spec, default)

    # Position 0 of 'indices' is presumably the opening "[" (the code skips
    # it before parsing), so the optional "b" marker sits at position 1.
    # A one-element slice is used for the comparison so that a spec with
    # nothing after the bracket does not raise IndexError here.
    if indices[1:2] == "b":
        # Parse only the part after the "b"; the indices are built once,
        # and only for the branch that actually uses them.
        slice_bytes = _bytesgetter(_slice(indices[2:]))

        def apply_slice(obj):
            return fmt(slice_bytes(obj))
    else:
        slice = _slice(indices[1:])

        def apply_slice(obj):
            return fmt(obj[slice])

    return apply_slice

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2021-2022 Mike Fährmann
# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -23,6 +23,7 @@ class TestFormatter(unittest.TestCase):
kwdict = {
"a": "hElLo wOrLd",
"b": "äöü",
"j": "げんそうきょう",
"d": {"a": "foo", "b": 0, "c": None},
"l": ["a", "b", "c"],
"n": None,
@ -133,7 +134,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{d['a']}", "foo")
self._run_test('{d["a"]}', "foo")
def test_slicing(self):
def test_slice_str(self):
v = self.kwdict["a"]
self._run_test("{a[1:10]}" , v[1:10])
self._run_test("{a[-10:-1]}", v[-10:-1])
@ -165,6 +166,26 @@ class TestFormatter(unittest.TestCase):
self._run_test("{a:[:50:2]}", v[:50:2])
self._run_test("{a:[::]}" , v)
def test_slice_bytes(self):
    v = self.kwdict["j"]

    # Pairs of (byte-slice indices, expected text).
    # NOTE(review): the expected character slices presumably assume that
    # every character of 'j' takes 3 bytes in the filesystem encoding
    # (as in UTF-8) - confirm for other encodings.
    cases = (
        ("1:10"  , v[1:3]),
        ("-10:-1", v[-3:-1]),
        ("5:"    , v[2:]),
        ("50:"   , v[50:]),
        (":5"    , v[:1]),
        (":50"   , v[:50]),
        (":"     , v),
        ("::"    , v),
    )

    # Exercise the field-name form '{j[b...]}' first, then the
    # format-specifier form '{j:[b...]}', each with every case in order.
    for template in ("{{j[b{}]}}", "{{j:[b{}]}}"):
        for indices, expected in cases:
            self._run_test(template.format(indices), expected)
def test_maxlen(self):
v = self.kwdict["a"]
self._run_test("{a:L5/foo/}" , "foo")
@ -413,10 +434,10 @@ def noarg():
fmt4 = formatter.parse("\fM " + path + ":lengths")
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
self.assertEqual(fmt2.format_map(self.kwdict), "89")
self.assertEqual(fmt2.format_map(self.kwdict), "96")
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
self.assertEqual(fmt4.format_map(self.kwdict), "89")
self.assertEqual(fmt4.format_map(self.kwdict), "96")
with self.assertRaises(TypeError):
self.assertEqual(fmt0.format_map(self.kwdict), "")