mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
[formatter] implement slicing strings as bytes (#4087)
Prefixing a slice such as '[10:30]' with a lowercase 'b' (i.e. '[b10:30]') encodes the string to bytes in the filesystem encoding before applying the slice.
This commit is contained in:
parent
56b8b8cd36
commit
69865dcc05
@ -12,10 +12,11 @@ Field names select the metadata value to use in a replacement field.
|
||||
While simple names are usually enough, more complex forms like accessing values by attribute, element index, or slicing are also supported.
|
||||
|
||||
| | Example | Result |
|
||||
| -------------------- | ----------------- | ---------------------- |
|
||||
| -------------------- | ------------------- | ---------------------- |
|
||||
| Name | `{title}` | `Hello World` |
|
||||
| Element Index | `{title[6]}` | `W` |
|
||||
| Slicing | `{title[3:8]}` | `lo Wo` |
|
||||
| Slicing (Bytes) | `{title_ja[b3:18]}` | `ロー・ワー` |
|
||||
| Alternatives | `{empty\|title}` | `Hello World` |
|
||||
| Element Access | `{user[name]}` | `John Doe` |
|
||||
| Attribute Access | `{extractor.url}` | `https://example.org/` |
|
||||
@ -150,6 +151,12 @@ Format specifiers can be used for advanced formatting by using the options provi
|
||||
<td><code>{foo:[1:-1]}</code></td>
|
||||
<td><code>oo Ba</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>[b<start>:<stop>]</code></td>
|
||||
<td>Same as above, but applies to the <a href="https://docs.python.org/3/library/stdtypes.html#bytes"><code>bytes()</code></a> representation of a string in <a href="https://docs.python.org/3/library/sys.html#sys.getfilesystemencoding">filesystem encoding</a></td>
|
||||
<td><code>{foo_ja:[b3:-1]}</code></td>
|
||||
<td><code>ー・バ</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="2"><code>L<maxlen>/<repl>/</code></td>
|
||||
<td rowspan="2">Replaces the entire output with <code><repl></code> if its length exceeds <code><maxlen></code></td>
|
||||
|
@ -9,6 +9,7 @@
|
||||
"""String formatters"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import string
|
||||
import _string
|
||||
@ -255,6 +256,10 @@ def parse_field_name(field_name):
|
||||
func = operator.itemgetter
|
||||
try:
|
||||
if ":" in key:
|
||||
if key[0] == "b":
|
||||
func = _bytesgetter
|
||||
key = _slice(key[1:])
|
||||
else:
|
||||
key = _slice(key)
|
||||
else:
|
||||
key = key.strip("\"'")
|
||||
@ -276,6 +281,14 @@ def _slice(indices):
|
||||
)
|
||||
|
||||
|
||||
def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
|
||||
|
||||
def apply_slice_bytes(obj):
|
||||
return obj.encode(encoding)[slice].decode(encoding, "ignore")
|
||||
|
||||
return apply_slice_bytes
|
||||
|
||||
|
||||
def _build_format_func(format_spec, default):
|
||||
if format_spec:
|
||||
return _FORMAT_SPECIFIERS.get(
|
||||
@ -295,11 +308,20 @@ def _parse_optional(format_spec, default):
|
||||
|
||||
def _parse_slice(format_spec, default):
    """Build a formatting function for a slice format specifier.

    'format_spec' starts with a bracketed slice such as '[1:-1]' or,
    for byte-wise slicing, '[b3:-1]'. Everything after the first ']' is
    passed on to _build_format_func() together with 'default'.

    Returns a callable that slices its argument and then applies the
    remaining format specifiers to the result.
    """
    indices, _, format_spec = format_spec.partition("]")
    fmt = _build_format_func(format_spec, default)

    # 'indices' still starts with the opening "[". Comparing a
    # one-character slice instead of indexing avoids an IndexError
    # when nothing follows the bracket (e.g. "[]").
    if indices[1:2] == "b":
        # Skip "[b" and slice the encoded bytes of the value.
        slice_bytes = _bytesgetter(_slice(indices[2:]))

        def apply_slice(obj):
            return fmt(slice_bytes(obj))

    else:
        # Skip "[" and slice the value itself.
        # NOTE(review): an empty index string is handed to _slice()
        # unchanged - confirm it accepts "".
        indices_slice = _slice(indices[1:])

        def apply_slice(obj):
            return fmt(obj[indices_slice])

    return apply_slice
|
||||
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2021-2022 Mike Fährmann
|
||||
# Copyright 2021-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -23,6 +23,7 @@ class TestFormatter(unittest.TestCase):
|
||||
kwdict = {
|
||||
"a": "hElLo wOrLd",
|
||||
"b": "äöü",
|
||||
"j": "げんそうきょう",
|
||||
"d": {"a": "foo", "b": 0, "c": None},
|
||||
"l": ["a", "b", "c"],
|
||||
"n": None,
|
||||
@ -133,7 +134,7 @@ class TestFormatter(unittest.TestCase):
|
||||
self._run_test("{d['a']}", "foo")
|
||||
self._run_test('{d["a"]}', "foo")
|
||||
|
||||
def test_slicing(self):
|
||||
def test_slice_str(self):
|
||||
v = self.kwdict["a"]
|
||||
self._run_test("{a[1:10]}" , v[1:10])
|
||||
self._run_test("{a[-10:-1]}", v[-10:-1])
|
||||
@ -165,6 +166,26 @@ class TestFormatter(unittest.TestCase):
|
||||
self._run_test("{a:[:50:2]}", v[:50:2])
|
||||
self._run_test("{a:[::]}" , v)
|
||||
|
||||
def test_slice_bytes(self):
    """Check byte-wise slicing in field names and in format specifiers."""
    v = self.kwdict["j"]

    # (slice specifier, expected result) pairs.
    # NOTE(review): the expected values presumably assume a filesystem
    # encoding in which each character of 'v' takes 3 bytes (UTF-8).
    cases = (
        ("b1:10"  , v[1:3]),
        ("b-10:-1", v[-3:-1]),
        ("b5:"    , v[2:]),
        ("b50:"   , v[50:]),
        ("b:5"    , v[:1]),
        ("b:50"   , v[:50]),
        ("b:"     , v),
        ("b::"    , v),
    )

    # First the field-name form "{j[...]}", then the
    # format-specifier form "{j:[...]}".
    for prefix in ("{j[", "{j:["):
        for spec, expected in cases:
            self._run_test(prefix + spec + "]}", expected)
|
||||
|
||||
def test_maxlen(self):
|
||||
v = self.kwdict["a"]
|
||||
self._run_test("{a:L5/foo/}" , "foo")
|
||||
@ -413,10 +434,10 @@ def noarg():
|
||||
fmt4 = formatter.parse("\fM " + path + ":lengths")
|
||||
|
||||
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
|
||||
self.assertEqual(fmt2.format_map(self.kwdict), "89")
|
||||
self.assertEqual(fmt2.format_map(self.kwdict), "96")
|
||||
|
||||
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
|
||||
self.assertEqual(fmt4.format_map(self.kwdict), "89")
|
||||
self.assertEqual(fmt4.format_map(self.kwdict), "96")
|
||||
|
||||
with self.assertRaises(TypeError):
|
||||
self.assertEqual(fmt0.format_map(self.kwdict), "")
|
||||
|
Loading…
Reference in New Issue
Block a user