mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
[formatter] implement slicing strings as bytes (#4087)
Prefixing a slice like '[10:30]' with a lowercase 'b', as in '[b10:30]', encodes the string to bytes in the filesystem encoding before applying the slice.
This commit is contained in:
parent
56b8b8cd36
commit
69865dcc05
@ -11,14 +11,15 @@ Field names select the metadata value to use in a replacement field.
|
|||||||
|
|
||||||
While simple names are usually enough, more complex forms like accessing values by attribute, element index, or slicing are also supported.
|
While simple names are usually enough, more complex forms like accessing values by attribute, element index, or slicing are also supported.
|
||||||
|
|
||||||
| | Example | Result |
|
| | Example | Result |
|
||||||
| -------------------- | ----------------- | ---------------------- |
|
| -------------------- | ------------------- | ---------------------- |
|
||||||
| Name | `{title}` | `Hello World` |
|
| Name | `{title}` | `Hello World` |
|
||||||
| Element Index | `{title[6]}` | `W` |
|
| Element Index | `{title[6]}` | `W` |
|
||||||
| Slicing | `{title[3:8]}` | `lo Wo` |
|
| Slicing | `{title[3:8]}` | `lo Wo` |
|
||||||
| Alternatives | `{empty\|title}` | `Hello World` |
|
| Slicing (Bytes) | `{title_ja[b3:18]}` | `ロー・ワー` |
|
||||||
| Element Access | `{user[name]}` | `John Doe` |
|
| Alternatives | `{empty\|title}` | `Hello World` |
|
||||||
| Attribute Access | `{extractor.url}` | `https://example.org/` |
|
| Element Access | `{user[name]}` | `John Doe` |
|
||||||
|
| Attribute Access | `{extractor.url}` | `https://example.org/` |
|
||||||
|
|
||||||
All of these methods can be combined as needed.
|
All of these methods can be combined as needed.
|
||||||
For example `{title[24]|empty|extractor.url[15:-1]}` would result in `.org`.
|
For example `{title[24]|empty|extractor.url[15:-1]}` would result in `.org`.
|
||||||
@ -150,6 +151,12 @@ Format specifiers can be used for advanced formatting by using the options provi
|
|||||||
<td><code>{foo:[1:-1]}</code></td>
|
<td><code>{foo:[1:-1]}</code></td>
|
||||||
<td><code>oo Ba</code></td>
|
<td><code>oo Ba</code></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><code>[b<start>:<stop>]</code></td>
|
||||||
|
<td>Same as above, but applies to the <a href="https://docs.python.org/3/library/stdtypes.html#bytes"><code>bytes()</code></a> representation of a string in <a href="https://docs.python.org/3/library/sys.html#sys.getfilesystemencoding">filesystem encoding</a>; bytes of a multi-byte character that the slice cuts apart are discarded</td>
|
||||||
|
<td><code>{foo_ja:[b3:-1]}</code></td>
|
||||||
|
<td><code>ー・バ</code></td>
|
||||||
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td rowspan="2"><code>L<maxlen>/<repl>/</code></td>
|
<td rowspan="2"><code>L<maxlen>/<repl>/</code></td>
|
||||||
<td rowspan="2">Replaces the entire output with <code><repl></code> if its length exceeds <code><maxlen></code></td>
|
<td rowspan="2">Replaces the entire output with <code><repl></code> if its length exceeds <code><maxlen></code></td>
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
"""String formatters"""
|
"""String formatters"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
import string
|
import string
|
||||||
import _string
|
import _string
|
||||||
@ -255,7 +256,11 @@ def parse_field_name(field_name):
|
|||||||
func = operator.itemgetter
|
func = operator.itemgetter
|
||||||
try:
|
try:
|
||||||
if ":" in key:
|
if ":" in key:
|
||||||
key = _slice(key)
|
if key[0] == "b":
|
||||||
|
func = _bytesgetter
|
||||||
|
key = _slice(key[1:])
|
||||||
|
else:
|
||||||
|
key = _slice(key)
|
||||||
else:
|
else:
|
||||||
key = key.strip("\"'")
|
key = key.strip("\"'")
|
||||||
except TypeError:
|
except TypeError:
|
||||||
@ -276,6 +281,14 @@ def _slice(indices):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
|
||||||
|
|
||||||
|
def apply_slice_bytes(obj):
|
||||||
|
return obj.encode(encoding)[slice].decode(encoding, "ignore")
|
||||||
|
|
||||||
|
return apply_slice_bytes
|
||||||
|
|
||||||
|
|
||||||
def _build_format_func(format_spec, default):
|
def _build_format_func(format_spec, default):
|
||||||
if format_spec:
|
if format_spec:
|
||||||
return _FORMAT_SPECIFIERS.get(
|
return _FORMAT_SPECIFIERS.get(
|
||||||
@ -295,11 +308,20 @@ def _parse_optional(format_spec, default):
|
|||||||
|
|
||||||
def _parse_slice(format_spec, default):
|
def _parse_slice(format_spec, default):
|
||||||
indices, _, format_spec = format_spec.partition("]")
|
indices, _, format_spec = format_spec.partition("]")
|
||||||
slice = _slice(indices[1:])
|
|
||||||
fmt = _build_format_func(format_spec, default)
|
fmt = _build_format_func(format_spec, default)
|
||||||
|
|
||||||
def apply_slice(obj):
|
if indices[1] == "b":
|
||||||
return fmt(obj[slice])
|
slice_bytes = _bytesgetter(_slice(indices[2:]))
|
||||||
|
|
||||||
|
def apply_slice(obj):
|
||||||
|
return fmt(slice_bytes(obj))
|
||||||
|
|
||||||
|
else:
|
||||||
|
slice = _slice(indices[1:])
|
||||||
|
|
||||||
|
def apply_slice(obj):
|
||||||
|
return fmt(obj[slice])
|
||||||
|
|
||||||
return apply_slice
|
return apply_slice
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2021-2022 Mike Fährmann
|
# Copyright 2021-2023 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@ -23,6 +23,7 @@ class TestFormatter(unittest.TestCase):
|
|||||||
kwdict = {
|
kwdict = {
|
||||||
"a": "hElLo wOrLd",
|
"a": "hElLo wOrLd",
|
||||||
"b": "äöü",
|
"b": "äöü",
|
||||||
|
"j": "げんそうきょう",
|
||||||
"d": {"a": "foo", "b": 0, "c": None},
|
"d": {"a": "foo", "b": 0, "c": None},
|
||||||
"l": ["a", "b", "c"],
|
"l": ["a", "b", "c"],
|
||||||
"n": None,
|
"n": None,
|
||||||
@ -133,7 +134,7 @@ class TestFormatter(unittest.TestCase):
|
|||||||
self._run_test("{d['a']}", "foo")
|
self._run_test("{d['a']}", "foo")
|
||||||
self._run_test('{d["a"]}', "foo")
|
self._run_test('{d["a"]}', "foo")
|
||||||
|
|
||||||
def test_slicing(self):
|
def test_slice_str(self):
|
||||||
v = self.kwdict["a"]
|
v = self.kwdict["a"]
|
||||||
self._run_test("{a[1:10]}" , v[1:10])
|
self._run_test("{a[1:10]}" , v[1:10])
|
||||||
self._run_test("{a[-10:-1]}", v[-10:-1])
|
self._run_test("{a[-10:-1]}", v[-10:-1])
|
||||||
@ -165,6 +166,26 @@ class TestFormatter(unittest.TestCase):
|
|||||||
self._run_test("{a:[:50:2]}", v[:50:2])
|
self._run_test("{a:[:50:2]}", v[:50:2])
|
||||||
self._run_test("{a:[::]}" , v)
|
self._run_test("{a:[::]}" , v)
|
||||||
|
|
||||||
|
def test_slice_bytes(self):
|
||||||
|
v = self.kwdict["j"]
|
||||||
|
self._run_test("{j[b1:10]}" , v[1:3])
|
||||||
|
self._run_test("{j[b-10:-1]}", v[-3:-1])
|
||||||
|
self._run_test("{j[b5:]}" , v[2:])
|
||||||
|
self._run_test("{j[b50:]}" , v[50:])
|
||||||
|
self._run_test("{j[b:5]}" , v[:1])
|
||||||
|
self._run_test("{j[b:50]}" , v[:50])
|
||||||
|
self._run_test("{j[b:]}" , v)
|
||||||
|
self._run_test("{j[b::]}" , v)
|
||||||
|
|
||||||
|
self._run_test("{j:[b1:10]}" , v[1:3])
|
||||||
|
self._run_test("{j:[b-10:-1]}", v[-3:-1])
|
||||||
|
self._run_test("{j:[b5:]}" , v[2:])
|
||||||
|
self._run_test("{j:[b50:]}" , v[50:])
|
||||||
|
self._run_test("{j:[b:5]}" , v[:1])
|
||||||
|
self._run_test("{j:[b:50]}" , v[:50])
|
||||||
|
self._run_test("{j:[b:]}" , v)
|
||||||
|
self._run_test("{j:[b::]}" , v)
|
||||||
|
|
||||||
def test_maxlen(self):
|
def test_maxlen(self):
|
||||||
v = self.kwdict["a"]
|
v = self.kwdict["a"]
|
||||||
self._run_test("{a:L5/foo/}" , "foo")
|
self._run_test("{a:L5/foo/}" , "foo")
|
||||||
@ -413,10 +434,10 @@ def noarg():
|
|||||||
fmt4 = formatter.parse("\fM " + path + ":lengths")
|
fmt4 = formatter.parse("\fM " + path + ":lengths")
|
||||||
|
|
||||||
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
|
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
|
||||||
self.assertEqual(fmt2.format_map(self.kwdict), "89")
|
self.assertEqual(fmt2.format_map(self.kwdict), "96")
|
||||||
|
|
||||||
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
|
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
|
||||||
self.assertEqual(fmt4.format_map(self.kwdict), "89")
|
self.assertEqual(fmt4.format_map(self.kwdict), "96")
|
||||||
|
|
||||||
with self.assertRaises(TypeError):
|
with self.assertRaises(TypeError):
|
||||||
self.assertEqual(fmt0.format_map(self.kwdict), "")
|
self.assertEqual(fmt0.format_map(self.kwdict), "")
|
||||||
|
Loading…
Reference in New Issue
Block a user