From eb33e6cf2dd17e6aef0e257bd7d15dacfd88a93c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 4 Nov 2022 21:37:36 +0100 Subject: [PATCH] add 'text.extr()' a stripped-down version of text.extract() that - always returns a string (like 'extract_from') - only returns a string - does not deal with 'pos' arguments - is ~20% faster --- gallery_dl/text.py | 9 +++++++++ test/test_text.py | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 79cf016e..cbba063d 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -120,6 +120,15 @@ def extract(txt, begin, end, pos=0): return None, pos +def extr(txt, begin, end): + """Stripped-down version of 'extract()'""" + try: + first = txt.index(begin) + len(begin) + return txt[first:txt.index(end, first)] + except (ValueError, TypeError, AttributeError): + return "" + + def rextract(txt, begin, end, pos=-1): try: lbeg = len(begin) diff --git a/test/test_text.py b/test/test_text.py index 0ac77671..a1b30887 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -203,6 +203,19 @@ class TestText(unittest.TestCase): self.assertEqual(f(txt , value, ">") , (None, 0)) self.assertEqual(f(txt , "<" , value), (None, 0)) + def test_extr(self, f=text.extr): + txt = "<a><b>" + self.assertEqual(f(txt, "X", ">"), "") + self.assertEqual(f(txt, "<", "X"), "") + self.assertEqual(f(txt, "<", ">"), "a") + self.assertEqual(f(txt, "><", ">"), "b") + + # invalid arguments + for value in INVALID: + self.assertEqual(f(value, "<" , ">") , "") + self.assertEqual(f(txt , value, ">") , "") + self.assertEqual(f(txt , "<" , value), "") + def test_rextract(self, f=text.rextract): txt = "<a><b>" self.assertEqual(f(txt, "<", ">"), ("b" , 3))