diff --git a/gallery_dl/text.py b/gallery_dl/text.py index f139173b..e20aa515 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -13,29 +13,8 @@ import html import datetime import urllib.parse - HTML_RE = re.compile("<[^>]+>") -INVALID_XML_CHARS = ( - "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", - "\x08", "\x0b", "\x0c", "\x0e", "\x0f", "\x10", "\x11", "\x12", - "\x13", "\x14", "\x15", "\x16", "\x17", "\x18", "\x19", "\x1a", - "\x1b", "\x1c", "\x1d", "\x1e", "\x1f", -) - - -def clean_xml(xmldata, repl=""): - """Replace/Remove invalid control characters in 'xmldata'""" - if not isinstance(xmldata, str): - try: - xmldata = "".join(xmldata) - except TypeError: - return "" - for char in INVALID_XML_CHARS: - if char in xmldata: - xmldata = xmldata.replace(char, repl) - return xmldata - def remove_html(txt, repl=" ", sep=" "): """Remove html-tags from a string""" diff --git a/test/test_text.py b/test/test_text.py index 72091fdc..675a04cf 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -23,29 +23,6 @@ INVALID_ALT = ((), [], {}, None, "") class TestText(unittest.TestCase): - def test_clean_xml(self, f=text.clean_xml): - # standard usage - self.assertEqual(f(""), "") - self.assertEqual(f("foo"), "foo") - self.assertEqual(f("\tfoo\nbar\r"), "\tfoo\nbar\r") - self.assertEqual(f("\ab\ba\fr\v"), "bar") - - # 'repl' argument - repl = "#" - self.assertEqual(f("", repl), "") - self.assertEqual(f("foo", repl), "foo") - self.assertEqual(f("\tfoo\nbar\r", repl), "\tfoo\nbar\r") - self.assertEqual( - f("\ab\ba\fr\v", repl), "#b#a#r#") - - # removal of all illegal control characters - value = "".join(chr(x) for x in range(32)) - self.assertEqual(f(value), "\t\n\r") - - # 'invalid' arguments - for value in INVALID: - self.assertEqual(f(value), "") - def test_remove_html(self, f=text.remove_html): result = "Hello World."