1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

fix/improve various things

This commit is contained in:
Mike Fährmann 2017-03-17 09:39:46 +01:00
parent abfe7456d6
commit ed94d9b92d
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
4 changed files with 15 additions and 14 deletions

View File

@@ -23,7 +23,7 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
pattern = [(r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts"
r"\?(?:utf8=%E2%9C%93&)?tags=([^&]+)")]
test = [("https://danbooru.donmai.us/posts?tags=bonocho", {
"url": "fb730af3f5e15650e5d924ffcda54b9ef232b89b",
"url": "f94774bcb5169e943efb4d7bb51c47ae786b05f3",
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
})]

View File

@@ -53,16 +53,18 @@ class Job():
def dispatch(self, msg):
"""Call the appropriate message handler"""
if msg[0] == Message.Url and self.pred_url:
self.update_kwdict(msg[2])
self.handle_url(msg[1], msg[2])
if msg[0] == Message.Url:
if self.pred_url:
self.update_kwdict(msg[2])
self.handle_url(msg[1], msg[2])
elif msg[0] == Message.Directory:
self.update_kwdict(msg[1])
self.handle_directory(msg[1])
elif msg[0] == Message.Queue and self.pred_queue:
self.handle_queue(msg[1])
elif msg[0] == Message.Queue:
if self.pred_queue:
self.handle_queue(msg[1])
elif msg[0] == Message.Headers:
self.handle_headers(msg[1])

View File

@@ -15,15 +15,14 @@ import html
import urllib.parse
INVALID_XML_CHARS = [
chr(c) for c in (1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 18,
INVALID_XML_CHARS = (1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
]
def clean_xml(xmldata, repl=""):
"""Replace/Remove invalid control characters in XML data"""
for char in INVALID_XML_CHARS:
char = chr(char)
if char in xmldata:
xmldata = xmldata.replace(char, repl)
return xmldata

View File

@@ -38,11 +38,11 @@ def main():
config.load()
for url in urls:
hjob = job.HashJob(url, content=args.content)
hjob.run()
print(hjob.extractor.__class__.__name__)
print(TESTDATA_FMT.format(url, hjob.hash_url.hexdigest(),
hjob.hash_keyword.hexdigest(), hjob.hash_content.hexdigest()))
tjob = job.TestJob(url, content=args.content)
tjob.run()
print(tjob.extractor.__class__.__name__)
print(TESTDATA_FMT.format(url, tjob.hash_url.hexdigest(),
tjob.hash_keyword.hexdigest(), tjob.hash_content.hexdigest()))
if __name__ == '__main__':
main()