mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-25 12:12:34 +01:00
renaming variables
mostly 'keyword(s)' to 'kwdict'
This commit is contained in:
parent
87a87bff7e
commit
322c2e7ed4
@ -101,17 +101,17 @@ class Job():
|
|||||||
)
|
)
|
||||||
# TODO: support for multiple message versions
|
# TODO: support for multiple message versions
|
||||||
|
|
||||||
def handle_url(self, url, keywords):
|
def handle_url(self, url, kwdict):
|
||||||
"""Handle Message.Url"""
|
"""Handle Message.Url"""
|
||||||
|
|
||||||
def handle_urllist(self, urls, keywords):
|
def handle_urllist(self, urls, kwdict):
|
||||||
"""Handle Message.Urllist"""
|
"""Handle Message.Urllist"""
|
||||||
self.handle_url(urls[0], keywords)
|
self.handle_url(urls[0], kwdict)
|
||||||
|
|
||||||
def handle_directory(self, keywords):
|
def handle_directory(self, kwdict):
|
||||||
"""Handle Message.Directory"""
|
"""Handle Message.Directory"""
|
||||||
|
|
||||||
def handle_queue(self, url, keywords):
|
def handle_queue(self, url, kwdict):
|
||||||
"""Handle Message.Queue"""
|
"""Handle Message.Queue"""
|
||||||
|
|
||||||
def handle_finalize(self):
|
def handle_finalize(self):
|
||||||
@ -119,8 +119,9 @@ class Job():
|
|||||||
|
|
||||||
def update_kwdict(self, kwdict):
|
def update_kwdict(self, kwdict):
|
||||||
"""Update 'kwdict' with additional metadata"""
|
"""Update 'kwdict' with additional metadata"""
|
||||||
kwdict["category"] = self.extractor.category
|
extr = self.extractor
|
||||||
kwdict["subcategory"] = self.extractor.subcategory
|
kwdict["category"] = extr.category
|
||||||
|
kwdict["subcategory"] = extr.subcategory
|
||||||
if self.userkwds:
|
if self.userkwds:
|
||||||
kwdict.update(self.userkwds)
|
kwdict.update(self.userkwds)
|
||||||
|
|
||||||
@ -176,14 +177,14 @@ class DownloadJob(Job):
|
|||||||
self.postprocessors = None
|
self.postprocessors = None
|
||||||
self.out = output.select()
|
self.out = output.select()
|
||||||
|
|
||||||
def handle_url(self, url, keywords, fallback=None):
|
def handle_url(self, url, kwdict, fallback=None):
|
||||||
"""Download the resource specified in 'url'"""
|
"""Download the resource specified in 'url'"""
|
||||||
postprocessors = self.postprocessors
|
postprocessors = self.postprocessors
|
||||||
pathfmt = self.pathfmt
|
pathfmt = self.pathfmt
|
||||||
archive = self.archive
|
archive = self.archive
|
||||||
|
|
||||||
# prepare download
|
# prepare download
|
||||||
pathfmt.set_filename(keywords)
|
pathfmt.set_filename(kwdict)
|
||||||
|
|
||||||
if postprocessors:
|
if postprocessors:
|
||||||
for pp in postprocessors:
|
for pp in postprocessors:
|
||||||
@ -223,28 +224,28 @@ class DownloadJob(Job):
|
|||||||
pathfmt.finalize()
|
pathfmt.finalize()
|
||||||
self.out.success(pathfmt.path, 0)
|
self.out.success(pathfmt.path, 0)
|
||||||
if archive:
|
if archive:
|
||||||
archive.add(keywords)
|
archive.add(kwdict)
|
||||||
if postprocessors:
|
if postprocessors:
|
||||||
for pp in postprocessors:
|
for pp in postprocessors:
|
||||||
pp.run_after(pathfmt)
|
pp.run_after(pathfmt)
|
||||||
self._skipcnt = 0
|
self._skipcnt = 0
|
||||||
|
|
||||||
def handle_urllist(self, urls, keywords):
|
def handle_urllist(self, urls, kwdict):
|
||||||
"""Download the resource specified in 'url'"""
|
"""Download the resource specified in 'url'"""
|
||||||
fallback = iter(urls)
|
fallback = iter(urls)
|
||||||
url = next(fallback)
|
url = next(fallback)
|
||||||
self.handle_url(url, keywords, fallback)
|
self.handle_url(url, kwdict, fallback)
|
||||||
|
|
||||||
def handle_directory(self, keywords):
|
def handle_directory(self, kwdict):
|
||||||
"""Set and create the target directory for downloads"""
|
"""Set and create the target directory for downloads"""
|
||||||
if not self.pathfmt:
|
if not self.pathfmt:
|
||||||
self.initialize(keywords)
|
self.initialize(kwdict)
|
||||||
else:
|
else:
|
||||||
self.pathfmt.set_directory(keywords)
|
self.pathfmt.set_directory(kwdict)
|
||||||
|
|
||||||
def handle_queue(self, url, keywords):
|
def handle_queue(self, url, kwdict):
|
||||||
if "_extractor" in keywords:
|
if "_extractor" in kwdict:
|
||||||
extr = keywords["_extractor"].from_url(url)
|
extr = kwdict["_extractor"].from_url(url)
|
||||||
else:
|
else:
|
||||||
extr = extractor.find(url)
|
extr = extractor.find(url)
|
||||||
if extr:
|
if extr:
|
||||||
@ -297,11 +298,11 @@ class DownloadJob(Job):
|
|||||||
self.downloaders[scheme] = instance
|
self.downloaders[scheme] = instance
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
def initialize(self, keywords=None):
|
def initialize(self, kwdict=None):
|
||||||
"""Delayed initialization of PathFormat, etc."""
|
"""Delayed initialization of PathFormat, etc."""
|
||||||
self.pathfmt = util.PathFormat(self.extractor)
|
self.pathfmt = util.PathFormat(self.extractor)
|
||||||
if keywords:
|
if kwdict:
|
||||||
self.pathfmt.set_directory(keywords)
|
self.pathfmt.set_directory(kwdict)
|
||||||
|
|
||||||
self.sleep = self.extractor.config("sleep")
|
self.sleep = self.extractor.config("sleep")
|
||||||
if not self.extractor.config("download", True):
|
if not self.extractor.config("download", True):
|
||||||
@ -368,15 +369,15 @@ class DownloadJob(Job):
|
|||||||
class SimulationJob(DownloadJob):
|
class SimulationJob(DownloadJob):
|
||||||
"""Simulate the extraction process without downloading anything"""
|
"""Simulate the extraction process without downloading anything"""
|
||||||
|
|
||||||
def handle_url(self, url, keywords, fallback=None):
|
def handle_url(self, url, kwdict, fallback=None):
|
||||||
self.pathfmt.set_filename(keywords)
|
self.pathfmt.set_filename(kwdict)
|
||||||
self.out.skip(self.pathfmt.path)
|
self.out.skip(self.pathfmt.path)
|
||||||
if self.sleep:
|
if self.sleep:
|
||||||
time.sleep(self.sleep)
|
time.sleep(self.sleep)
|
||||||
if self.archive:
|
if self.archive:
|
||||||
self.archive.add(keywords)
|
self.archive.add(kwdict)
|
||||||
|
|
||||||
def handle_directory(self, keywords):
|
def handle_directory(self, kwdict):
|
||||||
if not self.pathfmt:
|
if not self.pathfmt:
|
||||||
self.initialize()
|
self.initialize()
|
||||||
|
|
||||||
@ -384,19 +385,19 @@ class SimulationJob(DownloadJob):
|
|||||||
class KeywordJob(Job):
|
class KeywordJob(Job):
|
||||||
"""Print available keywords"""
|
"""Print available keywords"""
|
||||||
|
|
||||||
def handle_url(self, url, keywords):
|
def handle_url(self, url, kwdict):
|
||||||
print("\nKeywords for filenames and --filter:")
|
print("\nKeywords for filenames and --filter:")
|
||||||
print("------------------------------------")
|
print("------------------------------------")
|
||||||
self.print_keywords(keywords)
|
self.print_kwdict(kwdict)
|
||||||
raise exception.StopExtraction()
|
raise exception.StopExtraction()
|
||||||
|
|
||||||
def handle_directory(self, keywords):
|
def handle_directory(self, kwdict):
|
||||||
print("Keywords for directory names:")
|
print("Keywords for directory names:")
|
||||||
print("-----------------------------")
|
print("-----------------------------")
|
||||||
self.print_keywords(keywords)
|
self.print_kwdict(kwdict)
|
||||||
|
|
||||||
def handle_queue(self, url, keywords):
|
def handle_queue(self, url, kwdict):
|
||||||
if not keywords:
|
if not kwdict:
|
||||||
self.extractor.log.info(
|
self.extractor.log.info(
|
||||||
"This extractor delegates work to other extractors "
|
"This extractor delegates work to other extractors "
|
||||||
"and does not provide any keywords on its own. Try "
|
"and does not provide any keywords on its own. Try "
|
||||||
@ -404,27 +405,27 @@ class KeywordJob(Job):
|
|||||||
else:
|
else:
|
||||||
print("Keywords for --chapter-filter:")
|
print("Keywords for --chapter-filter:")
|
||||||
print("------------------------------")
|
print("------------------------------")
|
||||||
self.print_keywords(keywords)
|
self.print_kwdict(kwdict)
|
||||||
if self.extractor.categorytransfer:
|
if self.extractor.categorytransfer:
|
||||||
print()
|
print()
|
||||||
KeywordJob(url, self).run()
|
KeywordJob(url, self).run()
|
||||||
raise exception.StopExtraction()
|
raise exception.StopExtraction()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def print_keywords(keywords, prefix=""):
|
def print_kwdict(kwdict, prefix=""):
|
||||||
"""Print key-value pairs with formatting"""
|
"""Print key-value pairs in 'kwdict' with formatting"""
|
||||||
suffix = "]" if prefix else ""
|
suffix = "]" if prefix else ""
|
||||||
for key, value in sorted(keywords.items()):
|
for key, value in sorted(kwdict.items()):
|
||||||
if key[0] == "_":
|
if key[0] == "_":
|
||||||
continue
|
continue
|
||||||
key = prefix + key + suffix
|
key = prefix + key + suffix
|
||||||
|
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
KeywordJob.print_keywords(value, key + "[")
|
KeywordJob.print_kwdict(value, key + "[")
|
||||||
|
|
||||||
elif isinstance(value, list):
|
elif isinstance(value, list):
|
||||||
if value and isinstance(value[0], dict):
|
if value and isinstance(value[0], dict):
|
||||||
KeywordJob.print_keywords(value[0], key + "[][")
|
KeywordJob.print_kwdict(value[0], key + "[][")
|
||||||
else:
|
else:
|
||||||
print(key, "[]", sep="")
|
print(key, "[]", sep="")
|
||||||
for val in value:
|
for val in value:
|
||||||
|
@ -84,47 +84,47 @@ class TestExtractorResults(unittest.TestCase):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
# test archive-id uniqueness
|
# test archive-id uniqueness
|
||||||
self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))
|
self.assertEqual(len(set(tjob.archive_list)), len(tjob.archive_list))
|
||||||
|
|
||||||
if tjob.queue:
|
if tjob.queue:
|
||||||
# test '_extractor' entries
|
# test '_extractor' entries
|
||||||
for url, kwdict in zip(tjob.list_url, tjob.list_keyword):
|
for url, kwdict in zip(tjob.url_list, tjob.kwdict_list):
|
||||||
if "_extractor" in kwdict:
|
if "_extractor" in kwdict:
|
||||||
extr = kwdict["_extractor"].from_url(url)
|
extr = kwdict["_extractor"].from_url(url)
|
||||||
self.assertIsInstance(extr, kwdict["_extractor"])
|
self.assertIsInstance(extr, kwdict["_extractor"])
|
||||||
self.assertEqual(extr.url, url)
|
self.assertEqual(extr.url, url)
|
||||||
else:
|
else:
|
||||||
# test 'extension' entries
|
# test 'extension' entries
|
||||||
for kwdict in tjob.list_keyword:
|
for kwdict in tjob.kwdict_list:
|
||||||
self.assertIn("extension", kwdict)
|
self.assertIn("extension", kwdict)
|
||||||
|
|
||||||
# test extraction results
|
# test extraction results
|
||||||
if "url" in result:
|
if "url" in result:
|
||||||
self.assertEqual(result["url"], tjob.hash_url.hexdigest())
|
self.assertEqual(result["url"], tjob.url_hash.hexdigest())
|
||||||
|
|
||||||
if "content" in result:
|
if "content" in result:
|
||||||
self.assertEqual(result["content"], tjob.hash_content.hexdigest())
|
self.assertEqual(result["content"], tjob.content_hash.hexdigest())
|
||||||
|
|
||||||
if "keyword" in result:
|
if "keyword" in result:
|
||||||
keyword = result["keyword"]
|
expected = result["keyword"]
|
||||||
if isinstance(keyword, dict):
|
if isinstance(expected, dict):
|
||||||
for kwdict in tjob.list_keyword:
|
for kwdict in tjob.kwdict_list:
|
||||||
self._test_kwdict(kwdict, keyword)
|
self._test_kwdict(kwdict, expected)
|
||||||
else: # assume SHA1 hash
|
else: # assume SHA1 hash
|
||||||
self.assertEqual(keyword, tjob.hash_keyword.hexdigest())
|
self.assertEqual(expected, tjob.kwdict_hash.hexdigest())
|
||||||
|
|
||||||
if "count" in result:
|
if "count" in result:
|
||||||
count = result["count"]
|
count = result["count"]
|
||||||
if isinstance(count, str):
|
if isinstance(count, str):
|
||||||
self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
|
self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$")
|
||||||
expr = "{} {}".format(len(tjob.list_url), count)
|
expr = "{} {}".format(len(tjob.url_list), count)
|
||||||
self.assertTrue(eval(expr), msg=expr)
|
self.assertTrue(eval(expr), msg=expr)
|
||||||
else: # assume integer
|
else: # assume integer
|
||||||
self.assertEqual(len(tjob.list_url), count)
|
self.assertEqual(len(tjob.url_list), count)
|
||||||
|
|
||||||
if "pattern" in result:
|
if "pattern" in result:
|
||||||
self.assertGreater(len(tjob.list_url), 0)
|
self.assertGreater(len(tjob.url_list), 0)
|
||||||
for url in tjob.list_url:
|
for url in tjob.url_list:
|
||||||
self.assertRegex(url, result["pattern"])
|
self.assertRegex(url, result["pattern"])
|
||||||
|
|
||||||
def _test_kwdict(self, kwdict, tests):
|
def _test_kwdict(self, kwdict, tests):
|
||||||
@ -158,58 +158,60 @@ class ResultJob(job.DownloadJob):
|
|||||||
job.DownloadJob.__init__(self, url, parent)
|
job.DownloadJob.__init__(self, url, parent)
|
||||||
self.queue = False
|
self.queue = False
|
||||||
self.content = content
|
self.content = content
|
||||||
self.list_url = []
|
|
||||||
self.list_keyword = []
|
self.url_list = []
|
||||||
self.list_archive = []
|
self.url_hash = hashlib.sha1()
|
||||||
self.hash_url = hashlib.sha1()
|
self.kwdict_list = []
|
||||||
self.hash_keyword = hashlib.sha1()
|
self.kwdict_hash = hashlib.sha1()
|
||||||
self.hash_archive = hashlib.sha1()
|
self.archive_list = []
|
||||||
self.hash_content = hashlib.sha1()
|
self.archive_hash = hashlib.sha1()
|
||||||
|
self.content_hash = hashlib.sha1()
|
||||||
|
|
||||||
if content:
|
if content:
|
||||||
self.fileobj = TestPathfmt(self.hash_content)
|
self.fileobj = TestPathfmt(self.content_hash)
|
||||||
|
|
||||||
self.format_directory = TestFormatter(
|
self.format_directory = TestFormatter(
|
||||||
"".join(self.extractor.directory_fmt))
|
"".join(self.extractor.directory_fmt)).format_map
|
||||||
self.format_filename = TestFormatter(self.extractor.filename_fmt)
|
self.format_filename = TestFormatter(
|
||||||
|
self.extractor.filename_fmt).format_map
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
for msg in self.extractor:
|
for msg in self.extractor:
|
||||||
self.dispatch(msg)
|
self.dispatch(msg)
|
||||||
|
|
||||||
def handle_url(self, url, keywords, fallback=None):
|
def handle_url(self, url, kwdict, fallback=None):
|
||||||
self.update_url(url)
|
self._update_url(url)
|
||||||
self.update_keyword(keywords)
|
self._update_kwdict(kwdict)
|
||||||
self.update_archive(keywords)
|
self._update_archive(kwdict)
|
||||||
self.update_content(url)
|
self._update_content(url)
|
||||||
self.format_filename.format_map(keywords)
|
self.format_filename(kwdict)
|
||||||
|
|
||||||
def handle_directory(self, keywords):
|
def handle_directory(self, kwdict):
|
||||||
self.update_keyword(keywords, False)
|
self._update_kwdict(kwdict, False)
|
||||||
self.format_directory.format_map(keywords)
|
self.format_directory(kwdict)
|
||||||
|
|
||||||
def handle_queue(self, url, keywords):
|
def handle_queue(self, url, kwdict):
|
||||||
self.queue = True
|
self.queue = True
|
||||||
self.update_url(url)
|
self._update_url(url)
|
||||||
self.update_keyword(keywords)
|
self._update_kwdict(kwdict)
|
||||||
|
|
||||||
def update_url(self, url):
|
def _update_url(self, url):
|
||||||
self.list_url.append(url)
|
self.url_list.append(url)
|
||||||
self.hash_url.update(url.encode())
|
self.url_hash.update(url.encode())
|
||||||
|
|
||||||
def update_keyword(self, kwdict, to_list=True):
|
def _update_kwdict(self, kwdict, to_list=True):
|
||||||
if to_list:
|
if to_list:
|
||||||
self.list_keyword.append(kwdict)
|
self.kwdict_list.append(kwdict.copy())
|
||||||
kwdict = self._filter(kwdict)
|
kwdict = self._filter(kwdict)
|
||||||
self.hash_keyword.update(
|
self.kwdict_hash.update(
|
||||||
json.dumps(kwdict, sort_keys=True, default=str).encode())
|
json.dumps(kwdict, sort_keys=True, default=str).encode())
|
||||||
|
|
||||||
def update_archive(self, kwdict):
|
def _update_archive(self, kwdict):
|
||||||
archive_id = self.extractor.archive_fmt.format_map(kwdict)
|
archive_id = self.extractor.archive_fmt.format_map(kwdict)
|
||||||
self.list_archive.append(archive_id)
|
self.archive_list.append(archive_id)
|
||||||
self.hash_archive.update(archive_id.encode())
|
self.archive_hash.update(archive_id.encode())
|
||||||
|
|
||||||
def update_content(self, url):
|
def _update_content(self, url):
|
||||||
if self.content:
|
if self.content:
|
||||||
scheme = url.partition(":")[0]
|
scheme = url.partition(":")[0]
|
||||||
self.get_downloader(scheme).download(url, self.fileobj)
|
self.get_downloader(scheme).download(url, self.fileobj)
|
||||||
|
Loading…
Reference in New Issue
Block a user