mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 02:32:33 +01:00
[pp:hash] add 'hash' post processor (#6099)
This commit is contained in:
parent
f52cf54e16
commit
ae9b0da755
@ -5586,6 +5586,63 @@ Description
|
||||
See `metadata.event`_ for a list of available events.
|
||||
|
||||
|
||||
hash.chunk-size
---------------
Type
    ``integer``
Default
    ``32768``
Description
    Number of bytes read per chunk during file hash computation.
|
||||
|
||||
|
||||
hash.event
----------
Type
    * ``string``
    * ``list`` of ``strings``
Default
    ``"file"``
Description
    The event(s) for which `file hashes <hash.hashes_>`__ are computed.

    See `metadata.event`_ for a list of available events.
|
||||
|
||||
|
||||
hash.filename
-------------
Type
    ``bool``
Default
    ``false``
Description
    Rebuild `filenames <extractor.*.filename_>`__ after computing
    `hash digests <hash.hashes_>`__ and adding them to the metadata dict.
|
||||
|
||||
|
||||
hash.hashes
-----------
Type
    * ``string``
    * ``object`` (`field name` -> `hash algorithm`)
Default
    ``"md5,sha1"``
Example
    .. code:: json

        "sha256:hash_sha,sha3_512:hash_sha3"

    .. code:: json

        {
            "hash_sha" : "sha256",
            "hash_sha3": "sha3_512"
        }

Description
    Hash digests to compute.
|
||||
|
||||
|
||||
metadata.mode
|
||||
-------------
|
||||
Type
|
||||
@ -6694,6 +6751,8 @@ Description
|
||||
| (requires `downloader.*.part`_ = ``true`` and `extractor.*.skip`_ = ``false``)
|
||||
``exec``
|
||||
Execute external commands
|
||||
``hash``
|
||||
Compute file hash digests
|
||||
``metadata``
|
||||
Write metadata to separate files
|
||||
``mtime``
|
||||
|
@ -12,6 +12,7 @@ modules = [
|
||||
"classify",
|
||||
"compare",
|
||||
"exec",
|
||||
"hash",
|
||||
"metadata",
|
||||
"mtime",
|
||||
"python",
|
||||
|
71
gallery_dl/postprocessor/hash.py
Normal file
71
gallery_dl/postprocessor/hash.py
Normal file
@ -0,0 +1,71 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Compute file hash digests"""
|
||||
|
||||
from .common import PostProcessor
|
||||
import hashlib
|
||||
|
||||
|
||||
class HashPP(PostProcessor):
    """Compute hash digests of downloaded files and store them as metadata"""

    def __init__(self, job, options):
        PostProcessor.__init__(self, job)

        # bytes read per iteration while hashing a file
        self.chunk_size = options.get("chunk-size", 32768)
        # whether to rebuild the filename after adding digests to kwdict
        self.filename = options.get("filename")

        hashes = options.get("hashes")
        if isinstance(hashes, dict):
            # mapping of metadata field name -> hash algorithm name
            self.hashes = list(hashes.items())
        elif isinstance(hashes, str):
            # comma-separated "algorithm:field" specs;
            # without an explicit field, the algorithm name doubles as field
            pairs = []
            for spec in hashes.split(","):
                algo, sep, field = spec.partition(":")
                pairs.append((field, algo) if sep else (algo, algo))
            self.hashes = pairs
        elif hashes:
            # already an iterable of (field, algorithm) pairs
            self.hashes = hashes
        else:
            self.hashes = (("md5", "md5"), ("sha1", "sha1"))

        events = options.get("event")
        if events is None:
            events = ("file",)
        elif isinstance(events, str):
            events = events.split(",")
        job.register_hooks({event: self.run for event in events}, options)

    def run(self, pathfmt):
        """Hash the current file and write digests into pathfmt.kwdict"""
        digests = [(field, hashlib.new(algo)) for field, algo in self.hashes]

        size = self.chunk_size
        with self._open(pathfmt) as fp:
            # iter() with a sentinel stops once read() returns b"" at EOF
            for chunk in iter(lambda: fp.read(size), b""):
                for _, digest in digests:
                    digest.update(chunk)

        kwdict = pathfmt.kwdict
        for field, digest in digests:
            kwdict[field] = digest.hexdigest()

        if self.filename:
            pathfmt.build_path()

    def _open(self, pathfmt):
        """Open the file to hash; prefer the temporary download path"""
        try:
            return open(pathfmt.temppath, "rb")
        except OSError:
            # temppath missing (e.g. already moved) -> use the final path
            return open(pathfmt.realpath, "rb")


__postprocessor__ = HashPP
|
@ -240,6 +240,57 @@ class ExecTest(BasePostprocessorTest):
|
||||
self.assertFalse(i.wait.called)
|
||||
|
||||
|
||||
class HashTest(BasePostprocessorTest):

    # sample payload written to the downloaded file in every test
    SAMPLE = b"Foo Bar\n"

    def _run_with(self, options):
        # create the post processor, write sample data, fire hooks,
        # and return the resulting metadata dict
        self._create(options)
        with self.pathfmt.open() as fp:
            fp.write(self.SAMPLE)
        self._trigger()
        return self.pathfmt.kwdict

    def _assert_sha2(self, kwdict):
        # both custom-hash configurations must produce identical digests
        self.assertEqual(
            "4775b55be17206445d7015a5fc7656f38a74b880670523c3b175455f885f2395",
            kwdict["a"], "sha256")
        self.assertEqual(
            "6028f9e6957f4ca929941318c4bba6258713fd5162f9e33bd10e1c456d252700"
            "3e1095b50736c4fd1e2deea152e3c8ecd5993462a747208e4d842659935a1c62",
            kwdict["b"], "sha512")

    def test_default(self):
        kwdict = self._run_with({})
        self.assertEqual(
            "35c9c9c7c90ad764bae9e2623f522c24", kwdict["md5"], "md5")
        self.assertEqual(
            "14d3d804494ef4e57d72de63e4cfee761240471a", kwdict["sha1"], "sha1")

    def test_custom_hashes(self):
        kwdict = self._run_with({"hashes": "sha256:a,sha512:b"})
        self._assert_sha2(kwdict)

    def test_custom_hashes_dict(self):
        kwdict = self._run_with({"hashes": {"a": "sha256", "b": "sha512"}})
        self._assert_sha2(kwdict)
|
||||
|
||||
|
||||
class MetadataTest(BasePostprocessorTest):
|
||||
|
||||
def test_metadata_default(self):
|
||||
|
Loading…
Reference in New Issue
Block a user