mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 18:53:21 +01:00
[pp:hash] add 'hash' post processor (#6099)
This commit is contained in:
parent
f52cf54e16
commit
ae9b0da755
@ -5586,6 +5586,63 @@ Description
|
|||||||
See `metadata.event`_ for a list of available events.
|
See `metadata.event`_ for a list of available events.
|
||||||
|
|
||||||
|
|
||||||
|
hash.chunk-size
|
||||||
|
---------------
|
||||||
|
Type
|
||||||
|
``integer``
|
||||||
|
Default
|
||||||
|
``32768``
|
||||||
|
Description
|
||||||
|
Number of bytes read per chunk during file hash computation.
|
||||||
|
|
||||||
|
|
||||||
|
hash.event
|
||||||
|
----------
|
||||||
|
Type
|
||||||
|
* ``string``
|
||||||
|
* ``list`` of ``strings``
|
||||||
|
Default
|
||||||
|
``"file"``
|
||||||
|
Description
|
||||||
|
The event(s) for which `file hashes <hash.hashes_>`__ are computed.
|
||||||
|
|
||||||
|
See `metadata.event`_ for a list of available events.
|
||||||
|
|
||||||
|
|
||||||
|
hash.filename
|
||||||
|
-------------
|
||||||
|
Type
|
||||||
|
* ``bool``
|
||||||
|
Default
|
||||||
|
``false``
|
||||||
|
Description
|
||||||
|
Rebuild `filenames <extractor.*.filename_>`__ after computing
|
||||||
|
`hash digests <hash.hashes_>`__ and adding them to the metadata dict.
|
||||||
|
|
||||||
|
|
||||||
|
hash.hashes
|
||||||
|
-----------
|
||||||
|
Type
|
||||||
|
* ``string``
|
||||||
|
* ``object`` (`field name` -> `hash algorithm`)
|
||||||
|
Default
|
||||||
|
``"md5,sha1"``
|
||||||
|
Example
|
||||||
|
.. code:: json
|
||||||
|
|
||||||
|
"sha256:hash_sha,sha3_512:hash_sha3"
|
||||||
|
|
||||||
|
.. code:: json
|
||||||
|
|
||||||
|
{
|
||||||
|
"hash_sha" : "sha256",
|
||||||
|
"hash_sha3": "sha3_512"
|
||||||
|
}
|
||||||
|
|
||||||
|
Description
|
||||||
|
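Hash digests to compute, given either as a comma-separated ``string`` of ``algorithm`` or ``algorithm:field name`` entries, or as an ``object`` mapping metadata field names to hash algorithms.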
|
||||||
|
|
||||||
|
|
||||||
metadata.mode
|
metadata.mode
|
||||||
-------------
|
-------------
|
||||||
Type
|
Type
|
||||||
@ -6694,6 +6751,8 @@ Description
|
|||||||
| (requires `downloader.*.part`_ = ``true`` and `extractor.*.skip`_ = ``false``)
|
| (requires `downloader.*.part`_ = ``true`` and `extractor.*.skip`_ = ``false``)
|
||||||
``exec``
|
``exec``
|
||||||
Execute external commands
|
Execute external commands
|
||||||
|
``hash``
|
||||||
|
Compute file hash digests
|
||||||
``metadata``
|
``metadata``
|
||||||
Write metadata to separate files
|
Write metadata to separate files
|
||||||
``mtime``
|
``mtime``
|
||||||
|
@ -12,6 +12,7 @@ modules = [
|
|||||||
"classify",
|
"classify",
|
||||||
"compare",
|
"compare",
|
||||||
"exec",
|
"exec",
|
||||||
|
"hash",
|
||||||
"metadata",
|
"metadata",
|
||||||
"mtime",
|
"mtime",
|
||||||
"python",
|
"python",
|
||||||
|
71
gallery_dl/postprocessor/hash.py
Normal file
71
gallery_dl/postprocessor/hash.py
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2024 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Compute file hash digests"""
|
||||||
|
|
||||||
|
from .common import PostProcessor
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
|
||||||
|
class HashPP(PostProcessor):
    """Post processor that stores hash digests of a file in its metadata.

    Options read from the 'options' mapping:
      chunk-size -- number of bytes requested per read (default 32768)
      filename   -- if truthy, call pathfmt.build_path() after hashing
      hashes     -- digests to compute, as one of
                    * dict mapping field name -> algorithm name
                    * str of comma-separated 'algorithm[:field]' entries
                    * any other truthy iterable of (field, algorithm) pairs
                    md5 and sha1 are used when the option is unset or falsy
      event      -- event name(s) that trigger hashing (default "file"),
                    as a comma-separated str or an iterable of names
    """

    def __init__(self, job, options):
        """Read the options and hook 'run' up to the selected events."""
        PostProcessor.__init__(self, job)

        self.chunk_size = options.get("chunk-size", 32768)
        self.filename = options.get("filename")

        spec = options.get("hashes")
        if isinstance(spec, dict):
            # {"field": "algorithm", ...}
            self.hashes = list(spec.items())
        elif isinstance(spec, str):
            # "algorithm[:field],..." - without a ':' separator,
            # the algorithm name doubles as the metadata field name
            self.hashes = [
                (field if colon else algo, algo)
                for algo, colon, field in (
                    entry.partition(":") for entry in spec.split(",")
                )
            ]
        else:
            # use any other truthy value as is, fall back to md5 + sha1
            self.hashes = spec or (("md5", "md5"), ("sha1", "sha1"))

        triggers = options.get("event")
        if isinstance(triggers, str):
            triggers = triggers.split(",")
        elif triggers is None:
            triggers = ("file",)

        hooks = {}
        for trigger in triggers:
            hooks[trigger] = self.run
        job.register_hooks(hooks, options)

    def run(self, pathfmt):
        """Hash the file of 'pathfmt' and store the digests in its kwdict."""
        # a fresh hash object for every configured (field, algorithm) pair
        hashers = []
        for field, algo in self.hashes:
            hashers.append((field, hashlib.new(algo)))

        chunk_size = self.chunk_size
        with self._open(pathfmt) as fp:
            # read the file once and feed every chunk to all hash objects
            chunk = fp.read(chunk_size)
            while chunk:
                for _, hasher in hashers:
                    hasher.update(chunk)
                chunk = fp.read(chunk_size)

        kwdict = pathfmt.kwdict
        for field, hasher in hashers:
            kwdict[field] = hasher.hexdigest()

        if self.filename:
            pathfmt.build_path()

    def _open(self, pathfmt):
        """Open the file for binary reading.

        'pathfmt.temppath' is tried first;
        'pathfmt.realpath' is used when that raises OSError.
        """
        try:
            fp = open(pathfmt.temppath, "rb")
        except OSError:
            fp = open(pathfmt.realpath, "rb")
        return fp
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level name bound to this module's post-processor class
# (presumably looked up by the package's loader - TODO confirm).
__postprocessor__ = HashPP
|
@ -240,6 +240,57 @@ class ExecTest(BasePostprocessorTest):
|
|||||||
self.assertFalse(i.wait.called)
|
self.assertFalse(i.wait.called)
|
||||||
|
|
||||||
|
|
||||||
|
class HashTest(BasePostprocessorTest):
    """Tests for the 'hash' post processor on the payload b"Foo Bar\\n"."""

    def test_default(self):
        # no options: md5 and sha1 digests stored under their own names
        self._create({})

        with self.pathfmt.open() as stream:
            stream.write(b"Foo Bar\n")

        self._trigger()

        metadata = self.pathfmt.kwdict
        expected = (
            ("md5", "35c9c9c7c90ad764bae9e2623f522c24"),
            ("sha1", "14d3d804494ef4e57d72de63e4cfee761240471a"),
        )
        for field, digest in expected:
            self.assertEqual(digest, metadata[field], field)

    def test_custom_hashes(self):
        # 'hashes' given as a comma-separated "algorithm:field" string
        self._create({"hashes": "sha256:a,sha512:b"})

        with self.pathfmt.open() as stream:
            stream.write(b"Foo Bar\n")

        self._trigger()

        sha256 = (
            "4775b55be17206445d7015a5fc7656f38a74b880670523c3b175455f885f2395")
        sha512 = (
            "6028f9e6957f4ca929941318c4bba6258713fd5162f9e33bd10e1c456d252700"
            "3e1095b50736c4fd1e2deea152e3c8ecd5993462a747208e4d842659935a1c62")

        metadata = self.pathfmt.kwdict
        expected = (
            ("a", "sha256", sha256),
            ("b", "sha512", sha512),
        )
        for field, label, digest in expected:
            self.assertEqual(digest, metadata[field], label)

    def test_custom_hashes_dict(self):
        # 'hashes' given as a {field: algorithm} mapping
        self._create({"hashes": {"a": "sha256", "b": "sha512"}})

        with self.pathfmt.open() as stream:
            stream.write(b"Foo Bar\n")

        self._trigger()

        sha256 = (
            "4775b55be17206445d7015a5fc7656f38a74b880670523c3b175455f885f2395")
        sha512 = (
            "6028f9e6957f4ca929941318c4bba6258713fd5162f9e33bd10e1c456d252700"
            "3e1095b50736c4fd1e2deea152e3c8ecd5993462a747208e4d842659935a1c62")

        metadata = self.pathfmt.kwdict
        expected = (
            ("a", "sha256", sha256),
            ("b", "sha512", sha512),
        )
        for field, label, digest in expected:
            self.assertEqual(digest, metadata[field], label)
|
||||||
|
|
||||||
|
|
||||||
class MetadataTest(BasePostprocessorTest):
|
class MetadataTest(BasePostprocessorTest):
|
||||||
|
|
||||||
def test_metadata_default(self):
|
def test_metadata_default(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user