2019-01-05 16:39:05 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2023-02-01 13:57:16 +01:00
|
|
|
# Copyright 2019-2023 Mike Fährmann
|
2019-01-05 16:39:05 +01:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2020-11-20 22:28:01 +01:00
|
|
|
"""Write metadata to external files"""
|
2019-01-05 16:39:05 +01:00
|
|
|
|
|
|
|
from .common import PostProcessor
|
2021-09-27 02:37:04 +02:00
|
|
|
from .. import util, formatter
|
2023-02-06 12:35:28 +01:00
|
|
|
import json
|
2022-05-30 21:15:16 +02:00
|
|
|
import sys
|
2020-01-02 20:58:10 +01:00
|
|
|
import os
|
2019-01-05 16:39:05 +01:00
|
|
|
|
|
|
|
|
|
|
|
class MetadataPP(PostProcessor):
|
|
|
|
|
2020-05-18 01:35:53 +02:00
|
|
|
def __init__(self, job, options):
    """Set up the post-processor according to 'options'.

    Depending on the "mode" option this selects either a writer
    ('self.write') or replaces 'self.run' with an in-place
    modify/delete handler. It then configures how the target directory
    and filename are built, registers 'self.run' for the configured
    events, and stores the remaining output settings on the instance.

    Args:
        job: the job object; passed to PostProcessor.__init__(),
            used for hook registration and archive initialization.
        options: mapping of post-processor options (read via .get()).
    """
    PostProcessor.__init__(self, job)

    mode = options.get("mode")
    # "content-format" takes precedence over "format"
    cfmt = options.get("content-format") or options.get("format")
    omode = "w"
    filename = None

    if mode == "tags":
        self.write = self._write_tags
        ext = "txt"
    elif mode == "modify":
        self.run = self._run_modify
        # map each target field to a function computing its new value
        self.fields = {
            name: formatter.parse(value, None, util.identity).format_map
            for name, value in options.get("fields").items()
        }
        ext = None
    elif mode == "delete":
        self.run = self._run_delete
        self.fields = options.get("fields")
        ext = None
    elif mode == "custom" or not mode and cfmt:
        # explicit "custom" mode, or no mode but a content format given
        self.write = self._write_custom
        if isinstance(cfmt, list):
            # a list of lines becomes one newline-terminated string
            cfmt = "\n".join(cfmt) + "\n"
        self._content_fmt = formatter.parse(cfmt).format_map
        ext = "txt"
    elif mode == "jsonl":
        self.write = self._write_json
        self._json_encode = self._make_encoder(options).encode
        # append one JSON document per line to a single file by default
        omode = "a"
        filename = "data.jsonl"
        # Fallback extension: without this, a falsy "filename" option
        # combined with no "extension-format" reached
        # options.get("extension", ext) with 'ext' unbound.
        ext = "jsonl"
    else:
        # default: JSON, indented by 4 unless "indent" overrides it
        self.write = self._write_json
        self._json_encode = self._make_encoder(options, 4).encode
        ext = "json"

    directory = options.get("directory")
    if directory:
        self._directory = self._directory_custom
        # strip trailing path separators, then append exactly one
        sep = os.sep + (os.altsep or "")
        self._metadir = util.expand_path(directory).rstrip(sep) + os.sep

    filename = options.get("filename", filename)
    extfmt = options.get("extension-format")
    if filename:
        if filename == "-":
            # "-" writes to standard output instead of a file
            self.run = self._run_stdout
        else:
            self._filename = self._filename_custom
            self._filename_fmt = formatter.parse(filename).format_map
    elif extfmt:
        self._filename = self._filename_extfmt
        self._extension_fmt = formatter.parse(extfmt).format_map
    else:
        self.extension = options.get("extension", ext)

    events = options.get("event")
    if events is None:
        events = ("file",)
    elif isinstance(events, str):
        # accept a comma-separated string of event names
        events = events.split(",")
    # must happen after all possible reassignments of 'self.run' above
    job.register_hooks({event: self.run for event in events}, options)

    self._init_archive(job, options, "_MD_")
    self.mtime = options.get("mtime")
    self.omode = options.get("open", omode)
    self.encoding = options.get("encoding", "utf-8")
    self.private = options.get("private", False)
    self.skip = options.get("skip", False)
|
2022-02-22 23:02:13 +01:00
|
|
|
|
2019-01-05 16:39:05 +01:00
|
|
|
def run(self, pathfmt):
    """Write the metadata of 'pathfmt' to its metadata file.

    Does nothing when the archive already contains the entry, or when
    'skip' is enabled and the target file exists. A missing target
    directory is created on demand and the write is attempted once more.
    Afterwards the entry is added to the archive and, if enabled, the
    file's modification time is set from the "_mtime" metadata value.
    """
    metadata = pathfmt.kwdict
    archive = self.archive

    if archive and archive.check(metadata):
        return

    directory = self._directory(pathfmt)
    path = directory + self._filename(pathfmt)

    if self.skip and os.path.exists(path):
        return

    def emit():
        # open the target file and hand it to the configured writer
        with open(path, self.omode, encoding=self.encoding) as fp:
            self.write(fp, metadata)

    try:
        emit()
    except FileNotFoundError:
        # target directory does not exist yet: create it and retry once
        os.makedirs(directory, exist_ok=True)
        emit()

    if archive:
        archive.add(metadata)

    timestamp = metadata.get("_mtime") if self.mtime else None
    if timestamp:
        util.set_mtime(path, timestamp)
|
|
|
|
|
2022-05-30 21:15:16 +02:00
|
|
|
def _run_stdout(self, pathfmt):
    """Write the metadata of 'pathfmt' to standard output."""
    stream = sys.stdout
    self.write(stream, pathfmt.kwdict)
|
|
|
|
|
2022-07-19 12:24:26 +02:00
|
|
|
def _run_modify(self, pathfmt):
    """Set or overwrite metadata fields in place.

    For every entry of 'self.fields' the associated function is called
    with the metadata dict and its result is stored under that field
    name. Names containing "[" are resolved to a nested container
    first. A field that fails for any reason is skipped.
    """
    metadata = pathfmt.kwdict
    for field, render in self.fields.items():
        try:
            if "[" in field:
                target, field = _traverse(metadata, field)
            else:
                target = metadata
            target[field] = render(metadata)
        except Exception:
            # best effort: ignore fields that cannot be computed or set
            continue
|
|
|
|
|
2022-07-19 00:57:29 +02:00
|
|
|
def _run_delete(self, pathfmt):
    """Remove the metadata fields listed in 'self.fields' in place.

    Names containing "[" are resolved to a nested container first.
    A field that is missing or cannot be removed is skipped.
    """
    metadata = pathfmt.kwdict
    for field in self.fields:
        try:
            if "[" in field:
                container, field = _traverse(metadata, field)
            else:
                container = metadata
            del container[field]
        except Exception:
            # best effort: ignore fields that cannot be deleted
            continue
|
|
|
|
|
2020-01-02 20:58:10 +01:00
|
|
|
def _directory(self, pathfmt):
    """Return 'pathfmt.realdirectory' as the metadata target directory."""
    target = pathfmt.realdirectory
    return target
|
|
|
|
|
|
|
|
def _directory_custom(self, pathfmt):
    """Return 'self._metadir' joined onto 'pathfmt.realdirectory'."""
    base = pathfmt.realdirectory
    return os.path.join(base, self._metadir)
|
2020-01-02 20:58:10 +01:00
|
|
|
|
|
|
|
def _filename(self, pathfmt):
    """Return '<filename>.<extension>' for the metadata file.

    Falls back to "metadata" as name when 'pathfmt.filename' is empty.
    """
    stem = pathfmt.filename or "metadata"
    return stem + "." + self.extension
|
2019-11-29 23:12:22 +01:00
|
|
|
|
2020-01-02 20:58:10 +01:00
|
|
|
def _filename_custom(self, pathfmt):
    """Build the filename from the user-supplied "filename" format.

    The formatted name is passed through 'pathfmt.clean_segment()' and
    then 'pathfmt.clean_path()'.
    """
    raw = self._filename_fmt(pathfmt.kwdict)
    segment = pathfmt.clean_segment(raw)
    return pathfmt.clean_path(segment)
|
2020-11-20 22:28:01 +01:00
|
|
|
|
|
|
|
def _filename_extfmt(self, pathfmt):
    """Build the filename using the "extension-format" option.

    The "extension" entry of the metadata dict is temporarily replaced:
    first with 'pathfmt.extension' so the extension format can refer to
    it, then with 'pathfmt.prefix' plus the formatted result so that
    'pathfmt.build_filename()' produces the metadata filename.

    The previous "extension" value (None if there was none) is restored
    in all cases, including when formatting or filename building raises,
    so the shared metadata dict is never left in a modified state.
    """
    kwdict = pathfmt.kwdict
    ext = kwdict.get("extension")
    try:
        kwdict["extension"] = pathfmt.extension
        kwdict["extension"] = pathfmt.prefix + self._extension_fmt(kwdict)
        return pathfmt.build_filename(kwdict)
    finally:
        # always undo the temporary override
        kwdict["extension"] = ext
|
2019-11-29 23:12:22 +01:00
|
|
|
|
2020-11-20 22:28:01 +01:00
|
|
|
def _write_custom(self, fp, kwdict):
    """Write 'kwdict' rendered through the custom content format."""
    content = self._content_fmt(kwdict)
    fp.write(content)
|
2019-01-17 21:18:12 +01:00
|
|
|
|
2020-11-20 22:28:01 +01:00
|
|
|
def _write_tags(self, fp, kwdict):
    """Write one tag per line, taken from "tags" or "tag_string".

    Supported shapes of the tag value:
      - str: split on ", ", or on " " if that yields too few parts
      - dict: all value lists merged and sorted
      - sequence of dicts: all string values collected and sorted
      - anything else is joined as is
    Nothing is written when no tags are present.
    """
    tags = kwdict.get("tags") or kwdict.get("tag_string")
    if not tags:
        return

    if isinstance(tags, str):
        parts = tags.split(", ")
        # too few parts for the string length: assume space separation
        if len(parts) < len(tags) / 16:
            parts = tags.split(" ")
        tags = parts
    elif isinstance(tags, dict):
        merged = []
        for group in tags.values():
            merged += group
        merged.sort()
        tags = merged
    elif all(isinstance(entry, dict) for entry in tags):
        collected = [
            value
            for entry in tags
            for value in entry.values()
            if isinstance(value, str)
        ]
        collected.sort()
        tags = collected

    fp.write("\n".join(tags) + "\n")
|
2019-01-17 21:18:12 +01:00
|
|
|
|
2020-11-20 22:28:01 +01:00
|
|
|
def _write_json(self, fp, kwdict):
    """Write 'kwdict' as one JSON document followed by a newline.

    Unless 'self.private' is set, the dict is passed through
    util.filter_dict() before encoding.
    """
    data = kwdict if self.private else util.filter_dict(kwdict)
    fp.write(self._json_encode(data) + "\n")
|
2019-01-05 16:39:05 +01:00
|
|
|
|
2023-02-07 18:28:14 +01:00
|
|
|
@staticmethod
def _make_encoder(options, indent=None):
    """Create a json.JSONEncoder configured from 'options'.

    Reads the "ascii", "sort", "separators" and "indent" options;
    'indent' is the value used when no "indent" option is given.
    Circular-reference checks are disabled and util.json_default
    is installed as the 'default' hook.
    """
    settings = {
        "ensure_ascii"  : options.get("ascii", False),
        "sort_keys"     : options.get("sort", False),
        "separators"    : options.get("separators"),
        "indent"        : options.get("indent", indent),
        "check_circular": False,
        "default"       : util.json_default,
    }
    return json.JSONEncoder(**settings)
|
|
|
|
|
2019-01-05 16:39:05 +01:00
|
|
|
|
2023-04-25 14:17:25 +02:00
|
|
|
def _traverse(obj, key):
    """Resolve a bracketed key path like 'a[b]["c"]'.

    Follows every path component except the last one, starting at
    'obj', and returns a tuple of the container reached and the final
    key. Quote characters and closing brackets around bracketed
    components are stripped.
    """
    root, _, rest = key.partition("[")
    *inner, leaf = rest.split("[")

    node = obj[root]
    for segment in inner:
        node = node[segment.strip("\"']")]

    return node, leaf.strip("\"']")
|
2023-04-25 14:17:25 +02:00
|
|
|
|
|
|
|
|
2019-01-05 16:39:05 +01:00
|
|
|
# Module-level name bound to the post-processor class defined in this module
# (presumably looked up by the post-processor loader; confirm against caller).
__postprocessor__ = MetadataPP
|