From e63fc1bed423e7b84e257000d7d25bb812d37685 Mon Sep 17 00:00:00 2001 From: epitron Date: Thu, 2 Jan 2014 07:47:28 -0500 Subject: [PATCH] Added '--xattrs' option which writes metadata to the file's extended attributes using a youtube-dl postprocessor. Works on Linux, OSX, and Windows. --- README.md | 4 +- youtube_dl/PostProcessor.py | 120 ++++++++++++++++++++++++++++++++++++ youtube_dl/__init__.py | 8 ++- youtube_dl/utils.py | 9 +++ 4 files changed, 139 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index caed94846..5fa0103df 100644 --- a/README.md +++ b/README.md @@ -189,7 +189,9 @@ ## Post-processing Options: processed files are overwritten by default --embed-subs embed subtitles in the video (only for mp4 videos) - --add-metadata add metadata to the files + --add-metadata write metadata to the video file + --xattrs write metadata to the video file's xattrs (using + dublin core and xdg standards) # CONFIGURATION diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 69aedf87a..da95f1a87 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -62,6 +62,7 @@ class FFmpegPostProcessorError(PostProcessingError): class AudioConversionError(PostProcessingError): pass + class FFmpegPostProcessor(PostProcessor): def __init__(self,downloader=None): PostProcessor.__init__(self, downloader) @@ -107,6 +108,7 @@ def _ffmpeg_filename_argument(self, fn): return u'./' + fn return fn + class FFmpegExtractAudioPP(FFmpegPostProcessor): def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) @@ -232,6 +234,7 @@ def run(self, information): information['filepath'] = new_path return self._nopostoverwrites,information + class FFmpegVideoConvertor(FFmpegPostProcessor): def __init__(self, downloader=None,preferedformat=None): super(FFmpegVideoConvertor, self).__init__(downloader) @@ -509,3 +512,120 @@ def run(self, info): 
class XAttrMetadataPP(PostProcessor):
    """Post-processor that copies video metadata into extended attributes.

    The first available backend is used, in this order: a Python module
    importable as ``xattr``; on POSIX the ``setfattr`` or ``xattr``
    command-line tools; on other systems an NTFS Alternate Data Stream
    named after the attribute.
    """

    #
    # More info about extended attributes for media:
    #   http://freedesktop.org/wiki/CommonExtendedAttributes/
    #   http://www.freedesktop.org/wiki/PhreedomDraft/
    #   http://dublincore.org/documents/usageguide/elements.shtml
    #
    # TODO:
    #  * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
    #  * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
    #

    # (extended attribute name, key in the info dict that supplies its value)
    _XATTR_MAPPING = (
        ('user.xdg.referrer.url', 'webpage_url'),
        # ('user.xdg.comment', 'description'),
        ('user.dublincore.title', 'title'),
        ('user.dublincore.date', 'upload_date'),
        ('user.dublincore.description', 'description'),
        ('user.dublincore.contributor', 'uploader'),
        ('user.dublincore.format', 'format'),
    )

    @staticmethod
    def _to_bytes(value):
        """Return *value* as UTF-8 encoded bytes (bytes pass through untouched)."""
        if isinstance(value, bytes):
            return value
        if not hasattr(value, 'encode'):
            value = str(value)
        return value.encode('utf-8')

    @staticmethod
    def _which(program):
        """Return the path of the executable *program* found on PATH, or None."""
        for directory in os.environ.get('PATH', '').split(os.pathsep):
            if not directory:
                continue
            candidate = os.path.join(directory, program)
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                return candidate
        return None

    @staticmethod
    def _run_xattr_tool(cmd):
        """Run an xattr command-line tool, raising OSError if it fails.

        Known error messages are translated to the matching errno so the
        caller can tell "unsupported filesystem" apart from other failures.
        """
        import errno

        if sys.version_info < (3, 0):
            # Python 2 implicitly ASCII-encodes unicode argv entries, which
            # breaks on any non-ASCII title or description.
            cmd = [arg if isinstance(arg, bytes) else arg.encode('utf-8')
                   for arg in cmd]

        # Popen instead of check_output: the latter is missing on Python 2.6.
        proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = proc.communicate()
        if proc.returncode == 0:
            return

        message = output.decode('utf-8', 'replace').strip()
        known_errors = (
            # setfattr: /tmp/blah: Operation not supported
            ("Operation not supported", errno.EOPNOTSUPP),
            # setfattr: ~/blah: No such file or directory
            # xattr: No such file: ~/blah
            ("No such file", errno.ENOENT),
        )
        for needle, code in known_errors:
            if needle in message:
                raise OSError(code, needle)
        # Unrecognised failure: keep the tool's own output in the error.
        raise OSError(errno.EIO, message)

    def _write_alternate_data_stream(self, path, key, value):
        """Write *value* to the NTFS Alternate Data Stream ``path:key``.

        See http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
        """
        import errno

        # A colon separates the file name from the stream name, so neither
        # part may contain one; the drive prefix ("C:") is legitimate.
        if ':' in key or ':' in os.path.splitdrive(path)[1]:
            raise OSError(errno.EINVAL, "Invalid character ':' in path or attribute name")
        if not os.path.exists(path):
            raise OSError(errno.ENOENT, 'No such file', path)

        with open(path + ':' + key, 'wb') as stream:
            stream.write(self._to_bytes(value))

    def _find_xattr_writer(self):
        """Return a ``write(path, key, value)`` callable, or None if no backend exists."""
        try:
            # try the pyxattr module...
            import xattr
        except ImportError:
            xattr = None

        if xattr is not None:
            def write_with_module(path, key, value):
                # The value is handed over as bytes, never as text.
                xattr.setxattr(path, key, self._to_bytes(value))
            return write_with_module

        if os.name != 'posix':
            return self._write_alternate_data_stream

        if self._which('setfattr'):
            def write_with_setfattr(path, key, value):
                self._run_xattr_tool(['setfattr', '-n', key, '-v', value, path])
            return write_with_setfattr

        if self._which('xattr'):
            def write_with_xattr_tool(path, key, value):
                self._run_xattr_tool(['xattr', '-w', key, value, path])
            return write_with_xattr_tool

        return None

    def _report_missing_tool(self):
        """Tell the user which package to install to get xattr support."""
        if sys.platform.startswith('linux'):
            message = "Couldn't find a tool to set the xattrs. Install either the python 'pyxattr' or 'xattr' modules, or the GNU 'attr' package (which contains the 'setfattr' tool)."
        elif sys.platform == 'darwin':
            message = "Couldn't find a tool to set the xattrs. Install either the python 'xattr' module, or the 'xattr' binary."
        else:
            message = "Couldn't find a tool to set the xattrs. Install either the python 'pyxattr' or 'xattr' modules, or a 'setfattr' or 'xattr' binary."
        self._downloader.report_error(message)

    def run(self, info):
        """ Set extended attributes on downloaded file (if xattr support is found).

        Reads ``info['filepath']`` and the metadata keys listed in
        ``_XATTR_MAPPING``; empty or missing values are skipped.

        Returns ``(True, info)`` on success and ``(False, info)`` when no
        backend is available or writing fails.
        """
        import errno

        from .utils import hyphenate_date

        # NOTE(review): the meaning of the first tuple element is defined by
        # the downloader, which is not visible here. False is kept for every
        # failure path, as in the original error handler -- confirm that it
        # does not cause the downloaded file to be discarded.
        write_xattr = self._find_xattr_writer()
        if write_xattr is None:
            self._report_missing_tool()
            return False, info

        # Write the metadata to the file's xattrs
        self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs...')

        filename = info['filepath']

        # ENOTSUP is not defined by every Python version/platform.
        unsupported_errnos = set(
            getattr(errno, name)
            for name in ('EOPNOTSUPP', 'ENOTSUP')
            if hasattr(errno, name))

        try:
            for xattrname, infoname in self._XATTR_MAPPING:
                value = info.get(infoname)
                if not value:
                    continue
                if infoname == "upload_date":
                    value = hyphenate_date(value)
                write_xattr(filename, xattrname, value)
        except EnvironmentError as err:
            # EnvironmentError also covers IOError on Python 2; on Python 3
            # both names are aliases of OSError.
            if err.errno in unsupported_errnos:
                self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
            else:
                self._downloader.report_error('Unable to write extended attributes: %s' % err)
            return False, info

        return True, info
def hyphenate_date(date_str):
    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Anything that is not a string of exactly eight digits -- including
    ``None`` and non-string values -- is returned unchanged, so the function
    can be applied directly to optional metadata fields.
    """
    try:
        match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    except TypeError:
        # Not a string (e.g. None or an int): nothing to reformat.
        return date_str
    if match is None:
        return date_str
    return '-'.join(match.groups())
end=None):