1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-09-05 02:19:36 +02:00

Allow running some postprocessors before actual download

This commit is contained in:
pukkandan 2021-04-11 03:48:07 +05:30
parent f4f751af40
commit 56d868dbb7
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
4 changed files with 54 additions and 60 deletions

View File

@ -291,10 +291,9 @@ class YoutubeDL(object):
postprocessors: A list of dictionaries, each with an entry postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See * key: The name of the postprocessor. See
yt_dlp/postprocessor/__init__.py for a list. yt_dlp/postprocessor/__init__.py for a list.
* _after_move: Optional. If True, run this post_processor * when: When to run the postprocessor. Can be one of
after 'MoveFilesAfterDownload' pre_process|before_dl|post_process|after_move.
as well as any further keyword arguments for the Assumed to be 'post_process' if not given
postprocessor.
post_hooks: A list of functions that get called as the final step post_hooks: A list of functions that get called as the final step
for each video file, after all postprocessors have been for each video file, after all postprocessors have been
called. The filename will be passed as the only argument. called. The filename will be passed as the only argument.
@ -423,7 +422,7 @@ class YoutubeDL(object):
params = None params = None
_ies = [] _ies = []
_pps = {'beforedl': [], 'aftermove': [], 'normal': []} _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
__prepare_filename_warned = False __prepare_filename_warned = False
_first_webpage_request = True _first_webpage_request = True
_download_retcode = None _download_retcode = None
@ -438,7 +437,7 @@ def __init__(self, params=None, auto_init=True):
params = {} params = {}
self._ies = [] self._ies = []
self._ies_instances = {} self._ies_instances = {}
self._pps = {'beforedl': [], 'aftermove': [], 'normal': []} self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
self.__prepare_filename_warned = False self.__prepare_filename_warned = False
self._first_webpage_request = True self._first_webpage_request = True
self._post_hooks = [] self._post_hooks = []
@ -551,7 +550,7 @@ def check_deprecated(param, option, suggestion):
when = pp_def['when'] when = pp_def['when']
del pp_def['when'] del pp_def['when']
else: else:
when = 'normal' when = 'post_process'
pp = pp_class(self, **compat_kwargs(pp_def)) pp = pp_class(self, **compat_kwargs(pp_def))
self.add_post_processor(pp, when=when) self.add_post_processor(pp, when=when)
@ -605,7 +604,7 @@ def add_default_info_extractors(self):
for ie in gen_extractor_classes(): for ie in gen_extractor_classes():
self.add_info_extractor(ie) self.add_info_extractor(ie)
def add_post_processor(self, pp, when='normal'): def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain.""" """Add a PostProcessor object to the end of the chain."""
self._pps[when].append(pp) self._pps[when].append(pp)
pp.set_downloader(self) pp.set_downloader(self)
@ -2114,13 +2113,12 @@ def process_info(self, info_dict):
self.post_extract(info_dict) self.post_extract(info_dict)
self._num_downloads += 1 self._num_downloads += 1
info_dict = self.pre_process(info_dict) info_dict, _ = self.pre_process(info_dict)
# info_dict['_filename'] needs to be set for backward compatibility # info_dict['_filename'] needs to be set for backward compatibility
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
temp_filename = self.prepare_filename(info_dict, 'temp') temp_filename = self.prepare_filename(info_dict, 'temp')
files_to_move = {} files_to_move = {}
skip_dl = self.params.get('skip_download', False)
# Forced printings # Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=False) self.__forced_printings(info_dict, full_filename, incomplete=False)
@ -2197,11 +2195,9 @@ def dl(name, info, subtitle=False):
# ie = self.get_info_extractor(info_dict['extractor_key']) # ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items(): for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext'] sub_format = sub_info['ext']
sub_fn = self.prepare_filename(info_dict, 'subtitle') sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
sub_filename = subtitles_filename( sub_filename_final = subtitles_filename(
temp_filename if not skip_dl else sub_fn, self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
sub_lang, sub_format, info_dict.get('ext'))
sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)): if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
sub_info['filepath'] = sub_filename sub_info['filepath'] = sub_filename
@ -2229,28 +2225,6 @@ def dl(name, info, subtitle=False):
(sub_lang, error_to_compat_str(err))) (sub_lang, error_to_compat_str(err)))
continue continue
if skip_dl:
if self.params.get('convertsubtitles', False):
# subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
filename_real_ext = os.path.splitext(full_filename)[1][1:]
filename_wo_ext = (
os.path.splitext(full_filename)[0]
if filename_real_ext == info_dict['ext']
else full_filename)
afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
# if subconv.available:
# info_dict['__postprocessors'].append(subconv)
if os.path.exists(encodeFilename(afilename)):
self.to_screen(
'[download] %s has already been downloaded and '
'converted' % afilename)
else:
try:
self.post_process(full_filename, info_dict, files_to_move)
except PostProcessingError as err:
self.report_error('Postprocessing: %s' % str(err))
return
if self.params.get('writeinfojson', False): if self.params.get('writeinfojson', False):
infofn = self.prepare_filename(info_dict, 'infojson') infofn = self.prepare_filename(info_dict, 'infojson')
if not self._ensure_dir_exists(encodeFilename(infofn)): if not self._ensure_dir_exists(encodeFilename(infofn)):
@ -2266,11 +2240,10 @@ def dl(name, info, subtitle=False):
return return
info_dict['__infojson_filename'] = infofn info_dict['__infojson_filename'] = infofn
thumbfn = self.prepare_filename(info_dict, 'thumbnail') for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
thumb_fn_temp = temp_filename if not skip_dl else thumbfn thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp): thumb_filename = replace_extension(
thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext')) self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
files_to_move[thumb_filename_temp] = thumb_filename files_to_move[thumb_filename_temp] = thumb_filename
# Write internet shortcut files # Write internet shortcut files
@ -2322,9 +2295,20 @@ def _write_link_file(extension, template, newline, embed_filename):
if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True): if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
return return
# Download try:
info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
except PostProcessingError as err:
self.report_error('Preprocessing: %s' % str(err))
return
must_record_download_archive = False must_record_download_archive = False
if not skip_dl: if self.params.get('skip_download', False):
info_dict['filepath'] = temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
info_dict['__files_to_move'] = files_to_move
info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
else:
# Download
try: try:
def existing_file(*filepaths): def existing_file(*filepaths):
@ -2633,11 +2617,12 @@ def actual_post_extract(info_dict):
actual_post_extract(info_dict or {}) actual_post_extract(info_dict or {})
def pre_process(self, ie_info): def pre_process(self, ie_info, key='pre_process', files_to_move=None):
info = dict(ie_info) info = dict(ie_info)
for pp in self._pps['beforedl']: info['__files_to_move'] = files_to_move or {}
for pp in self._pps[key]:
info = self.run_pp(pp, info) info = self.run_pp(pp, info)
return info return info, info.pop('__files_to_move', None)
def post_process(self, filename, ie_info, files_to_move=None): def post_process(self, filename, ie_info, files_to_move=None):
"""Run all the postprocessors on the given file.""" """Run all the postprocessors on the given file."""
@ -2645,11 +2630,11 @@ def post_process(self, filename, ie_info, files_to_move=None):
info['filepath'] = filename info['filepath'] = filename
info['__files_to_move'] = files_to_move or {} info['__files_to_move'] = files_to_move or {}
for pp in ie_info.get('__postprocessors', []) + self._pps['normal']: for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
info = self.run_pp(pp, info) info = self.run_pp(pp, info)
info = self.run_pp(MoveFilesAfterDownloadPP(self), info) info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
del info['__files_to_move'] del info['__files_to_move']
for pp in self._pps['aftermove']: for pp in self._pps['after_move']:
info = self.run_pp(pp, info) info = self.run_pp(pp, info)
return info return info

View File

@ -228,7 +228,7 @@ def parse_retries(retries, name=''):
if not re.match(remux_regex, opts.remuxvideo): if not re.match(remux_regex, opts.remuxvideo):
parser.error('invalid video remux format specified') parser.error('invalid video remux format specified')
if opts.convertsubtitles is not None: if opts.convertsubtitles is not None:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']: if opts.convertsubtitles not in ('srt', 'vtt', 'ass', 'lrc'):
parser.error('invalid subtitle format specified') parser.error('invalid subtitle format specified')
if opts.date is not None: if opts.date is not None:
@ -322,7 +322,15 @@ def report_conflict(arg1, arg2):
postprocessors.append({ postprocessors.append({
'key': 'MetadataFromField', 'key': 'MetadataFromField',
'formats': opts.metafromfield, 'formats': opts.metafromfield,
'when': 'beforedl' # Run this immediately after extraction is complete
'when': 'pre_process'
})
if opts.convertsubtitles:
postprocessors.append({
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
# Run this before the actual video download
'when': 'before_dl'
}) })
if opts.extractaudio: if opts.extractaudio:
postprocessors.append({ postprocessors.append({
@ -351,15 +359,11 @@ def report_conflict(arg1, arg2):
# so metadata can be added here. # so metadata can be added here.
if opts.addmetadata: if opts.addmetadata:
postprocessors.append({'key': 'FFmpegMetadata'}) postprocessors.append({'key': 'FFmpegMetadata'})
if opts.convertsubtitles:
postprocessors.append({
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
})
if opts.embedsubtitles: if opts.embedsubtitles:
already_have_subtitle = opts.writesubtitles already_have_subtitle = opts.writesubtitles
postprocessors.append({ postprocessors.append({
'key': 'FFmpegEmbedSubtitle', 'key': 'FFmpegEmbedSubtitle',
# already_have_subtitle = True prevents the file from being deleted after embedding
'already_have_subtitle': already_have_subtitle 'already_have_subtitle': already_have_subtitle
}) })
if not already_have_subtitle: if not already_have_subtitle:
@ -385,6 +389,7 @@ def report_conflict(arg1, arg2):
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
postprocessors.append({ postprocessors.append({
'key': 'EmbedThumbnail', 'key': 'EmbedThumbnail',
# already_have_thumbnail = True prevents the file from being deleted after embedding
'already_have_thumbnail': already_have_thumbnail 'already_have_thumbnail': already_have_thumbnail
}) })
if not already_have_thumbnail: if not already_have_thumbnail:
@ -399,7 +404,8 @@ def report_conflict(arg1, arg2):
postprocessors.append({ postprocessors.append({
'key': 'ExecAfterDownload', 'key': 'ExecAfterDownload',
'exec_cmd': opts.exec_cmd, 'exec_cmd': opts.exec_cmd,
'when': 'aftermove' # Run this only after the files have been moved to their final locations
'when': 'after_move'
}) })
def report_args_compat(arg, name): def report_args_compat(arg, name):
@ -425,7 +431,6 @@ def report_args_compat(arg, name):
else match_filter_func(opts.match_filter)) else match_filter_func(opts.match_filter))
ydl_opts = { ydl_opts = {
'convertsubtitles': opts.convertsubtitles,
'usenetrc': opts.usenetrc, 'usenetrc': opts.usenetrc,
'username': opts.username, 'username': opts.username,
'password': opts.password, 'password': opts.password,

View File

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import os import os
import subprocess import subprocess
import struct import struct

View File

@ -13,6 +13,10 @@
class MoveFilesAfterDownloadPP(PostProcessor): class MoveFilesAfterDownloadPP(PostProcessor):
def __init__(self, downloader=None, downloaded=True):
PostProcessor.__init__(self, downloader)
self._downloaded = downloaded
@classmethod @classmethod
def pp_key(cls): def pp_key(cls):
return 'MoveFiles' return 'MoveFiles'
@ -21,7 +25,8 @@ def run(self, info):
dl_path, dl_name = os.path.split(encodeFilename(info['filepath'])) dl_path, dl_name = os.path.split(encodeFilename(info['filepath']))
finaldir = info.get('__finaldir', dl_path) finaldir = info.get('__finaldir', dl_path)
finalpath = os.path.join(finaldir, dl_name) finalpath = os.path.join(finaldir, dl_name)
info['__files_to_move'][info['filepath']] = decodeFilename(finalpath) if self._downloaded:
info['__files_to_move'][info['filepath']] = decodeFilename(finalpath)
make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old)))) make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old))))
for oldfile, newfile in info['__files_to_move'].items(): for oldfile, newfile in info['__files_to_move'].items():