1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-02 01:02:48 +01:00

Sponskrub integration

This commit is contained in:
pukkandan 2020-11-15 05:58:41 +05:30
parent 76d321f68f
commit a9e7f54670
7 changed files with 144 additions and 4 deletions

View File

@ -523,6 +523,19 @@ ## Post-processing Options:
--convert-subs FORMAT Convert the subtitles to other format --convert-subs FORMAT Convert the subtitles to other format
(currently supported: srt|ass|vtt|lrc) (currently supported: srt|ass|vtt|lrc)
## SponSkrub Options (SponsorBlock)
--sponskrub Use sponskrub to mark sponsored sections
with the data available in SponsorBlock API
(Youtube only)
--sponskrub-cut Cut out the sponsor sections instead of
simply marking them
--sponskrub-force Run sponskrub even if the video was
already downloaded. Use with caution
--sponskrub-location PATH Location of the sponskrub binary;
either the path to the binary or its
containing directory
--sponskrub-args Give these arguments to sponskrub
## Extractor Options: ## Extractor Options:
--ignore-dynamic-mpd Do not process dynamic DASH manifests --ignore-dynamic-mpd Do not process dynamic DASH manifests

View File

@ -2110,13 +2110,16 @@ def compatible_formats(formats):
if not ensure_dir_exists(fname): if not ensure_dir_exists(fname):
return return
downloaded.append(fname) downloaded.append(fname)
partial_success = dl(fname, new_info) partial_success, real_download = dl(fname, new_info)
success = success and partial_success success = success and partial_success
info_dict['__postprocessors'] = postprocessors info_dict['__postprocessors'] = postprocessors
info_dict['__files_to_merge'] = downloaded info_dict['__files_to_merge'] = downloaded
# Even if there were no downloads, it is being merged only now
info_dict['__real_download'] = True
else: else:
# Just a single file # Just a single file
success = dl(filename, info_dict) success, real_download = dl(filename, info_dict)
info_dict['__real_download'] = real_download
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err)) self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return return

View File

@ -310,6 +310,17 @@ def parse_retries(retries):
# contents # contents
if opts.xattrs: if opts.xattrs:
postprocessors.append({'key': 'XAttrMetadata'}) postprocessors.append({'key': 'XAttrMetadata'})
# This should be below all ffmpeg PP because it may cut parts out from the video
# If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
if opts.sponskrub is not False:
postprocessors.append({
'key': 'SponSkrub',
'path': opts.sponskrub_path,
'args': opts.sponskrub_args,
'cut': opts.sponskrub_cut,
'force': opts.sponskrub_force,
'ignoreerror': opts.sponskrub is None,
})
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems. # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
if opts.exec_cmd: if opts.exec_cmd:

View File

@ -351,7 +351,7 @@ def download(self, filename, info_dict, subtitle=False):
'status': 'finished', 'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)), 'total_bytes': os.path.getsize(encodeFilename(filename)),
}) })
return True return True, False
if subtitle is False: if subtitle is False:
min_sleep_interval = self.params.get('sleep_interval') min_sleep_interval = self.params.get('sleep_interval')
@ -372,7 +372,7 @@ def download(self, filename, info_dict, subtitle=False):
'[download] Sleeping %s seconds...' % ( '[download] Sleeping %s seconds...' % (
sleep_interval_sub)) sleep_interval_sub))
time.sleep(sleep_interval_sub) time.sleep(sleep_interval_sub)
return self.real_download(filename, info_dict) return self.real_download(filename, info_dict), True
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
"""Real download process. Redefine in subclasses.""" """Real download process. Redefine in subclasses."""

View File

@ -946,6 +946,31 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser):
metavar='FORMAT', dest='convertsubtitles', default=None, metavar='FORMAT', dest='convertsubtitles', default=None,
help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)') help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
extractor = optparse.OptionGroup(parser, 'SponSkrub Options (SponsorBlock)')
extractor.add_option(
'--sponskrub',
action='store_true', dest='sponskrub', default=None,
help='Use sponskrub to mark sponsored sections with the data available in SponsorBlock API (Youtube only)')
extractor.add_option(
'--no-sponskrub',
action='store_false', dest='sponskrub',
help=optparse.SUPPRESS_HELP)
extractor.add_option(
'--sponskrub-cut', default=False,
action='store_true', dest='sponskrub_cut',
help='Cut out the sponsor sections instead of simply marking them')
extractor.add_option(
'--sponskrub-force', default=False,
action='store_true', dest='sponskrub_force',
help='Run sponskrub even if the video was already downloaded')
extractor.add_option(
'--sponskrub-location', metavar='PATH',
dest='sponskrub_path', default='',
help='Location of the sponskrub binary; either the path to the binary or its containing directory.')
extractor.add_option(
'--sponskrub-args', dest='sponskrub_args',
help='Give these arguments to sponskrub')
extractor = optparse.OptionGroup(parser, 'Extractor Options') extractor = optparse.OptionGroup(parser, 'Extractor Options')
extractor.add_option( extractor.add_option(
'--allow-dynamic-mpd', '--allow-dynamic-mpd',

View File

@ -17,6 +17,7 @@
from .xattrpp import XAttrMetadataPP from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP from .execafterdownload import ExecAfterDownloadPP
from .metadatafromtitle import MetadataFromTitlePP from .metadatafromtitle import MetadataFromTitlePP
from .sponskrub import SponSkrubPP
def get_postprocessor(key): def get_postprocessor(key):
@ -38,5 +39,6 @@ def get_postprocessor(key):
'FFmpegVideoConvertorPP', 'FFmpegVideoConvertorPP',
'FFmpegVideoRemuxerPP', 'FFmpegVideoRemuxerPP',
'MetadataFromTitlePP', 'MetadataFromTitlePP',
'SponSkrubPP',
'XAttrMetadataPP', 'XAttrMetadataPP',
] ]

View File

@ -0,0 +1,86 @@
from __future__ import unicode_literals
import os
import subprocess
from .common import PostProcessor
from ..compat import compat_shlex_split
from ..utils import (
check_executable,
encodeArgument,
shell_quote,
PostProcessingError,
)
class SponSkrubPP(PostProcessor):
    """Post-processor that runs the external ``sponskrub`` executable on a file.

    Depending on ``cut``, sponskrub is asked either to mark the sponsor
    sections (the ``-chapter`` argument is passed) or to remove them (no
    ``-chapter`` argument). Only entries whose ``extractor_key`` is
    ``youtube`` (case-insensitive) are processed.
    """

    _temp_ext = 'spons'     # inserted before the real extension of the temporary output file
    _def_args = []          # extra arguments used when the caller supplies none
    _exe_name = 'sponskrub'  # executable name looked up when only a directory (or nothing) is given

    def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False, force=False):
        """Locate the sponskrub executable and prepare its arguments.

        downloader  -- passed through to PostProcessor.__init__
        path        -- path of the executable, or of the directory containing
                       it; empty to look up the bare executable name
        args        -- extra command-line arguments as a single string (split
                       with compat_shlex_split); None to use ``_def_args``
        ignoreerror -- if true, a missing executable is not an error; run()
                       then does nothing
        cut         -- if true, cut the sponsor sections out instead of
                       marking them
        force       -- if true, cut even if the file was not freshly downloaded

        Raises PostProcessingError if the executable cannot be found and
        ``ignoreerror`` is false.
        """
        PostProcessor.__init__(self, downloader)
        self.force = force
        self.cutout = cut
        self.args = ['-chapter'] if not cut else []
        self.args += self._def_args if args is None else compat_shlex_split(args)
        self.path = self.get_exe(path)

        if not ignoreerror and self.path is None:
            if path:
                raise PostProcessingError('sponskrub not found in "%s"' % path)
            else:
                # The command-line option is named --sponskrub-location
                # ("sponskrub_path" is only its destination variable).
                raise PostProcessingError(
                    'sponskrub not found. Please install or provide the path using --sponskrub-location.')

    def get_exe(self, path=''):
        """Return a usable sponskrub path derived from ``path``, or None.

        ``path`` itself is tried first (when non-empty); if that check fails,
        ``path`` is treated as a directory and ``_exe_name`` is appended.
        Each candidate is probed with check_executable(candidate, ['-h']);
        presumably this returns a falsy value when the candidate cannot be
        executed -- verify against the helper's definition.
        """
        if not path or not check_executable(path, ['-h']):
            path = os.path.join(path, self._exe_name)
            if not check_executable(path, ['-h']):
                return None
        return path

    def run(self, information):
        """Run sponskrub on ``information['filepath']``.

        Always returns ``([], information)``. Raises PostProcessingError when
        sponskrub exits with a code other than 0 or 3.
        """
        if self.path is None:
            # Executable was not found but errors were ignored in __init__
            return [], information

        if information['extractor_key'].lower() != 'youtube':
            self._downloader.to_screen('[sponskrub] Skipping sponskrub since it is not a YouTube video')
            return [], information
        if self.cutout and not self.force and not information.get('__real_download', False):
            self._downloader.to_screen(
                '[sponskrub] Skipping sponskrub since the video was already downloaded. '
                'Use --sponskrub-force to run sponskrub anyway')
            return [], information

        self._downloader.to_screen('[sponskrub] Trying to %s sponsor sections' % ('remove' if self.cutout else 'mark'))
        if self.cutout:
            self._downloader.to_screen('WARNING: Cutting out sponsor segments will cause the subtitles to go out of sync.')
            if not information.get('__real_download', False):
                self._downloader.to_screen('WARNING: If sponskrub is run multiple times, unintended parts of the video could be cut out.')

        filename = information['filepath']
        # e.g. "video.mkv" -> "video.mkv.spons.mkv"
        temp_filename = filename + '.' + self._temp_ext + os.path.splitext(filename)[1]
        if os.path.exists(temp_filename):
            # Remove any stale output left at the temporary path
            os.remove(temp_filename)

        cmd = [self.path]
        if self.args:
            cmd += self.args
        cmd += ['--', information['id'], filename, temp_filename]
        cmd = [encodeArgument(i) for i in cmd]

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen('[debug] sponskrub command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        _, stderr = p.communicate()

        if p.returncode == 0:
            # Replace the original file with sponskrub's output
            os.remove(filename)
            os.rename(temp_filename, filename)
            self._downloader.to_screen('[sponskrub] Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
        elif p.returncode != 3:  # error code 3 means there was no info about the video
            stderr = stderr.decode('utf-8', 'replace')
            # Report only the last line of sponskrub's error output
            msg = stderr.strip().split('\n')[-1]
            raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s!' % p.returncode)
        else:
            self._downloader.to_screen('[sponskrub] No segments in the SponsorBlock database')
        return [], information