mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-24 20:22:41 +01:00
Update whisper xxl help text
This commit is contained in:
parent
e7f38e60e0
commit
c46f838723
@ -544,10 +544,10 @@ optional arguments:
|
|||||||
<data name="nikseTextBox1.Text" xml:space="preserve">
|
<data name="nikseTextBox1.Text" xml:space="preserve">
|
||||||
<value>[--device DEVICE]
|
<value>[--device DEVICE]
|
||||||
[--verbose VERBOSE]
|
[--verbose VERBOSE]
|
||||||
[--task {transcribe,translate}]
|
[--language_detection_threshold LANGUAGE_DETECTION_THRESHOLD]
|
||||||
[--best_of BEST_OF]
|
[--language_detection_segments LANGUAGE_DETECTION_SEGMENTS]
|
||||||
[--beam_size BEAM_SIZE]
|
[--temperature TEMPERATURE] [--best_of BEST_OF]
|
||||||
[--patience PATIENCE]
|
[--beam_size BEAM_SIZE] [--patience PATIENCE]
|
||||||
[--length_penalty LENGTH_PENALTY]
|
[--length_penalty LENGTH_PENALTY]
|
||||||
[--repetition_penalty REPETITION_PENALTY]
|
[--repetition_penalty REPETITION_PENALTY]
|
||||||
[--no_repeat_ngram_size NO_REPEAT_NGRAM_SIZE]
|
[--no_repeat_ngram_size NO_REPEAT_NGRAM_SIZE]
|
||||||
@ -572,8 +572,7 @@ optional arguments:
|
|||||||
[--highlight_words HIGHLIGHT_WORDS]
|
[--highlight_words HIGHLIGHT_WORDS]
|
||||||
[--prepend_punctuations PREPEND_PUNCTUATIONS]
|
[--prepend_punctuations PREPEND_PUNCTUATIONS]
|
||||||
[--append_punctuations APPEND_PUNCTUATIONS]
|
[--append_punctuations APPEND_PUNCTUATIONS]
|
||||||
[--threads THREADS]
|
[--threads THREADS] [--version]
|
||||||
[--version]
|
|
||||||
[--vad_filter VAD_FILTER]
|
[--vad_filter VAD_FILTER]
|
||||||
[--vad_threshold VAD_THRESHOLD]
|
[--vad_threshold VAD_THRESHOLD]
|
||||||
[--vad_min_speech_duration_ms VAD_MIN_SPEECH_DURATION_MS]
|
[--vad_min_speech_duration_ms VAD_MIN_SPEECH_DURATION_MS]
|
||||||
@ -581,24 +580,17 @@ optional arguments:
|
|||||||
[--vad_min_silence_duration_ms VAD_MIN_SILENCE_DURATION_MS]
|
[--vad_min_silence_duration_ms VAD_MIN_SILENCE_DURATION_MS]
|
||||||
[--vad_speech_pad_ms VAD_SPEECH_PAD_MS]
|
[--vad_speech_pad_ms VAD_SPEECH_PAD_MS]
|
||||||
[--vad_window_size_samples VAD_WINDOW_SIZE_SAMPLES]
|
[--vad_window_size_samples VAD_WINDOW_SIZE_SAMPLES]
|
||||||
[--vad_dump]
|
[--vad_method {silero_v3,silero_v4,pyannote_v3,pyannote_onnx_v3,auditok,webrtc}]
|
||||||
|
[--vad_dump] [--nullify_non_speech]
|
||||||
[--max_new_tokens MAX_NEW_TOKENS]
|
[--max_new_tokens MAX_NEW_TOKENS]
|
||||||
[--chunk_length CHUNK_LENGTH]
|
[--chunk_length CHUNK_LENGTH]
|
||||||
[--compute_type {default,auto,int8,int8_float16,int8_float32,int8_bfloat16,int16,float16,float32,bfloat16}]
|
[--compute_type {default,auto,int8,int8_float16,int8_float32,int8_bfloat16,int16,float16,float32,bfloat16}]
|
||||||
[--batch_recursive]
|
[--batch_recursive] [--beep_off] [--skip]
|
||||||
[--beep_off]
|
[--checkcuda] [--print_progress] [--postfix]
|
||||||
[--skip]
|
[--check_files] [--PR163_off]
|
||||||
[--checkcuda]
|
[--hallucinations_list_off] [--alt_writer_off]
|
||||||
[--print_progress]
|
[--one_word {0,1,2}] [--sentence] [--standard]
|
||||||
[--postfix]
|
[--standard_asia] [--max_comma MAX_COMMA]
|
||||||
[--check_files]
|
|
||||||
[--PR163_off]
|
|
||||||
[--hallucinations_list_off]
|
|
||||||
[--one_word {0,1,2}]
|
|
||||||
[--sentence]
|
|
||||||
[--standard]
|
|
||||||
[--standard_asia]
|
|
||||||
[--max_comma MAX_COMMA]
|
|
||||||
[--max_comma_cent {50,60,70,80,90,100}]
|
[--max_comma_cent {50,60,70,80,90,100}]
|
||||||
[--max_gap MAX_GAP]
|
[--max_gap MAX_GAP]
|
||||||
[--max_line_width MAX_LINE_WIDTH]
|
[--max_line_width MAX_LINE_WIDTH]
|
||||||
@ -606,243 +598,363 @@ optional arguments:
|
|||||||
[--min_dist_to_end {0,4,5,6,7,8,9,10,11,12}]
|
[--min_dist_to_end {0,4,5,6,7,8,9,10,11,12}]
|
||||||
[--prompt_max {16,32,64,128,223}]
|
[--prompt_max {16,32,64,128,223}]
|
||||||
[--reprompt {0,1,2}]
|
[--reprompt {0,1,2}]
|
||||||
[--prompt_reset_on_no_end {0,1,2}]
|
[--prompt_reset_on_no_end {0,1,2}] [--ff_dump]
|
||||||
[--ff_dump]
|
[--ff_track {1,2,3,4,5,6}] [--ff_fc] [--ff_mp3]
|
||||||
[--ff_track {1,2,3,4,5,6}]
|
[--ff_sync] [--ff_rnndn_sh] [--ff_rnndn_xiph]
|
||||||
[--ff_fc]
|
[--ff_fftdn [0 - 97]] [--ff_tempo [0.5 - 2.0]]
|
||||||
[--ff_mp3]
|
[--ff_gate] [--ff_speechnorm] [--ff_loudnorm]
|
||||||
[--ff_sync]
|
|
||||||
[--ff_rnndn_sh]
|
|
||||||
[--ff_rnndn_xiph]
|
|
||||||
[--ff_fftdn
|
|
||||||
[0 - 97]]
|
|
||||||
[--ff_tempo
|
|
||||||
[0.5 - 2.0]]
|
|
||||||
[--ff_gate]
|
|
||||||
[--ff_speechnorm]
|
|
||||||
[--ff_loudnorm]
|
|
||||||
[--ff_silence_suppress noise duration]
|
[--ff_silence_suppress noise duration]
|
||||||
[--ff_lowhighpass]
|
[--ff_lowhighpass] [--ff_mdx_kim2]
|
||||||
audio
|
[--mdx_chunk MDX_CHUNK]
|
||||||
[audio ...]
|
[--mdx_device MDX_DEVICE] [--diarize]
|
||||||
|
[--diarize_device DIARIZE_DEVICE]
|
||||||
|
[--diarize_threads DIARIZE_THREADS]
|
||||||
|
[--speaker SPEAKER]
|
||||||
|
audio [audio ...]
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
audio audio file(s). You can enter a file wildcard, filelist (txt. m3u, m3u8, lst) or directory to
|
audio audio file(s). You can enter a file wildcard, filelist
|
||||||
do batch processing. Note: non-media files in list or directory are filtered out by extension.
|
(txt. m3u, m3u8, lst) or directory to do batch
|
||||||
|
processing. Note: non-media files in list or directory
|
||||||
|
are filtered out by extension.
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
--model MODEL, -m MODEL
|
--model MODEL, -m MODEL
|
||||||
name of the Whisper model to use (default: medium)
|
name of the Whisper model to use (default: medium)
|
||||||
--model_dir MODEL_DIR
|
--model_dir MODEL_DIR
|
||||||
the path to save model files; uses C:\git\subtitleedit\src\ui\bin\Debug\Whisper\Purfview-
|
the path to save model files; uses C:\git\subtitleedit
|
||||||
Whisper-Faster\_models by default (default: None)
|
\src\ui\bin\Debug\net48\Whisper\Purfview-Whisper-
|
||||||
|
Faster\_models by default (default: None)
|
||||||
--device DEVICE, -d DEVICE
|
--device DEVICE, -d DEVICE
|
||||||
Device to use. Default is 'cuda' if CUDA device is detected, else is 'cpu'. If CUDA GPU is a
|
Device to use. Default is 'cuda' if CUDA device is
|
||||||
second device then set 'cuda:1'. (default: cpu)
|
detected, else is 'cpu'. If CUDA GPU is a second
|
||||||
|
device then set 'cuda:1'. (default: cpu)
|
||||||
--output_dir OUTPUT_DIR, -o OUTPUT_DIR
|
--output_dir OUTPUT_DIR, -o OUTPUT_DIR
|
||||||
directory to save the outputs. By default the same folder where the executable file is or
|
directory to save the outputs. By default the same
|
||||||
where media file is if --batch_recursive=True. '.'- sets to the current folder. 'source' -
|
folder where the executable file is or where media
|
||||||
sets to where media file is. (default: default)
|
file is if --batch_recursive=True. '.'- sets to the
|
||||||
|
current folder. 'source' - sets to where media file
|
||||||
|
is. (default: default)
|
||||||
--output_format {lrc,txt,text,vtt,srt,tsv,json,all}, -f {lrc,txt,text,vtt,srt,tsv,json,all}
|
--output_format {lrc,txt,text,vtt,srt,tsv,json,all}, -f {lrc,txt,text,vtt,srt,tsv,json,all}
|
||||||
format of the output file; if not specified srt will be produced (default: srt)
|
format of the output file; if not specified srt will
|
||||||
|
be produced (default: srt)
|
||||||
--verbose VERBOSE, -v VERBOSE
|
--verbose VERBOSE, -v VERBOSE
|
||||||
whether to print out debug messages (default: False)
|
whether to print out debug messages (default: False)
|
||||||
--task {transcribe,translate}
|
--task {transcribe,translate}
|
||||||
whether to perform X->X speech recognition ('transcribe') or X->English translation
|
whether to perform X->X speech recognition
|
||||||
('translate') (default: transcribe)
|
('transcribe') or X->English translation ('translate')
|
||||||
|
(default: transcribe)
|
||||||
--language {af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,yue,zh,Afrikaans,Albanian,Amharic,Arabic,Armenian,Assamese,Azerbaijani,Bashkir,Basque,Belarusian,Bengali,Bosnian,Breton,Bulgarian,Burmese,Cantonese,Castilian,Catalan,Chinese,Croatian,Czech,Danish,Dutch,English,Estonian,Faroese,Finnish,Flemish,French,Galician,Georgian,German,Greek,Gujarati,Haitian,Haitian Creole,Hausa,Hawaiian,Hebrew,Hindi,Hungarian,Icelandic,Indonesian,Italian,Japanese,Javanese,Kannada,Kazakh,Khmer,Korean,Lao,Latin,Latvian,Letzeburgesch,Lingala,Lithuanian,Luxembourgish,Macedonian,Malagasy,Malay,Malayalam,Maltese,Mandarin,Maori,Marathi,Moldavian,Moldovan,Mongolian,Myanmar,Nepali,Norwegian,Nynorsk,Occitan,Panjabi,Pashto,Persian,Polish,Portuguese,Punjabi,Pushto,Romanian,Russian,Sanskrit,Serbian,Shona,Sindhi,Sinhala,Sinhalese,Slovak,Slovenian,Somali,Spanish,Sundanese,Swahili,Swedish,Tagalog,Tajik,Tamil,Tatar,Telugu,Thai,Tibetan,Turkish,Turkmen,Ukrainian,Urdu,Uzbek,Valencian,Vietnamese,Welsh,Yiddish,Yoruba}, -l {af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,yue,zh,Afrikaans,Albanian,Amharic,Arabic,Armenian,Assamese,Azerbaijani,Bashkir,Basque,Belarusian,Bengali,Bosnian,Breton,Bulgarian,Burmese,Cantonese,Castilian,Catalan,Chinese,Croatian,Czech,Danish,Dutch,English,Estonian,Faroese,Finnish,Flemish,French,Galician,Georgian,German,Greek,Gujarati,Haitian,Haitian Creole,Hausa,Hawaiian,Hebrew,Hindi,Hungarian,Icelandic,Indonesian,Italian,Japanese,Javanese,Kannada,Kazakh,Khmer,Korean,Lao,Latin,Latvian,Letzeburgesch,Lingala,Lithuanian,Luxembourgish,Macedonian,Malagasy,Malay,Malayalam,Maltese,Mandarin,Maori,Marathi,Moldavian,Moldovan,Mongolian,Myanmar,Nepali,Norwegian,Nynorsk,Occitan,Panjabi,Pashto,Persian,Polish,Portuguese,Punjabi,Pushto,Romanian,Russian,Sanskrit,Serbian,Shona,Sindhi,Sinhala,Sinhalese,Slovak,Slovenian,Somali,Spanish,Sundanese,Swahili,Swedish,Tagalog,Tajik,Tamil,Tatar,Telugu,Thai,Tibetan,Turkish,Turkmen,Ukrainian,Urdu,Uzbek,Valencian,Vietnamese,Welsh,Yiddish,Yoruba}
|
--language {af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,yue,zh,Afrikaans,Albanian,Amharic,Arabic,Armenian,Assamese,Azerbaijani,Bashkir,Basque,Belarusian,Bengali,Bosnian,Breton,Bulgarian,Burmese,Cantonese,Castilian,Catalan,Chinese,Croatian,Czech,Danish,Dutch,English,Estonian,Faroese,Finnish,Flemish,French,Galician,Georgian,German,Greek,Gujarati,Haitian,Haitian Creole,Hausa,Hawaiian,Hebrew,Hindi,Hungarian,Icelandic,Indonesian,Italian,Japanese,Javanese,Kannada,Kazakh,Khmer,Korean,Lao,Latin,Latvian,Letzeburgesch,Lingala,Lithuanian,Luxembourgish,Macedonian,Malagasy,Malay,Malayalam,Maltese,Mandarin,Maori,Marathi,Moldavian,Moldovan,Mongolian,Myanmar,Nepali,Norwegian,Nynorsk,Occitan,Panjabi,Pashto,Persian,Polish,Portuguese,Punjabi,Pushto,Romanian,Russian,Sanskrit,Serbian,Shona,Sindhi,Sinhala,Sinhalese,Slovak,Slovenian,Somali,Spanish,Sundanese,Swahili,Swedish,Tagalog,Tajik,Tamil,Tatar,Telugu,Thai,Tibetan,Turkish,Turkmen,Ukrainian,Urdu,Uzbek,Valencian,Vietnamese,Welsh,Yiddish,Yoruba}, -l {af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,yue,zh,Afrikaans,Albanian,Amharic,Arabic,Armenian,Assamese,Azerbaijani,Bashkir,Basque,Belarusian,Bengali,Bosnian,Breton,Bulgarian,Burmese,Cantonese,Castilian,Catalan,Chinese,Croatian,Czech,Danish,Dutch,English,Estonian,Faroese,Finnish,Flemish,French,Galician,Georgian,German,Greek,Gujarati,Haitian,Haitian Creole,Hausa,Hawaiian,Hebrew,Hindi,Hungarian,Icelandic,Indonesian,Italian,Japanese,Javanese,Kannada,Kazakh,Khmer,Korean,Lao,Latin,Latvian,Letzeburgesch,Lingala,Lithuanian,Luxembourgish,Macedonian,Malagasy,Malay,Malayalam,Maltese,Mandarin,Maori,Marathi,Moldavian,Moldovan,Mongolian,Myanmar,Nepali,Norwegian,Nynorsk,Occitan,Panjabi,Pashto,Persian,Polish,Portuguese,Punjabi,Pushto,Romanian,Russian,Sanskrit,Serbian,Shona,Sindhi,Sinhala,Sinhalese,Slovak,Slovenian,Somali,Spanish,Sundanese,Swahili,Swedish,Tagalog,Tajik,Tamil,Tatar,Telugu,Thai,Tibetan,Turkish,Turkmen,Ukrainian,Urdu,Uzbek,Valencian,Vietnamese,Welsh,Yiddish,Yoruba}
|
||||||
language spoken in the audio, specify None to perform language detection (default: None)
|
language spoken in the audio, specify None to perform
|
||||||
|
language detection (default: None)
|
||||||
--language_detection_threshold LANGUAGE_DETECTION_THRESHOLD
|
--language_detection_threshold LANGUAGE_DETECTION_THRESHOLD
|
||||||
If the maximum probability of the language tokens is higher than this value, the language is
|
If the maximum probability of the language tokens is
|
||||||
detected. (default: None)
|
higher than this value, the language is detected.
|
||||||
|
(default: None)
|
||||||
--language_detection_segments LANGUAGE_DETECTION_SEGMENTS
|
--language_detection_segments LANGUAGE_DETECTION_SEGMENTS
|
||||||
Number of segments/chunks to consider for the language detection. (default: 1)
|
Number of segments/chunks to consider for the language
|
||||||
|
detection. (default: 1)
|
||||||
--temperature TEMPERATURE
|
--temperature TEMPERATURE
|
||||||
temperature to use for sampling (default: 0)
|
temperature to use for sampling (default: 0)
|
||||||
--best_of BEST_OF, -bo BEST_OF
|
--best_of BEST_OF, -bo BEST_OF
|
||||||
number of candidates when sampling with non-zero temperature (default: 5)
|
number of candidates when sampling with non-zero
|
||||||
|
temperature (default: 5)
|
||||||
--beam_size BEAM_SIZE, -bs BEAM_SIZE
|
--beam_size BEAM_SIZE, -bs BEAM_SIZE
|
||||||
number of beams in beam search, only applicable when temperature is zero (default: 5)
|
number of beams in beam search, only applicable when
|
||||||
|
temperature is zero (default: 5)
|
||||||
--patience PATIENCE, -p PATIENCE
|
--patience PATIENCE, -p PATIENCE
|
||||||
optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the
|
optional patience value to use in beam decoding, as in
|
||||||
default (1.0) is equivalent to conventional beam search (default: 2.0)
|
https://arxiv.org/abs/2204.05424, the default (1.0) is
|
||||||
|
equivalent to conventional beam search (default: 2.0)
|
||||||
--length_penalty LENGTH_PENALTY
|
--length_penalty LENGTH_PENALTY
|
||||||
optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses
|
optional token length penalty coefficient (alpha) as
|
||||||
simple length normalization by default (default: 1.0)
|
in https://arxiv.org/abs/1609.08144, uses simple
|
||||||
|
length normalization by default (default: 1.0)
|
||||||
--repetition_penalty REPETITION_PENALTY
|
--repetition_penalty REPETITION_PENALTY
|
||||||
Penalty applied to the score of previously generated tokens (set > 1.0 to penalize). (default:
|
Penalty applied to the score of previously generated
|
||||||
1.0)
|
tokens (set > 1.0 to penalize). (default: 1.0)
|
||||||
--no_repeat_ngram_size NO_REPEAT_NGRAM_SIZE
|
--no_repeat_ngram_size NO_REPEAT_NGRAM_SIZE
|
||||||
Prevent repetitions of ngrams with this size (set 0 to disable). (default: 0)
|
Prevent repetitions of ngrams with this size (set 0 to
|
||||||
|
disable). (default: 0)
|
||||||
--suppress_blank SUPPRESS_BLANK
|
--suppress_blank SUPPRESS_BLANK
|
||||||
Suppress blank outputs at the beginning of the sampling. (default: True)
|
Suppress blank outputs at the beginning of the
|
||||||
|
sampling. (default: True)
|
||||||
--suppress_tokens SUPPRESS_TOKENS
|
--suppress_tokens SUPPRESS_TOKENS
|
||||||
comma-separated list of token ids to suppress during sampling; '-1' will suppress most special
|
comma-separated list of token ids to suppress during
|
||||||
characters except common punctuations (default: -1)
|
sampling; '-1' will suppress most special characters
|
||||||
|
except common punctuations. `None` - will pass empty
|
||||||
|
list. (default: -1)
|
||||||
--initial_prompt INITIAL_PROMPT, -prompt INITIAL_PROMPT
|
--initial_prompt INITIAL_PROMPT, -prompt INITIAL_PROMPT
|
||||||
optional text to provide context as a prompt for the first window. Use 'None' to disable it.
|
optional text to provide context as a prompt for the
|
||||||
Note: 'auto' and 'default' are experimental ~universal prompt presets, they work if --language
|
first window. Use 'None' to disable it. Note: 'auto'
|
||||||
is set. (default: auto)
|
and 'default' are experimental ~universal prompt
|
||||||
--prefix PREFIX Optional text to provide as a prefix for the first window (default: None)
|
presets, they work if --language is set. (default:
|
||||||
|
auto)
|
||||||
|
--prefix PREFIX Optional text to provide as a prefix for the first
|
||||||
|
window (default: None)
|
||||||
--condition_on_previous_text CONDITION_ON_PREVIOUS_TEXT, -condition CONDITION_ON_PREVIOUS_TEXT
|
--condition_on_previous_text CONDITION_ON_PREVIOUS_TEXT, -condition CONDITION_ON_PREVIOUS_TEXT
|
||||||
if True, provide the previous output of the model as a prompt for the next window; disabling
|
if True, provide the previous output of the model as a
|
||||||
may make the text inconsistent across windows, but the model becomes less prone to getting
|
prompt for the next window; disabling may make the
|
||||||
stuck in a failure loop. If disabled then you may want to disable --reprompt too. (default:
|
text inconsistent across windows, but the model
|
||||||
True)
|
becomes less prone to getting stuck in a failure loop.
|
||||||
|
If disabled then you may want to disable --reprompt
|
||||||
|
too. (default: True)
|
||||||
--prompt_reset_on_temperature PROMPT_RESET_ON_TEMPERATURE
|
--prompt_reset_on_temperature PROMPT_RESET_ON_TEMPERATURE
|
||||||
Resets prompt if temperature is above this value. Arg has effect only if
|
Resets prompt if temperature is above this value. Arg
|
||||||
condition_on_previous_text is True. (default: 0.5)
|
has effect only if condition_on_previous_text is True.
|
||||||
|
(default: 0.5)
|
||||||
--without_timestamps WITHOUT_TIMESTAMPS
|
--without_timestamps WITHOUT_TIMESTAMPS
|
||||||
Only sample text tokens. (default: False)
|
Only sample text tokens. (default: False)
|
||||||
--max_initial_timestamp MAX_INITIAL_TIMESTAMP
|
--max_initial_timestamp MAX_INITIAL_TIMESTAMP
|
||||||
The initial timestamp cannot be later than this. (default: 1.0)
|
The initial timestamp cannot be later than this.
|
||||||
|
(default: 1.0)
|
||||||
--temperature_increment_on_fallback TEMPERATURE_INCREMENT_ON_FALLBACK, -fallback TEMPERATURE_INCREMENT_ON_FALLBACK
|
--temperature_increment_on_fallback TEMPERATURE_INCREMENT_ON_FALLBACK, -fallback TEMPERATURE_INCREMENT_ON_FALLBACK
|
||||||
temperature to increase when falling back when the decoding fails to meet either of the
|
temperature to increase when falling back when the
|
||||||
thresholds below. To disable fallback set it to 'None'. (default: 0.2)
|
decoding fails to meet either of the thresholds below.
|
||||||
|
To disable fallback set it to 'None'. (default: 0.2)
|
||||||
--compression_ratio_threshold COMPRESSION_RATIO_THRESHOLD
|
--compression_ratio_threshold COMPRESSION_RATIO_THRESHOLD
|
||||||
if the gzip compression ratio is higher than this value, treat the decoding as failed
|
if the gzip compression ratio is higher than this
|
||||||
(default: 2.4)
|
value, treat the decoding as failed (default: 2.4)
|
||||||
--logprob_threshold LOGPROB_THRESHOLD
|
--logprob_threshold LOGPROB_THRESHOLD
|
||||||
if the average log probability is lower than this value, treat the decoding as failed
|
if the average log probability is lower than this
|
||||||
(default: -1.0)
|
value, treat the decoding as failed (default: -1.0)
|
||||||
--no_speech_threshold NO_SPEECH_THRESHOLD
|
--no_speech_threshold NO_SPEECH_THRESHOLD
|
||||||
if the probability of the <|nospeech|> token is higher than this value AND the decoding has
|
if the probability of the <|nospeech|> token is higher
|
||||||
failed due to 'logprob_threshold', consider the segment as silence (default: 0.6)
|
than this value AND the decoding has failed due to
|
||||||
--v3_offsets_off Disables custom offsets to the defaults of pseudo-vad thresholds when 'large-v3' models are in
|
'logprob_threshold', consider the segment as silence
|
||||||
use. Those offset were made to make 'large-v3' to hallucinate less by default. (default:
|
(default: 0.6)
|
||||||
False)
|
--v3_offsets_off Disables custom offsets to the defaults of pseudo-vad
|
||||||
|
thresholds when 'large-v3' models are in use. Note:
|
||||||
|
Offsets made to combat 'large-v3' hallucinations.
|
||||||
|
(default: False)
|
||||||
--hallucination_silence_threshold HALLUCINATION_SILENCE_THRESHOLD, -hst HALLUCINATION_SILENCE_THRESHOLD
|
--hallucination_silence_threshold HALLUCINATION_SILENCE_THRESHOLD, -hst HALLUCINATION_SILENCE_THRESHOLD
|
||||||
(Experimental) When word_timestamps is True, skip silent periods longer than this threshold
|
(Experimental) When word_timestamps is True, skip
|
||||||
(in seconds) when a possible hallucination is detected. Optimal value is somewhere between 2 -
|
silent periods longer than this threshold (in seconds)
|
||||||
8 seconds. Inactive if None. (default: None)
|
when a possible hallucination is detected. Optimal
|
||||||
|
value is somewhere between 2 - 8 seconds. Inactive if
|
||||||
|
None. (default: None)
|
||||||
--hallucination_silence_th_temp {0.0,0.2,0.5,0.8,1.0}, -hst_temp {0.0,0.2,0.5,0.8,1.0}
|
--hallucination_silence_th_temp {0.0,0.2,0.5,0.8,1.0}, -hst_temp {0.0,0.2,0.5,0.8,1.0}
|
||||||
(Experimental) Additional heuristic for '--hallucination_silence_threshold'. If temperature is
|
(Experimental) Additional heuristic for '--
|
||||||
higher that this threshold then consider segment as possible hallucination ignoring the hst
|
hallucination_silence_threshold'. If temperature is
|
||||||
score. Inactive if 1.0. (default: 1.0)
|
higher that this threshold then consider segment as
|
||||||
--clip_timestamps CLIP_TIMESTAMPS
|
possible hallucination ignoring the hst score.
|
||||||
Comma-separated list start,end,start,end,... timestamps (in seconds) of clips to process. The
|
Inactive if 1.0. (default: 1.0)
|
||||||
last end timestamp defaults to the end of the file. VAD is auto-disabled. (default: 0)
|
--clip_timestamps CLIP_TIMESTAMPS, -clip CLIP_TIMESTAMPS
|
||||||
|
Comma-separated list start,end,start,end,...
|
||||||
|
timestamps (in seconds) of clips to process. The last
|
||||||
|
end timestamp defaults to the end of the file. VAD is
|
||||||
|
auto-disabled. (default: 0)
|
||||||
--no_speech_strict_lvl {0,1,2}
|
--no_speech_strict_lvl {0,1,2}
|
||||||
(experimental) Level of stricter actions when no_speech_prob > 0.93. Use beam_size=5 if this
|
(experimental) Level of stricter actions when
|
||||||
is enabled. Options: 0 - Disabled (do nothing), 1 - Reset propmt (see
|
no_speech_prob > 0.93. Use beam_size=5 if this is
|
||||||
condition_on_previous_text), 2 - Invalidate the cached encoder output (if no_speech_threshold
|
enabled. Options: 0 - Disabled (do nothing), 1 - Reset
|
||||||
is not None). Arg meant to combat cases where the model is getting stuck in a failure loop or
|
propmt (see condition_on_previous_text), 2 -
|
||||||
outputs nonsense (default: 0)
|
Invalidate the cached encoder output (if
|
||||||
|
no_speech_threshold is not None). Arg meant to combat
|
||||||
|
cases where the model is getting stuck in a failure
|
||||||
|
loop or outputs nonsense (default: 0)
|
||||||
--word_timestamps WORD_TIMESTAMPS, -wt WORD_TIMESTAMPS
|
--word_timestamps WORD_TIMESTAMPS, -wt WORD_TIMESTAMPS
|
||||||
Extract word-level timestamps and refine the results based on them (default: True)
|
Extract word-level timestamps and refine the results
|
||||||
|
based on them (default: True)
|
||||||
--highlight_words HIGHLIGHT_WORDS, -hw HIGHLIGHT_WORDS
|
--highlight_words HIGHLIGHT_WORDS, -hw HIGHLIGHT_WORDS
|
||||||
underline each word as it is spoken AKA karaoke in srt and vtt output formats (default: False)
|
underline each word as it is spoken AKA karaoke in srt
|
||||||
|
and vtt output formats (default: False)
|
||||||
--prepend_punctuations PREPEND_PUNCTUATIONS
|
--prepend_punctuations PREPEND_PUNCTUATIONS
|
||||||
if word_timestamps is True, merge these punctuation symbols with the next word (default:
|
if word_timestamps is True, merge these punctuation
|
||||||
"'“¿([{-)
|
symbols with the next word (default: "'“¿([{-)
|
||||||
--append_punctuations APPEND_PUNCTUATIONS
|
--append_punctuations APPEND_PUNCTUATIONS
|
||||||
if word_timestamps is True, merge these punctuation symbols with the previous word (default:
|
if word_timestamps is True, merge these punctuation
|
||||||
|
symbols with the previous word (default:
|
||||||
"'.。,,!!??::”)]}、)
|
"'.。,,!!??::”)]}、)
|
||||||
--threads THREADS number of threads used for CPU inference; By default number of the real cores but no more that
|
--threads THREADS number of threads used for CPU inference; By default
|
||||||
4 (default: 0)
|
number of the real cores but no more that 4 (default:
|
||||||
|
0)
|
||||||
--version Show Faster-Whisper's version number
|
--version Show Faster-Whisper's version number
|
||||||
--vad_filter VAD_FILTER, -vad VAD_FILTER
|
--vad_filter VAD_FILTER, -vad VAD_FILTER
|
||||||
Enable the voice activity detection (VAD) to filter out parts of the audio without speech.
|
Enable the voice activity detection (VAD) to filter
|
||||||
(default: True)
|
out parts of the audio without speech. (default: True)
|
||||||
--vad_threshold VAD_THRESHOLD
|
--vad_threshold VAD_THRESHOLD
|
||||||
Probabilities above this value are considered as speech. (default: 0.45)
|
Probabilities above this value are considered as
|
||||||
|
speech. (default: 0.45)
|
||||||
--vad_min_speech_duration_ms VAD_MIN_SPEECH_DURATION_MS
|
--vad_min_speech_duration_ms VAD_MIN_SPEECH_DURATION_MS
|
||||||
Final speech chunks shorter min_speech_duration_ms are thrown out. (default: 350)
|
Final speech chunks shorter min_speech_duration_ms are
|
||||||
|
thrown out. (default: 350)
|
||||||
--vad_max_speech_duration_s VAD_MAX_SPEECH_DURATION_S
|
--vad_max_speech_duration_s VAD_MAX_SPEECH_DURATION_S
|
||||||
Maximum duration of speech chunks in seconds. Longer will be split at the timestamp of the
|
Maximum duration of speech chunks in seconds. Longer
|
||||||
last silence. (default: None)
|
will be split at the timestamp of the last silence.
|
||||||
|
(default: None)
|
||||||
--vad_min_silence_duration_ms VAD_MIN_SILENCE_DURATION_MS
|
--vad_min_silence_duration_ms VAD_MIN_SILENCE_DURATION_MS
|
||||||
In the end of each speech chunk time to wait before separating it. (default: 3000)
|
In the end of each speech chunk time to wait before
|
||||||
|
separating it. (default: 3000)
|
||||||
--vad_speech_pad_ms VAD_SPEECH_PAD_MS
|
--vad_speech_pad_ms VAD_SPEECH_PAD_MS
|
||||||
Final speech chunks are padded by speech_pad_ms each side. (default: 900)
|
Final speech chunks are padded by speech_pad_ms each
|
||||||
|
side. (default: 900)
|
||||||
--vad_window_size_samples VAD_WINDOW_SIZE_SAMPLES
|
--vad_window_size_samples VAD_WINDOW_SIZE_SAMPLES
|
||||||
Size of audio chunks fed to the silero VAD model. Values other than 512, 1024, 1536 may affect
|
Size of audio chunks fed to the silero VAD model.
|
||||||
model perfomance!!! (default: 1536)
|
Values other than 512, 1024, 1536 may affect model
|
||||||
--vad_dump Dumps VAD timings to a subtitle file for inspection. (default: False)
|
perfomance!!! (default: 1536)
|
||||||
|
--vad_method {silero_v3,silero_v4,pyannote_v3,pyannote_onnx_v3,auditok,webrtc}, --vad_alt_method {silero_v3,silero_v4,pyannote_v3,pyannote_onnx_v3,auditok,webrtc}
|
||||||
|
Read there - 'https://github.com/Purfview/whisper-
|
||||||
|
standalone-win/discussions/231'. (default: None)
|
||||||
|
--vad_dump Dumps VAD timings to a subtitle file for inspection.
|
||||||
|
Additionaly dumps various intermediate audio files for
|
||||||
|
debug. (default: False)
|
||||||
|
--nullify_non_speech, -nullify
|
||||||
|
For dev experiments. (default: False)
|
||||||
--max_new_tokens MAX_NEW_TOKENS
|
--max_new_tokens MAX_NEW_TOKENS
|
||||||
Maximum number of new tokens to generate per-chunk. (default: None)
|
Maximum number of new tokens to generate per-chunk.
|
||||||
|
(default: None)
|
||||||
--chunk_length CHUNK_LENGTH
|
--chunk_length CHUNK_LENGTH
|
||||||
The length of audio segments. If it is not None, it will overwrite the default chunk_length of
|
The length of audio segments. If it is not None, it
|
||||||
the FeatureExtractor. (default: None)
|
will overwrite the default chunk_length of the
|
||||||
|
FeatureExtractor. (default: None)
|
||||||
--compute_type {default,auto,int8,int8_float16,int8_float32,int8_bfloat16,int16,float16,float32,bfloat16}, -ct {default,auto,int8,int8_float16,int8_float32,int8_bfloat16,int16,float16,float32,bfloat16}
|
--compute_type {default,auto,int8,int8_float16,int8_float32,int8_bfloat16,int16,float16,float32,bfloat16}, -ct {default,auto,int8,int8_float16,int8_float32,int8_bfloat16,int16,float16,float32,bfloat16}
|
||||||
Type of quantization to use (see https://opennmt.net/CTranslate2/quantization.html). (default:
|
Type of quantization to use (see
|
||||||
auto)
|
https://opennmt.net/CTranslate2/quantization.html).
|
||||||
|
(default: auto)
|
||||||
--batch_recursive, -br
|
--batch_recursive, -br
|
||||||
Enables recursive batch processing. Note: If set then it changes defaults of --output_dir.
|
Enables recursive batch processing. Note: If set then
|
||||||
|
it changes defaults of --output_dir. (default: False)
|
||||||
|
--beep_off Disables the beep sound when operation is finished.
|
||||||
(default: False)
|
(default: False)
|
||||||
--beep_off Disables the beep sound when operation is finished. (default: False)
|
--skip Skips media file if subtitle exists. Works if input is
|
||||||
--skip Skips media file if subtitle exists. Works if input is wildcard or directory. (default: False)
|
|
||||||
--checkcuda, -cc Returns CUDA device count. (for Subtitle Edit's internal use)
|
|
||||||
--print_progress, -pp
|
|
||||||
Prints progress bar instead of transcription. (default: False)
|
|
||||||
--postfix Adds language as a postfix to subtitle's filename. (default: False)
|
|
||||||
--check_files Checks input files for errors before passing all them for transcription. Works if input is
|
|
||||||
wildcard or directory. (default: False)
|
wildcard or directory. (default: False)
|
||||||
--PR163_off (For dev experiments) Disables PR163. . (default: False)
|
--checkcuda, -cc Returns CUDA device count. (for Subtitle Edit's
|
||||||
|
internal use)
|
||||||
|
--print_progress, -pp
|
||||||
|
Prints progress bar instead of transcription.
|
||||||
|
(default: False)
|
||||||
|
--postfix Adds language as a postfix to subtitle's filename.
|
||||||
|
(default: False)
|
||||||
|
--check_files Checks input files for errors before passing all them
|
||||||
|
for transcription. Works if input is wildcard or
|
||||||
|
directory. (default: False)
|
||||||
|
--PR163_off (For dev experiments) Disables PR163. . (default:
|
||||||
|
False)
|
||||||
--hallucinations_list_off
|
--hallucinations_list_off
|
||||||
(For dev experiments) Disables hallucinations_list, allows hallucinations added to prompt.
|
(For dev experiments) Disables hallucinations_list,
|
||||||
(default: False)
|
allows hallucinations added to prompt. (default:
|
||||||
--one_word {0,1,2} 0) Disabled. 1) Outputs srt and vtt subtitles with one word per line. 2) As '1', plus removes
|
False)
|
||||||
whitespace and ensures >= 50ms for sub lines. Note: VAD may slightly reduce the accuracy of
|
--alt_writer_off (For dev experiments) Forcefully disables alternative
|
||||||
timestamps on some lines. (default: 0)
|
subs writer func. (default: False)
|
||||||
--sentence Enables splitting lines to sentences for srt and vtt subs. Every sentence starts in the new
|
--one_word {0,1,2} 0) Disabled. 1) Outputs srt and vtt subtitles with one
|
||||||
segment. By default meant to output whole sentence per line for better translations, but not
|
word per line. 2) As '1', plus removes whitespace and
|
||||||
limited to, read about '--max_...' parameters. Note: has no effect on 'highlight_words'.
|
ensures >= 50ms for sub lines. Note: VAD may slightly
|
||||||
(default: False)
|
reduce the accuracy of timestamps on some lines.
|
||||||
--standard Quick hardcoded preset to split lines in standard way. 42 chars per 2 lines with
|
(default: 0)
|
||||||
max_comma_cent=70 and --sentence are activated automatically. (default: False)
|
--sentence Enables splitting lines to sentences for srt and vtt
|
||||||
--standard_asia Quick hardcoded preset to split lines in standard way for some Asian languages. 16 chars per 2
|
subs. Every sentence starts in the new segment. By
|
||||||
lines with max_comma_cent=80 and --sentence are activated automatically. (default: False)
|
default meant to output whole sentence per line for
|
||||||
|
better translations, but not limited to, read about '
|
||||||
|
--max_...' parameters. Note: has no effect on
|
||||||
|
'highlight_words'. (default: False)
|
||||||
|
--standard Quick hardcoded preset to split lines in standard way.
|
||||||
|
42 chars per 2 lines with max_comma_cent=70 and
|
||||||
|
--sentence are activated automatically. (default:
|
||||||
|
False)
|
||||||
|
--standard_asia Quick hardcoded preset to split lines in standard way
|
||||||
|
for some Asian languages. 16 chars per 2 lines with
|
||||||
|
max_comma_cent=80 and --sentence are activated
|
||||||
|
automatically. (default: False)
|
||||||
--max_comma MAX_COMMA
|
--max_comma MAX_COMMA
|
||||||
(requires --sentence) After this line length a comma is treated as the end of sentence. Note:
|
(requires --sentence) After this line length a comma
|
||||||
disabled if it's over or equal to --max_line_width. (default: 250)
|
is treated as the end of sentence. Note: disabled if
|
||||||
|
it's over or equal to --max_line_width. (default: 250)
|
||||||
--max_comma_cent {50,60,70,80,90,100}
|
--max_comma_cent {50,60,70,80,90,100}
|
||||||
(requires --sentence) Percentage of --max_line_width when it starts breaking the line after
|
(requires --sentence) Percentage of --max_line_width
|
||||||
comma. Note: 100 = disabled. (default: 100)
|
when it starts breaking the line after comma. Note:
|
||||||
--max_gap MAX_GAP (requires --sentence) Threshold for a gap length in seconds, longer gaps are treated as dots.
|
100 = disabled. (default: 100)
|
||||||
(default: 3.0)
|
--max_gap MAX_GAP (requires --sentence) Threshold for a gap length in
|
||||||
|
seconds, longer gaps are treated as dots. (default:
|
||||||
|
3.0)
|
||||||
--max_line_width MAX_LINE_WIDTH
|
--max_line_width MAX_LINE_WIDTH
|
||||||
The maximum number of characters in a line before breaking the line. (default: 1000)
|
The maximum number of characters in a line before
|
||||||
|
breaking the line. Note: It works with `--sentence`
|
||||||
|
too. (default: 1000)
|
||||||
--max_line_count MAX_LINE_COUNT
|
--max_line_count MAX_LINE_COUNT
|
||||||
The maximum number of lines in one sub segment. (default: 1)
|
The maximum number of lines in one sub segment. Note:
|
||||||
|
It works with `--sentence` too. (default: 1)
|
||||||
--min_dist_to_end {0,4,5,6,7,8,9,10,11,12}
|
--min_dist_to_end {0,4,5,6,7,8,9,10,11,12}
|
||||||
(requires --sentence) If from words like 'the', 'Mr.' and ect. to the end of line distance is
|
(requires --sentence) If from words like 'the', 'Mr.'
|
||||||
less than set then it starts in a new line. Note: 0 = disabled. (default: 0)
|
and ect. to the end of line distance is less than set
|
||||||
|
then it starts in a new line. Note: 0 = disabled.
|
||||||
|
(default: 0)
|
||||||
--prompt_max {16,32,64,128,223}
|
--prompt_max {16,32,64,128,223}
|
||||||
(experimental) The maximum size of prompt. (default: 223)
|
(experimental) The maximum size of prompt. (default:
|
||||||
--reprompt {0,1,2} (experimental) 0) Disabled. 1) Inserts initial_prompt after the prompt resets. 2) Ensures that
|
223)
|
||||||
initial_prompt is present in prompt for all windows/chunks. Note: auto-disabled if
|
--reprompt {0,1,2} (experimental) 0) Disabled. 1) Inserts initial_prompt
|
||||||
initial_prompt=None. It's similar to 'hotwords' feature. (default: 2)
|
after the prompt resets. 2) Ensures that
|
||||||
|
initial_prompt is present in prompt for all
|
||||||
|
windows/chunks. Note: auto-disabled if
|
||||||
|
initial_prompt=None. It's similar to 'hotwords'
|
||||||
|
feature. (default: 2)
|
||||||
--prompt_reset_on_no_end {0,1,2}
|
--prompt_reset_on_no_end {0,1,2}
|
||||||
(experimental) Resets prompt if there is no end of sentence in window/chunk. 0 - disabled, 1 -
|
(experimental) Resets prompt if there is no end of
|
||||||
looks for period, 2 - looks for period or comma. Note: it's auto-disabled if reprompt=0.
|
sentence in window/chunk. 0 - disabled, 1 - looks for
|
||||||
(default: 2)
|
period, 2 - looks for period or comma. Note: it's
|
||||||
--ff_dump Dumps pre-processed audio by the filters to the 16000Hz file and prevents deletion of some
|
auto-disabled if reprompt=0. (default: 2)
|
||||||
|
--ff_dump Dumps pre-processed audio by the filters to the
|
||||||
|
16000Hz file and prevents deletion of some
|
||||||
intermediate audio files. (default: False)
|
intermediate audio files. (default: False)
|
||||||
--ff_track {1,2,3,4,5,6}
|
--ff_track {1,2,3,4,5,6}
|
||||||
Audio track selector. 1 - selects the first audio track. (default: 1)
|
Audio track selector. 1 - selects the first audio
|
||||||
--ff_fc Selects only front-center channel (FC) to process. (default: False)
|
track. (default: 1)
|
||||||
--ff_mp3 Audio filter: Conversion to MP3 and back. (default: False)
|
--ff_fc Selects only front-center channel (FC) to process.
|
||||||
--ff_sync Audio filter: Stretch/squeeze samples to the given timestamps, with a maximum of 3600 samples
|
|
||||||
per second compensation. Input file must be container that support storing PTS like mp4,
|
|
||||||
mkv... (default: False)
|
|
||||||
--ff_rnndn_sh Audio filter: Suppress non-speech with GregorR's SH model using Recurrent Neural Networks.
|
|
||||||
Notes: It's more aggressive than Xiph, discards singing. (default: False)
|
|
||||||
--ff_rnndn_xiph Audio filter: Suppress non-speech with Xiph's original model using Recurrent Neural Networks.
|
|
||||||
(default: False)
|
(default: False)
|
||||||
--ff_fftdn [0 - 97] Audio filter: General denoise with Fast Fourier Transform. Notes: 12 - normal strength, 0 -
|
--ff_mp3 Audio filter: Conversion to MP3 and back. (default:
|
||||||
disabled. (default: 0)
|
False)
|
||||||
|
--ff_sync Audio filter: Stretch/squeeze samples to the given
|
||||||
|
timestamps, with a maximum of 3600 samples per second
|
||||||
|
compensation. Input file must be container that
|
||||||
|
support storing PTS like mp4, mkv... (default: False)
|
||||||
|
--ff_rnndn_sh Audio filter: Suppress non-speech with GregorR's SH
|
||||||
|
model using Recurrent Neural Networks. Notes: It's
|
||||||
|
more aggressive than Xiph, discards singing. (default:
|
||||||
|
False)
|
||||||
|
--ff_rnndn_xiph Audio filter: Suppress non-speech with Xiph's original
|
||||||
|
model using Recurrent Neural Networks. (default:
|
||||||
|
False)
|
||||||
|
--ff_fftdn [0 - 97] Audio filter: General denoise with Fast Fourier
|
||||||
|
Transform. Notes: 12 - normal strength, 0 - disabled.
|
||||||
|
(default: 0)
|
||||||
--ff_tempo [0.5 - 2.0]
|
--ff_tempo [0.5 - 2.0]
|
||||||
Audio filter: Adjust audio tempo. Values below 1.0 slows down audio, above - speeds up. 1.0 =
|
Audio filter: Adjust audio tempo. Values below 1.0
|
||||||
disabled. (default: 1.0)
|
slows down audio, above - speeds up. 1.0 = disabled.
|
||||||
--ff_gate Audio filter: Reduce lower parts of a signal. (default: False)
|
(default: 1.0)
|
||||||
--ff_speechnorm Audio filter: Extreme and fast speech amplification. (default: False)
|
--ff_gate Audio filter: Reduce lower parts of a signal.
|
||||||
--ff_loudnorm Audio filter: EBU R128 loudness normalization. (default: False)
|
(default: False)
|
||||||
|
--ff_speechnorm Audio filter: Extreme and fast speech amplification.
|
||||||
|
(default: False)
|
||||||
|
--ff_loudnorm Audio filter: EBU R128 loudness normalization.
|
||||||
|
(default: False)
|
||||||
--ff_silence_suppress noise duration
|
--ff_silence_suppress noise duration
|
||||||
Audio filter: Suppress quiet parts of audio. Takes two values. First value - noise tolerance
|
Audio filter: Suppress quiet parts of audio. Takes two
|
||||||
in decibels [-70 - 0] (0=disabled), second value - minimum silence duration in seconds [0.1 -
|
values. First value - noise tolerance in decibels [-70
|
||||||
10]. (default: [0, 3.0])
|
- 0] (0=disabled), second value - minimum silence
|
||||||
--ff_lowhighpass Audio filter: Pass 50Hz - 7800 band. sinc + afir. (default: False)</value>
|
duration in seconds [0.1 - 10]. (default: [0, 3.0])
|
||||||
|
--ff_lowhighpass Audio filter: Pass 50Hz - 7800 band. sinc + afir.
|
||||||
|
(default: False)
|
||||||
|
--ff_mdx_kim2 Audio filter: High quality vocals extraction. MDX-Net
|
||||||
|
'Kim_Vocal_2' model made by KimberleyJensen. Notes:
|
||||||
|
It's better than 'HT Demucs v4 FT', first in the
|
||||||
|
filters chain, recommended to use on movies and
|
||||||
|
series. (default: False)
|
||||||
|
--mdx_chunk MDX_CHUNK
|
||||||
|
Chunk size in seconds for MDX-Net filter. Notes:
|
||||||
|
Smaller is using less memory, but can be slower and
|
||||||
|
produce a bit lower quality. (default: 15)
|
||||||
|
--mdx_device MDX_DEVICE
|
||||||
|
Device to use for MDX-Net filter. Default is 'cuda' if
|
||||||
|
CUDA device is detected, else is 'cpu'. If CUDA GPU is
|
||||||
|
a second device then set 'cuda:1'. (default: cpu)
|
||||||
|
--diarize Enable diarization. (default: False)
|
||||||
|
--diarize_device DIARIZE_DEVICE
|
||||||
|
Device to use for '--diarize'. Default is 'cuda' if
|
||||||
|
CUDA device is detected, else is 'cpu'. If CUDA GPU is
|
||||||
|
a second device then set 'cuda:1'. (default: cpu)
|
||||||
|
--diarize_threads DIARIZE_THREADS
|
||||||
|
--speaker SPEAKER To replace 'SPEAKER' string with your own word.
|
||||||
|
(default: SPEAKER)</value>
|
||||||
</data>
|
</data>
|
||||||
</root>
|
</root>
|
Loading…
Reference in New Issue
Block a user