mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-27 22:42:38 +01:00
Add cmd line help for FasterWhisper
This commit is contained in:
parent
3a3671f765
commit
3925fa5c2d
41
src/ui/Forms/AudioToText/WhisperAdvanced.Designer.cs
generated
41
src/ui/Forms/AudioToText/WhisperAdvanced.Designer.cs
generated
@ -41,10 +41,13 @@
|
||||
this.textBoxConstMe = new System.Windows.Forms.TextBox();
|
||||
this.tabPageOpenAI = new System.Windows.Forms.TabPage();
|
||||
this.textBoxOpenAI = new System.Windows.Forms.TextBox();
|
||||
this.tabPageFasterWhisper = new System.Windows.Forms.TabPage();
|
||||
this.textBox1 = new System.Windows.Forms.TextBox();
|
||||
this.tabControlCommandLineHelp.SuspendLayout();
|
||||
this.TabPageCPP.SuspendLayout();
|
||||
this.tabPageConstMe.SuspendLayout();
|
||||
this.tabPageOpenAI.SuspendLayout();
|
||||
this.tabPageFasterWhisper.SuspendLayout();
|
||||
this.SuspendLayout();
|
||||
//
|
||||
// labelWhisperExtraCmdLine
|
||||
@ -111,6 +114,7 @@
|
||||
this.tabControlCommandLineHelp.Controls.Add(this.TabPageCPP);
|
||||
this.tabControlCommandLineHelp.Controls.Add(this.tabPageConstMe);
|
||||
this.tabControlCommandLineHelp.Controls.Add(this.tabPageOpenAI);
|
||||
this.tabControlCommandLineHelp.Controls.Add(this.tabPageFasterWhisper);
|
||||
this.tabControlCommandLineHelp.Location = new System.Drawing.Point(15, 108);
|
||||
this.tabControlCommandLineHelp.Name = "tabControlCommandLineHelp";
|
||||
this.tabControlCommandLineHelp.SelectedIndex = 0;
|
||||
@ -148,7 +152,7 @@
|
||||
this.tabPageConstMe.Location = new System.Drawing.Point(4, 22);
|
||||
this.tabPageConstMe.Name = "tabPageConstMe";
|
||||
this.tabPageConstMe.Padding = new System.Windows.Forms.Padding(3);
|
||||
this.tabPageConstMe.Size = new System.Drawing.Size(852, 438);
|
||||
this.tabPageConstMe.Size = new System.Drawing.Size(830, 419);
|
||||
this.tabPageConstMe.TabIndex = 1;
|
||||
this.tabPageConstMe.Text = "Const-me";
|
||||
this.tabPageConstMe.UseVisualStyleBackColor = true;
|
||||
@ -162,7 +166,7 @@
|
||||
this.textBoxConstMe.Name = "textBoxConstMe";
|
||||
this.textBoxConstMe.ReadOnly = true;
|
||||
this.textBoxConstMe.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
|
||||
this.textBoxConstMe.Size = new System.Drawing.Size(846, 432);
|
||||
this.textBoxConstMe.Size = new System.Drawing.Size(824, 413);
|
||||
this.textBoxConstMe.TabIndex = 1;
|
||||
this.textBoxConstMe.Text = resources.GetString("textBoxConstMe.Text");
|
||||
this.textBoxConstMe.WordWrap = false;
|
||||
@ -173,7 +177,7 @@
|
||||
this.tabPageOpenAI.Location = new System.Drawing.Point(4, 22);
|
||||
this.tabPageOpenAI.Name = "tabPageOpenAI";
|
||||
this.tabPageOpenAI.Padding = new System.Windows.Forms.Padding(3);
|
||||
this.tabPageOpenAI.Size = new System.Drawing.Size(852, 438);
|
||||
this.tabPageOpenAI.Size = new System.Drawing.Size(830, 419);
|
||||
this.tabPageOpenAI.TabIndex = 2;
|
||||
this.tabPageOpenAI.Text = "OpenAI";
|
||||
this.tabPageOpenAI.UseVisualStyleBackColor = true;
|
||||
@ -187,11 +191,36 @@
|
||||
this.textBoxOpenAI.Name = "textBoxOpenAI";
|
||||
this.textBoxOpenAI.ReadOnly = true;
|
||||
this.textBoxOpenAI.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
|
||||
this.textBoxOpenAI.Size = new System.Drawing.Size(846, 432);
|
||||
this.textBoxOpenAI.Size = new System.Drawing.Size(824, 413);
|
||||
this.textBoxOpenAI.TabIndex = 2;
|
||||
this.textBoxOpenAI.Text = resources.GetString("textBoxOpenAI.Text");
|
||||
this.textBoxOpenAI.WordWrap = false;
|
||||
//
|
||||
// tabPageFasterWhisper
|
||||
//
|
||||
this.tabPageFasterWhisper.Controls.Add(this.textBox1);
|
||||
this.tabPageFasterWhisper.Location = new System.Drawing.Point(4, 22);
|
||||
this.tabPageFasterWhisper.Name = "tabPageFasterWhisper";
|
||||
this.tabPageFasterWhisper.Padding = new System.Windows.Forms.Padding(3);
|
||||
this.tabPageFasterWhisper.Size = new System.Drawing.Size(830, 419);
|
||||
this.tabPageFasterWhisper.TabIndex = 3;
|
||||
this.tabPageFasterWhisper.Text = "Faster Whisper";
|
||||
this.tabPageFasterWhisper.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// textBox1
|
||||
//
|
||||
this.textBox1.Dock = System.Windows.Forms.DockStyle.Fill;
|
||||
this.textBox1.Font = new System.Drawing.Font("Consolas", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
|
||||
this.textBox1.Location = new System.Drawing.Point(3, 3);
|
||||
this.textBox1.Multiline = true;
|
||||
this.textBox1.Name = "textBox1";
|
||||
this.textBox1.ReadOnly = true;
|
||||
this.textBox1.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
|
||||
this.textBox1.Size = new System.Drawing.Size(824, 413);
|
||||
this.textBox1.TabIndex = 3;
|
||||
this.textBox1.Text = resources.GetString("textBox1.Text");
|
||||
this.textBox1.WordWrap = false;
|
||||
//
|
||||
// WhisperAdvanced
|
||||
//
|
||||
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
|
||||
@ -219,6 +248,8 @@
|
||||
this.tabPageConstMe.PerformLayout();
|
||||
this.tabPageOpenAI.ResumeLayout(false);
|
||||
this.tabPageOpenAI.PerformLayout();
|
||||
this.tabPageFasterWhisper.ResumeLayout(false);
|
||||
this.tabPageFasterWhisper.PerformLayout();
|
||||
this.ResumeLayout(false);
|
||||
this.PerformLayout();
|
||||
|
||||
@ -238,5 +269,7 @@
|
||||
private System.Windows.Forms.TabPage tabPageOpenAI;
|
||||
private System.Windows.Forms.TextBox textBoxConstMe;
|
||||
private System.Windows.Forms.TextBox textBoxOpenAI;
|
||||
private System.Windows.Forms.TabPage tabPageFasterWhisper;
|
||||
private System.Windows.Forms.TextBox textBox1;
|
||||
}
|
||||
}
|
@ -42,6 +42,11 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
|
||||
{
|
||||
tabControlCommandLineHelp.SelectedTab = tabPageConstMe;
|
||||
}
|
||||
else if (whisperEngine == WhisperChoice.CTranslate2 ||
|
||||
whisperEngine == WhisperChoice.PurfviewFasterWhisper)
|
||||
{
|
||||
tabControlCommandLineHelp.SelectedTab = tabPageFasterWhisper;
|
||||
}
|
||||
else
|
||||
{
|
||||
tabControlCommandLineHelp.SelectedTab = tabPageOpenAI;
|
||||
@ -57,7 +62,6 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
|
||||
{
|
||||
// ignore
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void buttonOK_Click(object sender, EventArgs e)
|
||||
|
@ -117,6 +117,81 @@
|
||||
<resheader name="writer">
|
||||
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
<data name="textBox1.Text" xml:space="preserve">
|
||||
<value> --temperature TEMPERATURE
|
||||
temperature to use for sampling (default: 0)
|
||||
--best_of BEST_OF number of candidates when sampling with non-zero temperature (default: 5)
|
||||
--beam_size BEAM_SIZE
|
||||
number of beams in beam search, only applicable when temperature is zero (default: 1)
|
||||
--patience PATIENCE optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the
|
||||
default (1.0) is equivalent to conventional beam search (default: 1.0)
|
||||
--length_penalty LENGTH_PENALTY
|
||||
optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses
|
||||
simple length normalization by default (default: 1.0)
|
||||
--suppress_tokens SUPPRESS_TOKENS
|
||||
comma-separated list of token ids to suppress during sampling; '-1' will suppress most special
|
||||
characters except common punctuations (default: -1)
|
||||
--initial_prompt INITIAL_PROMPT
|
||||
optional text to provide as a prompt for the first window. (default: None)
|
||||
--condition_on_previous_text CONDITION_ON_PREVIOUS_TEXT
|
||||
if True, provide the previous output of the model as a prompt for the next window; disabling
|
||||
may make the text inconsistent across windows, but the model becomes less prone to getting
|
||||
stuck in a failure loop (default: True)
|
||||
--temperature_increment_on_fallback TEMPERATURE_INCREMENT_ON_FALLBACK
|
||||
temperature to increase when falling back when the decoding fails to meet either of the
|
||||
thresholds below (default: 0.2)
|
||||
--compression_ratio_threshold COMPRESSION_RATIO_THRESHOLD
|
||||
if the gzip compression ratio is higher than this value, treat the decoding as failed
|
||||
(default: 2.4)
|
||||
--logprob_threshold LOGPROB_THRESHOLD
|
||||
if the average log probability is lower than this value, treat the decoding as failed
|
||||
(default: -1.0)
|
||||
--no_speech_threshold NO_SPEECH_THRESHOLD
|
||||
if the probability of the <|nospeech|> token is higher than this value AND the decoding has
|
||||
failed due to `logprob_threshold`, consider the segment as silence (default: 0.6)
|
||||
--no_speech_strict_lvl {0,1,2}
|
||||
Level of stricter actions when no_speech_prob > 0.93. Use beam_size=5 if this is enabled.
|
||||
Options: 0 - Disabled (do nothing), 1 - Reset prompt (see condition_on_previous_text), 2 -
|
||||
Invalidate the cached encoder output (if no_speech_threshold is not None). Arg meant to combat
|
||||
cases where the model is getting stuck in a failure loop or outputs nonsense (default: 0)
|
||||
--word_timestamps WORD_TIMESTAMPS
|
||||
(experimental) extract word-level timestamps and refine the results based on them (default:
|
||||
True)
|
||||
--highlight_words HIGHLIGHT_WORDS
|
||||
underline each word as it is spoken AKA karaoke in srt and vtt output formats (default: False)
|
||||
--prepend_punctuations PREPEND_PUNCTUATIONS
|
||||
if word_timestamps is True, merge these punctuation symbols with the next word (default:
|
||||
"'“¿([{-)
|
||||
--append_punctuations APPEND_PUNCTUATIONS
|
||||
if word_timestamps is True, merge these punctuation symbols with the previous word (default:
|
||||
"'.。,,!!??::”)]}、)
|
||||
--threads THREADS number of threads used for CPU inference; By default number of the real cores but no more than
|
||||
4 (default: 0)
|
||||
--version Show Faster-Whisper's version number
|
||||
--vad_filter VAD_FILTER
|
||||
Enable the voice activity detection (VAD) to filter out parts of the audio without speech.
|
||||
(default: True)
|
||||
--vad_threshold VAD_THRESHOLD
|
||||
Probabilities above this value are considered as speech. (default: 0.45)
|
||||
--vad_min_speech_duration_ms VAD_MIN_SPEECH_DURATION_MS
|
||||
Final speech chunks shorter than min_speech_duration_ms are thrown out. (default: 350)
|
||||
--vad_max_speech_duration_s VAD_MAX_SPEECH_DURATION_S
|
||||
Maximum duration of speech chunks in seconds. Longer will be split at the timestamp of the
|
||||
last silence. (default: None)
|
||||
--vad_min_silence_duration_ms VAD_MIN_SILENCE_DURATION_MS
|
||||
Time to wait at the end of each speech chunk before separating it. (default: 3000)
|
||||
--vad_speech_pad_ms VAD_SPEECH_PAD_MS
|
||||
Final speech chunks are padded by speech_pad_ms each side. (default: 900)
|
||||
--vad_window_size_samples VAD_WINDOW_SIZE_SAMPLES
|
||||
Size of audio chunks fed to the silero VAD model. Values other than 512, 1024, 1536 may affect
|
||||
model performance!!! (default: 1536)
|
||||
--compute_type {default,auto,int8,int8_float16,int16,float16,float32,int8_bfloat16,bfloat16}
|
||||
Type of quantization to use (see https://opennmt.net/CTranslate2/quantization.html) (default:
|
||||
default)
|
||||
--beep BEEP Enables beep sound when operation is finished. (default: True)
|
||||
--skip SKIP Skips files if 'srt' subtitle exists. Works if input is wildcard or directory. (default:
|
||||
False)</value>
|
||||
</data>
|
||||
<data name="textBoxCpp.Text" xml:space="preserve">
|
||||
<value>CPP:
|
||||
-t N, --threads N [4 ] number of threads to use during computation
|
||||
|
Loading…
Reference in New Issue
Block a user