diff --git a/src/libse/Common/Settings.cs b/src/libse/Common/Settings.cs
index 01255521e..8c896cfea 100644
--- a/src/libse/Common/Settings.cs
+++ b/src/libse/Common/Settings.cs
@@ -476,6 +476,7 @@ namespace Nikse.SubtitleEdit.Core.Common
public string WhisperLocation { get; set; }
public string WhisperCtranslate2Location { get; set; }
public string WhisperPurfviewFasterWhisperLocation { get; set; }
+ public string WhisperPurfviewFasterWhisperDefaultCmd { get; set; }
public string WhisperXLocation { get; set; }
public string WhisperStableTsLocation { get; set; }
public string WhisperCppModelLocation { get; set; }
@@ -710,6 +711,7 @@ namespace Nikse.SubtitleEdit.Core.Common
VoskPostProcessing = true;
WhisperChoice = Configuration.IsRunningOnWindows ? AudioToText.WhisperChoice.PurfviewFasterWhisper : AudioToText.WhisperChoice.OpenAi;
WhisperDeleteTempFiles = true;
+ WhisperPurfviewFasterWhisperDefaultCmd = "--standard";
WhisperExtraSettings = "";
WhisperLanguageCode = "en";
WhisperAutoAdjustTimings = true;
@@ -7128,6 +7130,12 @@ $HorzAlign = Center
settings.Tools.WhisperPurfviewFasterWhisperLocation = subNode.InnerText;
}
+ subNode = node.SelectSingleNode("WhisperPurfviewFasterWhisperDefaultCmd"); // look up the persisted default-command element under the current node
+ if (subNode != null) // element missing: the value already held by settings.Tools is left untouched
+ {
+ settings.Tools.WhisperPurfviewFasterWhisperDefaultCmd = subNode.InnerText; // may be an empty string, which WhisperAudioToText treats as "no default"
+ }
+
subNode = node.SelectSingleNode("WhisperExtraSettings");
if (subNode != null)
{
@@ -12084,6 +12092,7 @@ $HorzAlign = Center
textWriter.WriteElementString("WhisperLocation", settings.Tools.WhisperLocation);
textWriter.WriteElementString("WhisperCtranslate2Location", settings.Tools.WhisperCtranslate2Location);
textWriter.WriteElementString("WhisperPurfviewFasterWhisperLocation", settings.Tools.WhisperPurfviewFasterWhisperLocation);
+ textWriter.WriteElementString("WhisperPurfviewFasterWhisperDefaultCmd", settings.Tools.WhisperPurfviewFasterWhisperDefaultCmd);
textWriter.WriteElementString("WhisperXLocation", settings.Tools.WhisperXLocation);
textWriter.WriteElementString("WhisperStableTsLocation", settings.Tools.WhisperStableTsLocation);
textWriter.WriteElementString("WhisperCppModelLocation", settings.Tools.WhisperCppModelLocation);
diff --git a/src/ui/Forms/AudioToText/WhisperAdvanced.cs b/src/ui/Forms/AudioToText/WhisperAdvanced.cs
index 8fdb239e2..368fbaa8e 100644
--- a/src/ui/Forms/AudioToText/WhisperAdvanced.cs
+++ b/src/ui/Forms/AudioToText/WhisperAdvanced.cs
@@ -73,6 +73,15 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
}
Configuration.Settings.Tools.WhisperExtraSettings = comboBoxWhisperExtra.Text;
+
+ if (Configuration.Settings.Tools.WhisperChoice == WhisperChoice.PurfviewFasterWhisper) // the default command only applies to the Purfview Faster-Whisper engine
+ {
+ if (Configuration.Settings.Tools.WhisperPurfviewFasterWhisperDefaultCmd != comboBoxWhisperExtra.Text) // the text just saved as WhisperExtraSettings differs from the stored default
+ {
+ Configuration.Settings.Tools.WhisperPurfviewFasterWhisperDefaultCmd = string.Empty; // clear the default so WhisperAudioToText no longer replaces WhisperExtraSettings with it
+ }
+ }
+
DialogResult = DialogResult.OK;
}
diff --git a/src/ui/Forms/AudioToText/WhisperAdvanced.resx b/src/ui/Forms/AudioToText/WhisperAdvanced.resx
index 254903b2c..91ad71b6f 100644
--- a/src/ui/Forms/AudioToText/WhisperAdvanced.resx
+++ b/src/ui/Forms/AudioToText/WhisperAdvanced.resx
@@ -238,85 +238,83 @@
- --device DEVICE, -d DEVICE
- Device to use. Default is 'cuda' if CUDA device is detected, else is 'cpu'. If CUDA GPU is a second device then set 'cuda:1'.
- --verbose VERBOSE, -v VERBOSE
- whether to print out debug messages (default: False)
- --task {transcribe,translate}
- whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate') (default: transcribe)
- --temperature TEMPERATURE
- temperature to use for sampling (default: 0)
- --best_of BEST_OF, -bo BEST_OF
- number of candidates when sampling with non-zero temperature (default: 5)
- --beam_size BEAM_SIZE, -bs BEAM_SIZE
- number of beams in beam search, only applicable when temperature is zero (default: 5)
- --patience PATIENCE, -p PATIENCE
- optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search (default: 1.0)
- --length_penalty LENGTH_PENALTY
- optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses simple length normalization by default (default: 1.0)
- --repetition_penalty REPETITION_PENALTY
- Penalty applied to the score of previously generated tokens (set > 1.0 to penalize). (default: 1.0)
- --no_repeat_ngram_size NO_REPEAT_NGRAM_SIZE
- Prevent repetitions of ngrams with this size (set 0 to disable). (default: 0)
- --suppress_tokens SUPPRESS_TOKENS
- comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations (default: -1)
- --initial_prompt INITIAL_PROMPT, -prompt INITIAL_PROMPT
- optional text to provide as a prompt for the first window. Use 'None' to disable it (default: ,.?!)
- --condition_on_previous_text CONDITION_ON_PREVIOUS_TEXT
- if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop
- (default: True)
- --prompt_reset_on_temperature PROMPT_RESET_ON_TEMPERATURE
- Resets prompt if temperature is above this value. Arg has effect only if condition_on_previous_text is True. (default: 0.5)
- --temperature_increment_on_fallback TEMPERATURE_INCREMENT_ON_FALLBACK
- temperature to increase when falling back when the decoding fails to meet either of the thresholds below. To disable fallback set it to 'None'. (default: 0.2)
- --compression_ratio_threshold COMPRESSION_RATIO_THRESHOLD
- if the gzip compression ratio is higher than this value, treat the decoding as failed (default: 2.4)
- --logprob_threshold LOGPROB_THRESHOLD
- if the average log probability is lower than this value, treat the decoding as failed (default: -1.0)
- --no_speech_threshold NO_SPEECH_THRESHOLD
- if the probability of the <|nospeech|> token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence (default: 0.6)
- --no_speech_strict_lvl {0,1,2}
- (experimental) Level of stricter actions when no_speech_prob > 0.93. Use beam_size=5 if this is enabled. Options: 0 - Disabled (do nothing), 1 - Reset propmt (see condition_on_previous_text), 2 - Invalidate the
- cached encoder output (if no_speech_threshold is not None). Arg meant to combat cases where the model is getting stuck in a failure loop or outputs nonsense (default: 0)
- --word_timestamps WORD_TIMESTAMPS, -wt WORD_TIMESTAMPS
- (experimental) extract word-level timestamps and refine the results based on them (default: True)
- --highlight_words HIGHLIGHT_WORDS, -hw HIGHLIGHT_WORDS
- underline each word as it is spoken AKA karaoke in srt and vtt output formats (default: False)
- --prepend_punctuations PREPEND_PUNCTUATIONS
- if word_timestamps is True, merge these punctuation symbols with the next word (default: "'“¿([{-)
- --append_punctuations APPEND_PUNCTUATIONS
- if word_timestamps is True, merge these punctuation symbols with the previous word (default: "'.。,,!!??::”)]}、)
- --threads THREADS number of threads used for CPU inference; By default number of the real cores but no more that 4 (default: 0)
- --version Show Faster-Whisper's version number
- --vad_filter VAD_FILTER, -vad VAD_FILTER
- Enable the voice activity detection (VAD) to filter out parts of the audio without speech. (default: True)
- --vad_threshold VAD_THRESHOLD
- Probabilities above this value are considered as speech. (default: 0.45)
- --vad_min_speech_duration_ms VAD_MIN_SPEECH_DURATION_MS
- Final speech chunks shorter min_speech_duration_ms are thrown out. (default: 350)
- --vad_max_speech_duration_s VAD_MAX_SPEECH_DURATION_S
- Maximum duration of speech chunks in seconds. Longer will be split at the timestamp of the last silence. (default: None)
- --vad_min_silence_duration_ms VAD_MIN_SILENCE_DURATION_MS
- In the end of each speech chunk time to wait before separating it. (default: 3000)
- --vad_speech_pad_ms VAD_SPEECH_PAD_MS
- Final speech chunks are padded by speech_pad_ms each side. (default: 900)
- --vad_window_size_samples VAD_WINDOW_SIZE_SAMPLES
- Size of audio chunks fed to the silero VAD model. Values other than 512, 1024, 1536 may affect model perfomance!!! (default: 1536)
- --compute_type {default,auto,int8,int8_float16,int8_float32,int8_bfloat16,int16,float16,float32,bfloat16}, -ct {default,auto,int8,int8_float16,int8_float32,int8_bfloat16,int16,float16,float32,bfloat16}
- Type of quantization to use (see https://opennmt.net/CTranslate2/quantization.html). (default: auto)
- --batch_recursive, -br
- Enables recursive batch processing. Note: If set then it changes defaults of --output_dir. (default: False)
- --beep_off Disables the beep sound when operation is finished. (default: False)
- --skip Skips files if 'srt' subtitle exists. Works if input is wildcard or directory. (default: False)
- --checkcuda, -cc Returns CUDA device count. (for Subtitle Edit's internal use)
- --print_progress, -pp
- Prints progress bar instead of transcription. (default: False)
- --postfix Adds language as a postfix to subtitle's filename. (default: False)
- --one_word {0,1,2} 0) Disabled. 1) Outputs srt and vtt subtitles with one word per line. 2) As `1`, plus removes
- whitespace and ensures >= 50ms for sub lines. Note: VAD may slightly reduce the accuracy of
- timestamps on some lines. (default: 0)
- --check_files Checks input files for errors before passing all them for transcription. Works if input is
- wildcard or directory. (default: False)
- --PR163_off Disables PR163. For dev experiments. (default: False)
+ [--device DEVICE]
+[--output_dir OUTPUT_DIR]
+[--output_format {lrc,txt,text,vtt,srt,tsv,json,all}]
+[--verbose VERBOSE]
+[--task {transcribe,translate}]
+[--temperature TEMPERATURE]
+[--best_of BEST_OF]
+[--beam_size BEAM_SIZE]
+[--patience PATIENCE]
+[--length_penalty LENGTH_PENALTY]
+[--repetition_penalty REPETITION_PENALTY]
+[--no_repeat_ngram_size NO_REPEAT_NGRAM_SIZE]
+[--suppress_blank SUPPRESS_BLANK]
+[--suppress_tokens SUPPRESS_TOKENS]
+[--initial_prompt INITIAL_PROMPT]
+[--prefix PREFIX]
+[--condition_on_previous_text CONDITION_ON_PREVIOUS_TEXT]
+[--prompt_reset_on_temperature PROMPT_RESET_ON_TEMPERATURE]
+[--without_timestamps WITHOUT_TIMESTAMPS]
+[--max_initial_timestamp MAX_INITIAL_TIMESTAMP]
+[--temperature_increment_on_fallback TEMPERATURE_INCREMENT_ON_FALLBACK]
+[--compression_ratio_threshold COMPRESSION_RATIO_THRESHOLD]
+[--logprob_threshold LOGPROB_THRESHOLD]
+[--no_speech_threshold NO_SPEECH_THRESHOLD]
+[--hallucination_silence_threshold HALLUCINATION_SILENCE_THRESHOLD]
+[--clip_timestamps CLIP_TIMESTAMPS]
+[--no_speech_strict_lvl {0,1,2}]
+[--word_timestamps WORD_TIMESTAMPS]
+[--highlight_words HIGHLIGHT_WORDS]
+[--prepend_punctuations PREPEND_PUNCTUATIONS]
+[--append_punctuations APPEND_PUNCTUATIONS]
+[--threads THREADS]
+[--version]
+[--vad_filter VAD_FILTER]
+[--vad_threshold VAD_THRESHOLD]
+[--vad_min_speech_duration_ms VAD_MIN_SPEECH_DURATION_MS]
+[--vad_max_speech_duration_s VAD_MAX_SPEECH_DURATION_S]
+[--vad_min_silence_duration_ms VAD_MIN_SILENCE_DURATION_MS]
+[--vad_speech_pad_ms VAD_SPEECH_PAD_MS]
+[--vad_window_size_samples VAD_WINDOW_SIZE_SAMPLES]
+[--max_new_tokens MAX_NEW_TOKENS]
+[--chunk_length CHUNK_LENGTH]
+[--compute_type {default,auto,int8,int8_float16,int8_float32,int8_bfloat16,int16,float16,float32,bfloat16}]
+[--batch_recursive]
+[--beep_off]
+[--skip]
+[--checkcuda]
+[--print_progress]
+[--postfix]
+[--check_files]
+[--PR163_off]
+[--hallucinations_list_off]
+[--one_word {0,1,2}]
+[--sentence]
+[--standard]
+[--standard_asia]
+[--max_comma MAX_COMMA]
+[--max_comma_cent {50,60,70,80,90,100}]
+[--max_gap MAX_GAP]
+[--max_line_width MAX_LINE_WIDTH]
+[--max_line_count MAX_LINE_COUNT]
+[--min_dist_to_end {0,4,5,6,7,8,9,10,11,12}]
+[--prompt_max {16,32,64,128,223}]
+[--reprompt {0,1,2}]
+[--prompt_reset_on_no_end {0,1,2}]
+[--ff_dump]
+[--ff_mp3]
+[--ff_sync]
+[--ff_rnndn_sh]
+[--ff_rnndn_xiph]
+[--ff_fftdn [0 - 97]]
+[--ff_tempo [0.5 - 2.0]]
+[--ff_gate]
+[--ff_speechnorm]
+[--ff_loudnorm]
+[--ff_silence_suppress noise duration]
+[--ff_lowhighpass]
+
-
+
\ No newline at end of file
diff --git a/src/ui/Forms/AudioToText/WhisperAudioToText.cs b/src/ui/Forms/AudioToText/WhisperAudioToText.cs
index c183b14fc..2d316f286 100644
--- a/src/ui/Forms/AudioToText/WhisperAudioToText.cs
+++ b/src/ui/Forms/AudioToText/WhisperAudioToText.cs
@@ -130,6 +130,11 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
labelEngine.Text = LanguageSettings.Current.AudioToText.Engine;
labelEngine.Left = comboBoxWhisperEngine.Left - labelEngine.Width - 5;
+ if (Configuration.Settings.Tools.WhisperChoice == WhisperChoice.PurfviewFasterWhisper && !string.IsNullOrEmpty(Configuration.Settings.Tools.WhisperPurfviewFasterWhisperDefaultCmd)) // only when Purfview Faster-Whisper is selected and a default command is still set
+ {
+ Configuration.Settings.Tools.WhisperExtraSettings = Configuration.Settings.Tools.WhisperPurfviewFasterWhisperDefaultCmd; // replaces whatever extra settings were stored; NOTE(review): presumably this also discards custom extra settings saved before this option existed, because the default is "--standard" when the element is absent - confirm
+ }
+
Init();
InitializeWhisperEngines(comboBoxWhisperEngine);
}
diff --git a/src/ui/Forms/AudioToText/WhisperDownload.cs b/src/ui/Forms/AudioToText/WhisperDownload.cs
index 520714fe6..1658f7ed3 100644
--- a/src/ui/Forms/AudioToText/WhisperDownload.cs
+++ b/src/ui/Forms/AudioToText/WhisperDownload.cs
@@ -88,15 +88,16 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
};
- private const string DownloadUrlPurfviewFasterWhisper = "https://github.com/Purfview/whisper-standalone-win/releases/download/faster-whisper/Whisper-Faster_r167.2_windows.zip";
+ private const string DownloadUrlPurfviewFasterWhisper = "https://github.com/Purfview/whisper-standalone-win/releases/download/faster-whisper/Whisper-Faster_r186.1_windows.zip";
private static readonly string[] Sha512HashesPurfviewFasterWhisper =
{
- "a16e2b5460d7f4b0d45de3f0e07b231d58ad4c79d077ad6b9c84a4e2ced4bd1cd3a7d9f01689f1d847ec8ff59c8f81cb742fcf2b153291ed6f15ec8b27adb998", // r167.2
+ "e78616511a92b21cb8ac82e23cdbd06f5b9310751e5f3fa940b5c48743b69bad130aaf6d629ae07c5388326f117be8f181b125ed04aacd23f1a80d8891be889b", // r186.1
};
private static readonly string[] OldSha512HashesPurfviewFasterWhisper =
{
+ "a16e2b5460d7f4b0d45de3f0e07b231d58ad4c79d077ad6b9c84a4e2ced4bd1cd3a7d9f01689f1d847ec8ff59c8f81cb742fcf2b153291ed6f15ec8b27adb998", // r167.2
"1995feca9dd971eccfb41f8dc330d418a531e615cee56eac7cc053fd343fe5200f9e64e2b4feafdde49b018ac518d1ee1b244aedd32dcb84e3fb69c1035b8a4f", // r160.7
"10ac03f098f991fe9474430a7f44c6fe0574dfb88d37ea4a31b764c540337918c529c4eceaf0524e88975b11b771c61dd67501d2a59fe05008a10195d2768edf", // r160.6
"9d65922c41a8848e70f04af8deed7279f827264e1fa305c165849e391917713f0336eee07320b2c2cbb6191167953f4d6d1e23a378bfa5a4273c6065a0eba5b3", // r160.5
@@ -388,7 +389,8 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
var hashVersion160_5 = "6983c90c96e47f53fb1451c1f0a32151ef144fe2e549affc7319d0c7666ea44dcbb0d7dc87ccdaaf0b3d8b2abe92060440e151495109f2681b99940f0eec5ad0";
var hashVersion160_6 = "f616a4fecfb40e74b3e096207f08fbe84a0d08ad872380cf2791eba8458ed854399de2d547be98bc35c65ce0b6959a149b981e745aa75876ffa8eb2fc6a8719e";
var hashVersion160_7 = "0f6b5b0a8d3d169ca7947866552dec30ac43406cda6b7e748c273ed78574087e330571925d8a36d48e5a3ea197d450be0289277677fdbad069038ac0788ea82e";
- return hash == hashVersion153 || hash == hashVersion160_3 || hash == hashVersion160_4 || hash == hashVersion160_5 || hash == hashVersion160_6 || hash == hashVersion160_7;
+ var hashVersion167_2 = "628dee27ab3030798c42983d0f544668f54e7c8d1c7a433b322b9c07286eedd10666d9b1f89764a75301b334cea9c7ad8bfbfeee00a98113b4730ee5cafe8812";
+ return hash == hashVersion153 || hash == hashVersion160_3 || hash == hashVersion160_4 || hash == hashVersion160_5 || hash == hashVersion160_6 || hash == hashVersion160_7 || hash == hashVersion167_2;
}
if (whisperChoice == WhisperChoice.Cpp)
diff --git a/src/ui/Forms/Options/Settings.cs b/src/ui/Forms/Options/Settings.cs
index af42e0cf6..7c16c2051 100644
--- a/src/ui/Forms/Options/Settings.cs
+++ b/src/ui/Forms/Options/Settings.cs
@@ -44,7 +44,6 @@ namespace Nikse.SubtitleEdit.Forms.Options
private List _rulesProfiles;
private List _pluginShortcuts;
private readonly bool _loading;
- private readonly BackgroundWorker _shortcutsBackgroundWorker;
private string _defaultLanguages;
private static IEnumerable GetSubtitleFormats() => SubtitleFormat.AllSubtitleFormats.Where(format => !format.IsVobSubIndexFile).Select(format => format.FriendlyName);
@@ -130,7 +129,6 @@ namespace Nikse.SubtitleEdit.Forms.Options
"Network",
"File type associations"});
- _shortcutsBackgroundWorker = new BackgroundWorker();
Init();
_loading = false;
@@ -139,12 +137,7 @@ namespace Nikse.SubtitleEdit.Forms.Options
public void Init()
{
- _shortcutsBackgroundWorker.DoWork += (sender, args) =>
- {
- MakeShortcutsTreeView(LanguageSettings.Current.Settings);
- };
- _shortcutsBackgroundWorker.RunWorkerAsync();
-
+ MakeShortcutsTreeView(LanguageSettings.Current.Settings);
labelStatus.Text = string.Empty;
_rulesProfiles = new List(Configuration.Settings.General.Profiles);
var gs = Configuration.Settings.General;
@@ -2508,12 +2501,6 @@ namespace Nikse.SubtitleEdit.Forms.Options
case ShortcutsSection:
section = panelShortcuts;
Cursor = Cursors.WaitCursor;
- while (_shortcutsBackgroundWorker.IsBusy)
- {
- System.Threading.Thread.Sleep(10);
- Application.DoEvents();
- }
-
ShowShortcutsTreeView();
Cursor = Cursors.Default;
break;