From 07c966d19bf6b6a14b5d419efb64095f82648b3e Mon Sep 17 00:00:00 2001 From: niksedk Date: Mon, 3 Oct 2022 18:54:55 +0200 Subject: [PATCH] Refact audio-to-text folder structure --- LanguageBaseEnglish.xml | 7 +- .../VoskAudioToText.Designer.cs} | 4 +- .../VoskAudioToText.cs} | 8 +- .../VoskAudioToText.resx} | 0 .../VoskAudioToTextSelectedLines.Designer.cs} | 4 +- .../VoskAudioToTextSelectedLines.cs} | 17 +- .../VoskAudioToTextSelectedLines.resx} | 0 .../VoskDictate.Designer.cs} | 4 +- .../Dictate.cs => AudioToText/VoskDictate.cs} | 26 +- .../VoskDictate.resx} | 0 .../VoskModelDownload.Designer.cs} | 4 +- .../VoskModelDownload.cs} | 6 +- .../VoskModelDownload.resx} | 0 .../WhisperAudioToText.Designer.cs} | 4 +- .../WhisperAudioToText.cs} | 17 +- .../WhisperAudioToText.resx} | 0 ...hisperAudioToTextSelectedLines.Designer.cs | 327 +++++++++++++ .../WhisperAudioToTextSelectedLines.cs | 446 ++++++++++++++++++ .../WhisperAudioToTextSelectedLines.resx | 123 +++++ .../WhisperModelDownload.Designer.cs | 2 +- .../WhisperModelDownload.cs | 10 +- .../WhisperModelDownload.resx | 0 src/ui/Forms/Main.cs | 66 ++- src/ui/Logic/Language.cs | 2 +- src/ui/Logic/LanguageDeserializer.cs | 19 +- src/ui/Logic/LanguageStructure.cs | 2 +- src/ui/SubtitleEdit.csproj | 65 +-- 27 files changed, 1060 insertions(+), 103 deletions(-) rename src/ui/Forms/{SpeechRecognition/AudioToText.Designer.cs => AudioToText/VoskAudioToText.Designer.cs} (99%) rename src/ui/Forms/{SpeechRecognition/AudioToText.cs => AudioToText/VoskAudioToText.cs} (99%) rename src/ui/Forms/{SpeechRecognition/AudioToText.resx => AudioToText/VoskAudioToText.resx} (100%) rename src/ui/Forms/{SpeechRecognition/AudioToTextSelectedLines.Designer.cs => AudioToText/VoskAudioToTextSelectedLines.Designer.cs} (99%) rename src/ui/Forms/{SpeechRecognition/AudioToTextSelectedLines.cs => AudioToText/VoskAudioToTextSelectedLines.cs} (95%) rename src/ui/Forms/{SpeechRecognition/AudioToTextSelectedLines.resx => 
AudioToText/VoskAudioToTextSelectedLines.resx} (100%) rename src/ui/Forms/{SpeechRecognition/Dictate.Designer.cs => AudioToText/VoskDictate.Designer.cs} (99%) rename src/ui/Forms/{SpeechRecognition/Dictate.cs => AudioToText/VoskDictate.cs} (91%) rename src/ui/Forms/{SpeechRecognition/AudioToTextModelDownload.resx => AudioToText/VoskDictate.resx} (100%) rename src/ui/Forms/{SpeechRecognition/AudioToTextModelDownload.Designer.cs => AudioToText/VoskModelDownload.Designer.cs} (98%) rename src/ui/Forms/{SpeechRecognition/AudioToTextModelDownload.cs => AudioToText/VoskModelDownload.cs} (97%) rename src/ui/Forms/{SpeechRecognition/Dictate.resx => AudioToText/VoskModelDownload.resx} (100%) rename src/ui/Forms/{SpeechRecognition/AudioToTextWhisper.Designer.cs => AudioToText/WhisperAudioToText.Designer.cs} (99%) rename src/ui/Forms/{SpeechRecognition/AudioToTextWhisper.cs => AudioToText/WhisperAudioToText.cs} (99%) rename src/ui/Forms/{SpeechRecognition/AudioToTextWhisper.resx => AudioToText/WhisperAudioToText.resx} (100%) create mode 100644 src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.Designer.cs create mode 100644 src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.cs create mode 100644 src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.resx rename src/ui/Forms/{SpeechRecognition => AudioToText}/WhisperModelDownload.Designer.cs (99%) rename src/ui/Forms/{SpeechRecognition => AudioToText}/WhisperModelDownload.cs (97%) rename src/ui/Forms/{SpeechRecognition => AudioToText}/WhisperModelDownload.resx (100%) diff --git a/LanguageBaseEnglish.xml b/LanguageBaseEnglish.xml index 03ece231c..c1d25affd 100644 --- a/LanguageBaseEnglish.xml +++ b/LanguageBaseEnglish.xml @@ -176,9 +176,13 @@ Note: Do check free disk space. 
Audio to text Generate text from audio via Vosk/Kaldi speech recognition + Generate text from audio via Whisper speech recognition Vosk website + Whisper website Models + Languages and models Choose model + Choose model Open models folder Loading Vosk speech recognition model... Transcribing audio to text... @@ -186,6 +190,7 @@ Note: Do check free disk space. {0} files saved to video source folder Use post-processing (line merge, fix casing, punctuation, and more) Batch mode + Keep partial transcription Advanced Sub Station Alpha attachments @@ -1265,7 +1270,7 @@ To use an API key, go to "Options -> Settings -> Tools" to enter your Goog Generate text from video... Generate blank video... Generate video with burned-in sub... - Audio to text... + Audio to text ({0})... Import chapters from video Generate/import shot changes... Remove/export shot changes... diff --git a/src/ui/Forms/SpeechRecognition/AudioToText.Designer.cs b/src/ui/Forms/AudioToText/VoskAudioToText.Designer.cs similarity index 99% rename from src/ui/Forms/SpeechRecognition/AudioToText.Designer.cs rename to src/ui/Forms/AudioToText/VoskAudioToText.Designer.cs index 5203c0820..c62d36eb2 100644 --- a/src/ui/Forms/SpeechRecognition/AudioToText.Designer.cs +++ b/src/ui/Forms/AudioToText/VoskAudioToText.Designer.cs @@ -1,6 +1,6 @@ -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - sealed partial class AudioToText + sealed partial class VoskAudioToText { /// /// Required designer variable. 
diff --git a/src/ui/Forms/SpeechRecognition/AudioToText.cs b/src/ui/Forms/AudioToText/VoskAudioToText.cs similarity index 99% rename from src/ui/Forms/SpeechRecognition/AudioToText.cs rename to src/ui/Forms/AudioToText/VoskAudioToText.cs index 000e4b7d8..d17a31550 100644 --- a/src/ui/Forms/SpeechRecognition/AudioToText.cs +++ b/src/ui/Forms/AudioToText/VoskAudioToText.cs @@ -13,9 +13,9 @@ using Nikse.SubtitleEdit.Core.SubtitleFormats; using Nikse.SubtitleEdit.Logic; using Vosk; -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - public sealed partial class AudioToText : Form + public sealed partial class VoskAudioToText : Form { private readonly string _videoFileName; private readonly int _audioTrackNumber; @@ -34,7 +34,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition public Subtitle TranscribedSubtitle { get; private set; } - public AudioToText(string videoFileName, int audioTrackNumber, Form parentForm) + public VoskAudioToText(string videoFileName, int audioTrackNumber, Form parentForm) { UiUtil.PreInitialize(this); InitializeComponent(); @@ -611,7 +611,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition private void buttonDownload_Click(object sender, EventArgs e) { - using (var form = new AudioToTextModelDownload { AutoClose = true }) + using (var form = new VoskModelDownload { AutoClose = true }) { form.ShowDialog(this); FillModels(comboBoxModels, form.LastDownloadedModel); diff --git a/src/ui/Forms/SpeechRecognition/AudioToText.resx b/src/ui/Forms/AudioToText/VoskAudioToText.resx similarity index 100% rename from src/ui/Forms/SpeechRecognition/AudioToText.resx rename to src/ui/Forms/AudioToText/VoskAudioToText.resx diff --git a/src/ui/Forms/SpeechRecognition/AudioToTextSelectedLines.Designer.cs b/src/ui/Forms/AudioToText/VoskAudioToTextSelectedLines.Designer.cs similarity index 99% rename from src/ui/Forms/SpeechRecognition/AudioToTextSelectedLines.Designer.cs rename to 
src/ui/Forms/AudioToText/VoskAudioToTextSelectedLines.Designer.cs index d3335809c..97073eaab 100644 --- a/src/ui/Forms/SpeechRecognition/AudioToTextSelectedLines.Designer.cs +++ b/src/ui/Forms/AudioToText/VoskAudioToTextSelectedLines.Designer.cs @@ -1,6 +1,6 @@ -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - sealed partial class AudioToTextSelectedLines + sealed partial class VoskAudioToTextSelectedLines { /// /// Required designer variable. diff --git a/src/ui/Forms/SpeechRecognition/AudioToTextSelectedLines.cs b/src/ui/Forms/AudioToText/VoskAudioToTextSelectedLines.cs similarity index 95% rename from src/ui/Forms/SpeechRecognition/AudioToTextSelectedLines.cs rename to src/ui/Forms/AudioToText/VoskAudioToTextSelectedLines.cs index 1a6b259d2..23db7649b 100644 --- a/src/ui/Forms/SpeechRecognition/AudioToTextSelectedLines.cs +++ b/src/ui/Forms/AudioToText/VoskAudioToTextSelectedLines.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using System.Globalization; using System.IO; using System.Linq; using System.Text; @@ -10,9 +9,9 @@ using Nikse.SubtitleEdit.Core.Common; using Nikse.SubtitleEdit.Logic; using Vosk; -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - public sealed partial class AudioToTextSelectedLines : Form + public sealed partial class VoskAudioToTextSelectedLines : Form { private readonly string _voskFolder; private bool _cancel; @@ -26,7 +25,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition public Subtitle TranscribedSubtitle { get; private set; } - public AudioToTextSelectedLines(List audioClips, Form parentForm) + public VoskAudioToTextSelectedLines(List audioClips, Form parentForm) { UiUtil.PreInitialize(this); InitializeComponent(); @@ -48,7 +47,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing; _voskFolder = 
Path.Combine(Configuration.DataDirectory, "Vosk"); - AudioToText.FillModels(comboBoxModels, string.Empty); + VoskAudioToText.FillModels(comboBoxModels, string.Empty); textBoxLog.Visible = false; textBoxLog.Dock = DockStyle.Fill; @@ -232,7 +231,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition if (rec.AcceptWaveform(buffer, bytesRead)) { var res = rec.Result(); - var results = AudioToText.ParseJsonToResult(res); + var results = VoskAudioToText.ParseJsonToResult(res); list.AddRange(results); } else @@ -250,7 +249,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition } var finalResult = rec.FinalResult(); - var finalResults = AudioToText.ParseJsonToResult(finalResult); + var finalResults = VoskAudioToText.ParseJsonToResult(finalResult); list.AddRange(finalResults); timer1.Stop(); return list; @@ -344,10 +343,10 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition private void buttonDownload_Click(object sender, EventArgs e) { - using (var form = new AudioToTextModelDownload { AutoClose = true }) + using (var form = new VoskModelDownload { AutoClose = true }) { form.ShowDialog(this); - AudioToText.FillModels(comboBoxModels, form.LastDownloadedModel); + VoskAudioToText.FillModels(comboBoxModels, form.LastDownloadedModel); } } diff --git a/src/ui/Forms/SpeechRecognition/AudioToTextSelectedLines.resx b/src/ui/Forms/AudioToText/VoskAudioToTextSelectedLines.resx similarity index 100% rename from src/ui/Forms/SpeechRecognition/AudioToTextSelectedLines.resx rename to src/ui/Forms/AudioToText/VoskAudioToTextSelectedLines.resx diff --git a/src/ui/Forms/SpeechRecognition/Dictate.Designer.cs b/src/ui/Forms/AudioToText/VoskDictate.Designer.cs similarity index 99% rename from src/ui/Forms/SpeechRecognition/Dictate.Designer.cs rename to src/ui/Forms/AudioToText/VoskDictate.Designer.cs index dc569a67f..808c51339 100644 --- a/src/ui/Forms/SpeechRecognition/Dictate.Designer.cs +++ b/src/ui/Forms/AudioToText/VoskDictate.Designer.cs @@ -1,6 +1,6 @@ -namespace 
Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - partial class Dictate + partial class VoskDictate { /// /// Required designer variable. diff --git a/src/ui/Forms/SpeechRecognition/Dictate.cs b/src/ui/Forms/AudioToText/VoskDictate.cs similarity index 91% rename from src/ui/Forms/SpeechRecognition/Dictate.cs rename to src/ui/Forms/AudioToText/VoskDictate.cs index ec58a8942..4558b7200 100644 --- a/src/ui/Forms/SpeechRecognition/Dictate.cs +++ b/src/ui/Forms/AudioToText/VoskDictate.cs @@ -1,17 +1,17 @@ -using NAudio.Wave; -using Nikse.SubtitleEdit.Core.AudioToText; -using Nikse.SubtitleEdit.Core.Common; -using Nikse.SubtitleEdit.Logic; -using System; +using System; using System.Collections.Generic; using System.IO; using System.Text; using System.Windows.Forms; +using NAudio.Wave; +using Nikse.SubtitleEdit.Core.AudioToText; +using Nikse.SubtitleEdit.Core.Common; +using Nikse.SubtitleEdit.Logic; using Vosk; -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - public partial class Dictate : Form + public partial class VoskDictate : Form { private static WaveFileWriter _waveFile; private static Model _model; @@ -21,7 +21,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition public static bool RecordingOn { get; set; } public static double RecordingVolumePercent { get; set; } - public Dictate() + public VoskDictate() { UiUtil.PreInitialize(this); InitializeComponent(); @@ -33,7 +33,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition buttonOK.Text = LanguageSettings.Current.General.Ok; buttonCancel.Text = LanguageSettings.Current.General.Cancel; UiUtil.FixLargeFonts(this, buttonOK); - AudioToText.FillModels(comboBoxModels, string.Empty); + VoskAudioToText.FillModels(comboBoxModels, string.Empty); checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing; } @@ -82,7 +82,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition if 
(rec.AcceptWaveform(buffer, bytesRead)) { var res = rec.Result(); - var results = AudioToText.ParseJsonToResult(res); + var results = VoskAudioToText.ParseJsonToResult(res); list.AddRange(results); } else @@ -93,7 +93,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition } var finalResult = rec.FinalResult(); - var finalResults = AudioToText.ParseJsonToResult(finalResult); + var finalResults = VoskAudioToText.ParseJsonToResult(finalResult); list.AddRange(finalResults); try @@ -175,10 +175,10 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition private void buttonDownload_Click(object sender, EventArgs e) { - using (var form = new AudioToTextModelDownload { AutoClose = true }) + using (var form = new VoskModelDownload { AutoClose = true }) { form.ShowDialog(this); - AudioToText.FillModels(comboBoxModels, form.LastDownloadedModel); + VoskAudioToText.FillModels(comboBoxModels, form.LastDownloadedModel); } } diff --git a/src/ui/Forms/SpeechRecognition/AudioToTextModelDownload.resx b/src/ui/Forms/AudioToText/VoskDictate.resx similarity index 100% rename from src/ui/Forms/SpeechRecognition/AudioToTextModelDownload.resx rename to src/ui/Forms/AudioToText/VoskDictate.resx diff --git a/src/ui/Forms/SpeechRecognition/AudioToTextModelDownload.Designer.cs b/src/ui/Forms/AudioToText/VoskModelDownload.Designer.cs similarity index 98% rename from src/ui/Forms/SpeechRecognition/AudioToTextModelDownload.Designer.cs rename to src/ui/Forms/AudioToText/VoskModelDownload.Designer.cs index 2739291e0..424a29749 100644 --- a/src/ui/Forms/SpeechRecognition/AudioToTextModelDownload.Designer.cs +++ b/src/ui/Forms/AudioToText/VoskModelDownload.Designer.cs @@ -1,6 +1,6 @@ -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - sealed partial class AudioToTextModelDownload + sealed partial class VoskModelDownload { /// /// Required designer variable. 
diff --git a/src/ui/Forms/SpeechRecognition/AudioToTextModelDownload.cs b/src/ui/Forms/AudioToText/VoskModelDownload.cs similarity index 97% rename from src/ui/Forms/SpeechRecognition/AudioToTextModelDownload.cs rename to src/ui/Forms/AudioToText/VoskModelDownload.cs index f186d9738..3df436c81 100644 --- a/src/ui/Forms/SpeechRecognition/AudioToTextModelDownload.cs +++ b/src/ui/Forms/AudioToText/VoskModelDownload.cs @@ -7,15 +7,15 @@ using Nikse.SubtitleEdit.Core.AudioToText; using Nikse.SubtitleEdit.Core.Common; using Nikse.SubtitleEdit.Logic; -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - public sealed partial class AudioToTextModelDownload : Form + public sealed partial class VoskModelDownload : Form { public bool AutoClose { get; internal set; } public string LastDownloadedModel { get; internal set; } private readonly CancellationTokenSource _cancellationTokenSource; - public AudioToTextModelDownload() + public VoskModelDownload() { UiUtil.PreInitialize(this); InitializeComponent(); diff --git a/src/ui/Forms/SpeechRecognition/Dictate.resx b/src/ui/Forms/AudioToText/VoskModelDownload.resx similarity index 100% rename from src/ui/Forms/SpeechRecognition/Dictate.resx rename to src/ui/Forms/AudioToText/VoskModelDownload.resx diff --git a/src/ui/Forms/SpeechRecognition/AudioToTextWhisper.Designer.cs b/src/ui/Forms/AudioToText/WhisperAudioToText.Designer.cs similarity index 99% rename from src/ui/Forms/SpeechRecognition/AudioToTextWhisper.Designer.cs rename to src/ui/Forms/AudioToText/WhisperAudioToText.Designer.cs index 8fce420fb..9be69ce26 100644 --- a/src/ui/Forms/SpeechRecognition/AudioToTextWhisper.Designer.cs +++ b/src/ui/Forms/AudioToText/WhisperAudioToText.Designer.cs @@ -1,6 +1,6 @@ -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - sealed partial class AudioToTextWhisper + sealed partial class WhisperAudioToText { /// /// Required designer 
variable. diff --git a/src/ui/Forms/SpeechRecognition/AudioToTextWhisper.cs b/src/ui/Forms/AudioToText/WhisperAudioToText.cs similarity index 99% rename from src/ui/Forms/SpeechRecognition/AudioToTextWhisper.cs rename to src/ui/Forms/AudioToText/WhisperAudioToText.cs index c7110c141..971b7cc65 100644 --- a/src/ui/Forms/SpeechRecognition/AudioToTextWhisper.cs +++ b/src/ui/Forms/AudioToText/WhisperAudioToText.cs @@ -1,8 +1,4 @@ -using Nikse.SubtitleEdit.Core.AudioToText; -using Nikse.SubtitleEdit.Core.Common; -using Nikse.SubtitleEdit.Core.SubtitleFormats; -using Nikse.SubtitleEdit.Logic; -using System; +using System; using System.Collections.Generic; using System.Diagnostics; using System.Drawing; @@ -11,10 +7,14 @@ using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Windows.Forms; +using Nikse.SubtitleEdit.Core.AudioToText; +using Nikse.SubtitleEdit.Core.Common; +using Nikse.SubtitleEdit.Core.SubtitleFormats; +using Nikse.SubtitleEdit.Logic; -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { - public sealed partial class AudioToTextWhisper : Form + public sealed partial class WhisperAudioToText : Form { private readonly string _videoFileName; private readonly int _audioTrackNumber; @@ -31,7 +31,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition public Subtitle TranscribedSubtitle { get; private set; } - public AudioToTextWhisper(string videoFileName, int audioTrackNumber, Form parentForm) + public WhisperAudioToText(string videoFileName, int audioTrackNumber, Form parentForm) { UiUtil.PreInitialize(this); InitializeComponent(); @@ -299,7 +299,6 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition var process = GetWhisperProcess(waveFileName, model.Name, comboBoxLanguages.Text, OutputHandler); ShowProgressBar(); progressBar1.Style = ProgressBarStyle.Marquee; - double seconds = 0; buttonCancel.Visible = true; try { diff --git 
a/src/ui/Forms/SpeechRecognition/AudioToTextWhisper.resx b/src/ui/Forms/AudioToText/WhisperAudioToText.resx similarity index 100% rename from src/ui/Forms/SpeechRecognition/AudioToTextWhisper.resx rename to src/ui/Forms/AudioToText/WhisperAudioToText.resx diff --git a/src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.Designer.cs b/src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.Designer.cs new file mode 100644 index 000000000..ed51ec6ba --- /dev/null +++ b/src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.Designer.cs @@ -0,0 +1,327 @@ +namespace Nikse.SubtitleEdit.Forms.AudioToText +{ + sealed partial class WhisperAudioToTextSelectedLines + { + /// + /// Required designer variable. + /// + private System.ComponentModel.IContainer components = null; + + /// + /// Clean up any resources being used. + /// + /// true if managed resources should be disposed; otherwise, false. + protected override void Dispose(bool disposing) + { + if (disposing && (components != null)) + { + components.Dispose(); + } + base.Dispose(disposing); + } + + #region Windows Form Designer generated code + + /// + /// Required method for Designer support - do not modify + /// the contents of this method with the code editor. 
+ /// + private void InitializeComponent() + { + this.components = new System.ComponentModel.Container(); + this.buttonCancel = new System.Windows.Forms.Button(); + this.buttonGenerate = new System.Windows.Forms.Button(); + this.progressBar1 = new System.Windows.Forms.ProgressBar(); + this.labelProgress = new System.Windows.Forms.Label(); + this.textBoxLog = new System.Windows.Forms.TextBox(); + this.labelInfo = new System.Windows.Forms.Label(); + this.groupBoxModels = new System.Windows.Forms.GroupBox(); + this.buttonDownload = new System.Windows.Forms.Button(); + this.linkLabelOpenModelsFolder = new System.Windows.Forms.LinkLabel(); + this.labelModel = new System.Windows.Forms.Label(); + this.comboBoxModels = new System.Windows.Forms.ComboBox(); + this.linkLabeWhisperWebSite = new System.Windows.Forms.LinkLabel(); + this.labelTime = new System.Windows.Forms.Label(); + this.timer1 = new System.Windows.Forms.Timer(this.components); + this.checkBoxUsePostProcessing = new System.Windows.Forms.CheckBox(); + this.groupBoxInputFiles = new System.Windows.Forms.GroupBox(); + this.listViewInputFiles = new System.Windows.Forms.ListView(); + this.columnHeaderFileName = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader())); + this.labelChooseLanguage = new System.Windows.Forms.Label(); + this.comboBoxLanguages = new System.Windows.Forms.ComboBox(); + this.groupBoxModels.SuspendLayout(); + this.groupBoxInputFiles.SuspendLayout(); + this.SuspendLayout(); + // + // buttonCancel + // + this.buttonCancel.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); + this.buttonCancel.DialogResult = System.Windows.Forms.DialogResult.Cancel; + this.buttonCancel.ImeMode = System.Windows.Forms.ImeMode.NoControl; + this.buttonCancel.Location = new System.Drawing.Point(622, 427); + this.buttonCancel.Name = "buttonCancel"; + this.buttonCancel.Size = new System.Drawing.Size(75, 23); + 
this.buttonCancel.TabIndex = 6; + this.buttonCancel.Text = "C&ancel"; + this.buttonCancel.UseVisualStyleBackColor = true; + this.buttonCancel.Click += new System.EventHandler(this.buttonCancel_Click); + // + // buttonGenerate + // + this.buttonGenerate.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); + this.buttonGenerate.ImeMode = System.Windows.Forms.ImeMode.NoControl; + this.buttonGenerate.Location = new System.Drawing.Point(491, 427); + this.buttonGenerate.Name = "buttonGenerate"; + this.buttonGenerate.Size = new System.Drawing.Size(125, 23); + this.buttonGenerate.TabIndex = 5; + this.buttonGenerate.Text = "&Generate"; + this.buttonGenerate.UseVisualStyleBackColor = true; + this.buttonGenerate.Click += new System.EventHandler(this.ButtonGenerate_Click); + // + // progressBar1 + // + this.progressBar1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left) + | System.Windows.Forms.AnchorStyles.Right))); + this.progressBar1.Location = new System.Drawing.Point(12, 427); + this.progressBar1.Name = "progressBar1"; + this.progressBar1.Size = new System.Drawing.Size(473, 12); + this.progressBar1.TabIndex = 4; + this.progressBar1.Visible = false; + // + // labelProgress + // + this.labelProgress.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left))); + this.labelProgress.AutoSize = true; + this.labelProgress.Location = new System.Drawing.Point(12, 409); + this.labelProgress.Name = "labelProgress"; + this.labelProgress.Size = new System.Drawing.Size(70, 13); + this.labelProgress.TabIndex = 4; + this.labelProgress.Text = "labelProgress"; + // + // textBoxLog + // + this.textBoxLog.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) + | 
System.Windows.Forms.AnchorStyles.Left) + | System.Windows.Forms.AnchorStyles.Right))); + this.textBoxLog.Location = new System.Drawing.Point(529, 9); + this.textBoxLog.Multiline = true; + this.textBoxLog.Name = "textBoxLog"; + this.textBoxLog.ScrollBars = System.Windows.Forms.ScrollBars.Both; + this.textBoxLog.Size = new System.Drawing.Size(168, 258); + this.textBoxLog.TabIndex = 0; + // + // labelInfo + // + this.labelInfo.AutoSize = true; + this.labelInfo.Location = new System.Drawing.Point(12, 9); + this.labelInfo.Name = "labelInfo"; + this.labelInfo.Size = new System.Drawing.Size(275, 13); + this.labelInfo.TabIndex = 1; + this.labelInfo.Text = "Generate text from audio via Whisper speech recognition"; + // + // groupBoxModels + // + this.groupBoxModels.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) + | System.Windows.Forms.AnchorStyles.Right))); + this.groupBoxModels.Controls.Add(this.labelChooseLanguage); + this.groupBoxModels.Controls.Add(this.comboBoxLanguages); + this.groupBoxModels.Controls.Add(this.buttonDownload); + this.groupBoxModels.Controls.Add(this.linkLabelOpenModelsFolder); + this.groupBoxModels.Controls.Add(this.labelModel); + this.groupBoxModels.Controls.Add(this.comboBoxModels); + this.groupBoxModels.Location = new System.Drawing.Point(15, 66); + this.groupBoxModels.Name = "groupBoxModels"; + this.groupBoxModels.Size = new System.Drawing.Size(682, 82); + this.groupBoxModels.TabIndex = 1; + this.groupBoxModels.TabStop = false; + this.groupBoxModels.Text = "Models"; + // + // buttonDownload + // + this.buttonDownload.ImeMode = System.Windows.Forms.ImeMode.NoControl; + this.buttonDownload.Location = new System.Drawing.Point(503, 51); + this.buttonDownload.Name = "buttonDownload"; + this.buttonDownload.Size = new System.Drawing.Size(28, 23); + this.buttonDownload.TabIndex = 1; + this.buttonDownload.Text = "..."; + this.buttonDownload.UseVisualStyleBackColor = true; + 
this.buttonDownload.Click += new System.EventHandler(this.buttonDownload_Click); + // + // linkLabelOpenModelsFolder + // + this.linkLabelOpenModelsFolder.AutoSize = true; + this.linkLabelOpenModelsFolder.Location = new System.Drawing.Point(539, 59); + this.linkLabelOpenModelsFolder.Name = "linkLabelOpenModelsFolder"; + this.linkLabelOpenModelsFolder.Size = new System.Drawing.Size(98, 13); + this.linkLabelOpenModelsFolder.TabIndex = 2; + this.linkLabelOpenModelsFolder.TabStop = true; + this.linkLabelOpenModelsFolder.Text = "Open models folder"; + this.linkLabelOpenModelsFolder.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelOpenModelFolder_LinkClicked); + // + // labelModel + // + this.labelModel.AutoSize = true; + this.labelModel.Location = new System.Drawing.Point(254, 37); + this.labelModel.Name = "labelModel"; + this.labelModel.Size = new System.Drawing.Size(167, 13); + this.labelModel.TabIndex = 0; + this.labelModel.Text = "Choose speech recognition model"; + // + // comboBoxModels + // + this.comboBoxModels.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxModels.FormattingEnabled = true; + this.comboBoxModels.Location = new System.Drawing.Point(257, 53); + this.comboBoxModels.Name = "comboBoxModels"; + this.comboBoxModels.Size = new System.Drawing.Size(240, 21); + this.comboBoxModels.TabIndex = 0; + // + // linkLabeWhisperWebSite + // + this.linkLabeWhisperWebSite.AutoSize = true; + this.linkLabeWhisperWebSite.Location = new System.Drawing.Point(12, 26); + this.linkLabeWhisperWebSite.Name = "linkLabeWhisperWebSite"; + this.linkLabeWhisperWebSite.Size = new System.Drawing.Size(85, 13); + this.linkLabeWhisperWebSite.TabIndex = 0; + this.linkLabeWhisperWebSite.TabStop = true; + this.linkLabeWhisperWebSite.Text = "Whisper website"; + this.linkLabeWhisperWebSite.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelWhisperWebsite_LinkClicked); + // + // labelTime 
+ // + this.labelTime.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left))); + this.labelTime.AutoSize = true; + this.labelTime.Location = new System.Drawing.Point(12, 442); + this.labelTime.Name = "labelTime"; + this.labelTime.Size = new System.Drawing.Size(88, 13); + this.labelTime.TabIndex = 6; + this.labelTime.Text = "Remaining time..."; + // + // timer1 + // + this.timer1.Interval = 1000; + this.timer1.Tick += new System.EventHandler(this.timer1_Tick); + // + // checkBoxUsePostProcessing + // + this.checkBoxUsePostProcessing.AutoSize = true; + this.checkBoxUsePostProcessing.Location = new System.Drawing.Point(15, 162); + this.checkBoxUsePostProcessing.Name = "checkBoxUsePostProcessing"; + this.checkBoxUsePostProcessing.Size = new System.Drawing.Size(312, 17); + this.checkBoxUsePostProcessing.TabIndex = 2; + this.checkBoxUsePostProcessing.Text = "Use post-processing (line merge, fix casing, and punctuation)"; + this.checkBoxUsePostProcessing.UseVisualStyleBackColor = true; + // + // groupBoxInputFiles + // + this.groupBoxInputFiles.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) + | System.Windows.Forms.AnchorStyles.Left) + | System.Windows.Forms.AnchorStyles.Right))); + this.groupBoxInputFiles.Controls.Add(this.listViewInputFiles); + this.groupBoxInputFiles.Location = new System.Drawing.Point(15, 200); + this.groupBoxInputFiles.Name = "groupBoxInputFiles"; + this.groupBoxInputFiles.Size = new System.Drawing.Size(682, 185); + this.groupBoxInputFiles.TabIndex = 3; + this.groupBoxInputFiles.TabStop = false; + this.groupBoxInputFiles.Text = "Input files"; + // + // listViewInputFiles + // + this.listViewInputFiles.AllowDrop = true; + this.listViewInputFiles.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) + | 
System.Windows.Forms.AnchorStyles.Left) + | System.Windows.Forms.AnchorStyles.Right))); + this.listViewInputFiles.Columns.AddRange(new System.Windows.Forms.ColumnHeader[] { + this.columnHeaderFileName}); + this.listViewInputFiles.FullRowSelect = true; + this.listViewInputFiles.HideSelection = false; + this.listViewInputFiles.Location = new System.Drawing.Point(6, 18); + this.listViewInputFiles.Name = "listViewInputFiles"; + this.listViewInputFiles.Size = new System.Drawing.Size(670, 150); + this.listViewInputFiles.TabIndex = 0; + this.listViewInputFiles.UseCompatibleStateImageBehavior = false; + this.listViewInputFiles.View = System.Windows.Forms.View.Details; + // + // columnHeaderFileName + // + this.columnHeaderFileName.Text = "File name"; + this.columnHeaderFileName.Width = 455; + // + // labelChooseLanguage + // + this.labelChooseLanguage.AutoSize = true; + this.labelChooseLanguage.Location = new System.Drawing.Point(3, 37); + this.labelChooseLanguage.Name = "labelChooseLanguage"; + this.labelChooseLanguage.Size = new System.Drawing.Size(90, 13); + this.labelChooseLanguage.TabIndex = 6; + this.labelChooseLanguage.Text = "Choose language"; + // + // comboBoxLanguages + // + this.comboBoxLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxLanguages.FormattingEnabled = true; + this.comboBoxLanguages.Location = new System.Drawing.Point(6, 53); + this.comboBoxLanguages.Name = "comboBoxLanguages"; + this.comboBoxLanguages.Size = new System.Drawing.Size(194, 21); + this.comboBoxLanguages.TabIndex = 7; + // + // WhisperAudioToTextSelectedLines + // + this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); + this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; + this.ClientSize = new System.Drawing.Size(709, 464); + this.Controls.Add(this.groupBoxInputFiles); + this.Controls.Add(this.checkBoxUsePostProcessing); + this.Controls.Add(this.labelTime); + this.Controls.Add(this.linkLabeWhisperWebSite); + 
this.Controls.Add(this.groupBoxModels); + this.Controls.Add(this.labelInfo); + this.Controls.Add(this.labelProgress); + this.Controls.Add(this.progressBar1); + this.Controls.Add(this.buttonCancel); + this.Controls.Add(this.buttonGenerate); + this.Controls.Add(this.textBoxLog); + this.KeyPreview = true; + this.MinimumSize = new System.Drawing.Size(720, 450); + this.Name = "WhisperAudioToTextSelectedLines"; + this.ShowIcon = false; + this.ShowInTaskbar = false; + this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent; + this.Text = "Audio to text"; + this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.AudioToText_FormClosing); + this.Load += new System.EventHandler(this.AudioToText_Load); + this.Shown += new System.EventHandler(this.AudioToTextSelectedLines_Shown); + this.ResizeEnd += new System.EventHandler(this.AudioToTextSelectedLines_ResizeEnd); + this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.AudioToText_KeyDown); + this.groupBoxModels.ResumeLayout(false); + this.groupBoxModels.PerformLayout(); + this.groupBoxInputFiles.ResumeLayout(false); + this.ResumeLayout(false); + this.PerformLayout(); + + } + + #endregion + + private System.Windows.Forms.Button buttonCancel; + private System.Windows.Forms.Button buttonGenerate; + private System.Windows.Forms.ProgressBar progressBar1; + private System.Windows.Forms.Label labelProgress; + private System.Windows.Forms.TextBox textBoxLog; + private System.Windows.Forms.Label labelInfo; + private System.Windows.Forms.GroupBox groupBoxModels; + private System.Windows.Forms.LinkLabel linkLabeWhisperWebSite; + private System.Windows.Forms.Label labelModel; + private System.Windows.Forms.ComboBox comboBoxModels; + private System.Windows.Forms.LinkLabel linkLabelOpenModelsFolder; + private System.Windows.Forms.Label labelTime; + private System.Windows.Forms.Timer timer1; + private System.Windows.Forms.CheckBox checkBoxUsePostProcessing; + private System.Windows.Forms.Button 
buttonDownload;
+        private System.Windows.Forms.GroupBox groupBoxInputFiles;
+        private System.Windows.Forms.ListView listViewInputFiles;
+        private System.Windows.Forms.ColumnHeader columnHeaderFileName;
+        private System.Windows.Forms.Label labelChooseLanguage;
+        private System.Windows.Forms.ComboBox comboBoxLanguages;
+    }
+}
\ No newline at end of file
diff --git a/src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.cs b/src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.cs
new file mode 100644
index 000000000..7731222ff
--- /dev/null
+++ b/src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.cs
@@ -0,0 +1,566 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Windows.Forms;
+using Nikse.SubtitleEdit.Core.AudioToText;
+using Nikse.SubtitleEdit.Core.Common;
+using Nikse.SubtitleEdit.Logic;
+
+namespace Nikse.SubtitleEdit.Forms.AudioToText
+{
+    /// <summary>
+    /// Dialog that runs the external "whisper" command line tool on one audio clip per selected
+    /// subtitle line and writes the recognized text back to the paragraph of each clip.
+    /// </summary>
+    public sealed partial class WhisperAudioToTextSelectedLines : Form
+    {
+        private bool _cancel;
+        private int _batchFileNumber;
+        private readonly List<AudioClipsGet.AudioClip> _audioClips;
+        private readonly Form _parentForm;
+
+        // Matches whisper output lines such as "[00:00.000 --> 00:05.000] text".
+        private readonly Regex _timeRegex = new Regex(@"^\[\d\d:\d\d[\.,]\d\d\d --> \d\d:\d\d[\.,]\d\d\d\]", RegexOptions.Compiled);
+        private List<ResultText> _resultList;
+        private string _languageCode;
+
+        /// <summary>
+        /// Post-processed transcription of the most recently transcribed audio clip.
+        /// </summary>
+        public Subtitle TranscribedSubtitle { get; private set; }
+
+        /// <summary>
+        /// Creates the dialog and lists the audio file of every clip to transcribe.
+        /// </summary>
+        /// <param name="audioClips">Clips to transcribe; the text of each clip's paragraph is overwritten.</param>
+        /// <param name="parentForm">Form whose taskbar button shows the batch progress.</param>
+        public WhisperAudioToTextSelectedLines(List<AudioClipsGet.AudioClip> audioClips, Form parentForm)
+        {
+            UiUtil.PreInitialize(this);
+            InitializeComponent();
+            UiUtil.FixFonts(this);
+            UiUtil.FixLargeFonts(this, buttonGenerate);
+            _parentForm = parentForm;
+
+            Text = LanguageSettings.Current.AudioToText.Title;
+            labelInfo.Text = LanguageSettings.Current.AudioToText.WhisperInfo;
+            groupBoxModels.Text = LanguageSettings.Current.AudioToText.LanguagesAndModels;
+            labelChooseLanguage.Text = LanguageSettings.Current.AudioToText.ChooseLanguage;
+            labelModel.Text = LanguageSettings.Current.AudioToText.ChooseModel;
+            linkLabelOpenModelsFolder.Text = LanguageSettings.Current.AudioToText.OpenModelsFolder;
+            checkBoxUsePostProcessing.Text = LanguageSettings.Current.AudioToText.UsePostProcessing;
+            buttonGenerate.Text = LanguageSettings.Current.Watermark.Generate;
+            buttonCancel.Text = LanguageSettings.Current.General.Cancel;
+            groupBoxInputFiles.Text = LanguageSettings.Current.BatchConvert.Input;
+            linkLabeWhisperWebSite.Text = LanguageSettings.Current.AudioToText.WhisperWebsite;
+
+            columnHeaderFileName.Text = LanguageSettings.Current.JoinSubtitles.FileName;
+
+            checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing;
+
+            comboBoxLanguages.Items.Clear();
+            comboBoxLanguages.Items.AddRange(WhisperLanguage.Languages.ToArray());
+            var lang = WhisperLanguage.Languages.FirstOrDefault(p => p.Code == Configuration.Settings.Tools.WhisperLanguageCode);
+            comboBoxLanguages.Text = lang != null ? lang.ToString() : "English";
+
+            WhisperAudioToText.FillModels(comboBoxModels, string.Empty);
+
+            textBoxLog.Visible = false;
+            textBoxLog.Dock = DockStyle.Fill;
+            labelProgress.Text = string.Empty;
+            labelTime.Text = string.Empty;
+            listViewInputFiles.Visible = true;
+            _audioClips = audioClips;
+            progressBar1.Maximum = 100;
+            foreach (var audioClip in audioClips)
+            {
+                listViewInputFiles.Items.Add(audioClip.AudioFileName);
+            }
+        }
+
+        /// <summary>
+        /// Starts the batch transcription, or opens the model download dialog when no model is installed.
+        /// </summary>
+        private void ButtonGenerate_Click(object sender, EventArgs e)
+        {
+            if (comboBoxModels.Items.Count == 0)
+            {
+                buttonDownload_Click(null, null);
+                return;
+            }
+
+            if (listViewInputFiles.Items.Count == 0)
+            {
+                return;
+            }
+
+            GenerateBatch();
+            TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
+        }
+
+        /// <summary>
+        /// Resets the progress bar to zero and shows it just below the progress label.
+        /// </summary>
+        private void ShowProgressBar()
+        {
+            progressBar1.Maximum = 100;
+            progressBar1.Value = 0;
+            progressBar1.Visible = true;
+            progressBar1.BringToFront();
+            progressBar1.Refresh();
+            progressBar1.Top = labelProgress.Bottom + 3;
+        }
+
+        /// <summary>
+        /// Returns the language code of the language selected in the language combo box.
+        /// </summary>
+        /// <remarks>
+        /// The combo box holds <see cref="WhisperLanguage"/> items, so the code is read from the
+        /// selected item; <see cref="GetLanguage"/> is only a fallback because it resolves names
+        /// through the Vosk model list.
+        /// </remarks>
+        private string GetSelectedLanguageCode()
+        {
+            if (comboBoxLanguages.SelectedItem is WhisperLanguage language && !string.IsNullOrEmpty(language.Code))
+            {
+                return language.Code;
+            }
+
+            return GetLanguage(comboBoxLanguages.Text);
+        }
+
+        /// <summary>
+        /// Transcribes every listed audio clip in order, stores the text on the clip's paragraph and
+        /// closes the dialog with <see cref="DialogResult.OK"/> once all clips are done.
+        /// </summary>
+        private void GenerateBatch()
+        {
+            _languageCode = GetSelectedLanguageCode();
+            groupBoxInputFiles.Enabled = false;
+            comboBoxLanguages.Enabled = false;
+            comboBoxModels.Enabled = false;
+            _batchFileNumber = 0;
+
+            // The post processor must use the transcription language, not the model name.
+            var postProcessor = new AudioToTextPostProcessor(_languageCode)
+            {
+                ParagraphMaxChars = Configuration.Settings.General.SubtitleLineMaximumLength * 2,
+            };
+            textBoxLog.AppendText("Batch mode" + Environment.NewLine);
+            foreach (ListViewItem lvi in listViewInputFiles.Items)
+            {
+                _batchFileNumber++;
+                listViewInputFiles.SelectedIndices.Clear();
+                lvi.Selected = true;
+                ShowProgressBar();
+                buttonGenerate.Enabled = false;
+                buttonDownload.Enabled = false;
+                comboBoxModels.Enabled = false;
+                comboBoxLanguages.Enabled = false;
+
+                // The list view items hold the audio file names of the clips.
+                var waveFileName = lvi.Text;
+                textBoxLog.AppendText("Wav file name: " + waveFileName + Environment.NewLine);
+                progressBar1.Style = ProgressBarStyle.Blocks;
+                var transcript = TranscribeViaWhisper(waveFileName);
+                if (_cancel)
+                {
+                    TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
+                    groupBoxInputFiles.Enabled = true;
+                    return;
+                }
+
+                TranscribedSubtitle = postProcessor.Generate(transcript, checkBoxUsePostProcessing.Checked, true, true, true, true);
+
+                SaveToAudioClip(_batchFileNumber - 1);
+                TaskbarList.SetProgressValue(_parentForm.Handle, _batchFileNumber, listViewInputFiles.Items.Count);
+            }
+
+            progressBar1.Value = 100;
+            labelTime.Text = string.Empty;
+            PostFix(postProcessor);
+            DialogResult = DialogResult.OK;
+        }
+
+        /// <summary>
+        /// Runs whisper on a single audio file and collects the timed text lines it prints.
+        /// </summary>
+        /// <param name="waveFileName">Path of the audio file to transcribe.</param>
+        /// <returns>
+        /// The recognized lines, an empty list when no model is selected, or <c>null</c> when the
+        /// user cancelled (the whisper process is killed and the dialog result is set to Cancel).
+        /// </returns>
+        public List<ResultText> TranscribeViaWhisper(string waveFileName)
+        {
+            // SelectedItem is null when nothing is selected; indexing Items with SelectedIndex -1 would throw.
+            var model = comboBoxModels.SelectedItem as WhisperModel;
+            if (model == null)
+            {
+                return new List<ResultText>();
+            }
+
+            labelProgress.Text = string.Format(LanguageSettings.Current.AudioToText.TranscribingXOfY, _batchFileNumber, listViewInputFiles.Items.Count);
+            labelProgress.Refresh();
+            Application.DoEvents();
+            _resultList = new List<ResultText>();
+            using (var process = GetWhisperProcess(waveFileName, model.Name, comboBoxLanguages.Text, OutputHandler))
+            {
+                ShowProgressBar();
+                progressBar1.Style = ProgressBarStyle.Marquee;
+                buttonCancel.Visible = true;
+                try
+                {
+                    process.PriorityClass = ProcessPriorityClass.Normal;
+                }
+                catch
+                {
+                    // ignored - the priority is only a hint and the process may already have exited
+                }
+
+                _cancel = false;
+
+                labelProgress.Text = LanguageSettings.Current.AudioToText.Transcribing;
+                while (!process.HasExited)
+                {
+                    // Keep the UI responsive (and the Cancel button clickable) while whisper runs.
+                    Application.DoEvents();
+                    System.Threading.Thread.Sleep(100);
+
+                    Refresh();
+                    if (_cancel)
+                    {
+                        KillProcess(process);
+                        progressBar1.Visible = false;
+                        buttonCancel.Visible = false;
+                        DialogResult = DialogResult.Cancel;
+                        return null;
+                    }
+                }
+
+                // The parameterless WaitForExit also waits until all redirected output has been
+                // delivered to OutputHandler, so no trailing lines are lost.
+                process.WaitForExit();
+                Application.DoEvents();
+            }
+
+            return _resultList;
+        }
+
+        /// <summary>
+        /// Kills the process; a process that exited in the meantime is not an error.
+        /// </summary>
+        private static void KillProcess(Process process)
+        {
+            try
+            {
+                process.Kill();
+            }
+            catch (InvalidOperationException)
+            {
+                // the process has already exited
+            }
+        }
+
+        /// <summary>
+        /// Receives the output of the whisper process and adds every line that starts with a
+        /// time range to the result list.
+        /// </summary>
+        private void OutputHandler(object sendingProcess, DataReceivedEventArgs outLine)
+        {
+            if (string.IsNullOrWhiteSpace(outLine.Data))
+            {
+                return;
+            }
+
+            foreach (var line in outLine.Data.SplitToLines())
+            {
+                if (!_timeRegex.IsMatch(line))
+                {
+                    continue;
+                }
+
+                // Fixed offsets into "[mm:ss.fff --> mm:ss.fff] text" (guaranteed by _timeRegex).
+                var start = line.Substring(1, 10);
+                var end = line.Substring(14, 10);
+                var text = line.Remove(0, 25).Trim();
+                var rt = new ResultText
+                {
+                    Start = GetSeconds(start),
+                    End = GetSeconds(end),
+                    Text = Utilities.AutoBreakLine(text, _languageCode),
+                };
+
+                _resultList.Add(rt);
+            }
+        }
+
+        /// <summary>
+        /// Converts a time code string to seconds.
+        /// </summary>
+        private static decimal GetSeconds(string timeCode)
+        {
+            return (decimal)(TimeCode.ParseToMilliseconds(timeCode) / 1000.0);
+        }
+
+        /// <summary>
+        /// Runs the post processor over the texts of all clips as one subtitle and copies the
+        /// resulting texts back to the clips by index.
+        /// </summary>
+        private void PostFix(AudioToTextPostProcessor postProcessor)
+        {
+            var postSub = new Subtitle();
+            foreach (var audioClip in _audioClips)
+            {
+                postSub.Paragraphs.Add(audioClip.Paragraph);
+            }
+
+            var postSubFixed = postProcessor.Generate(postSub, checkBoxUsePostProcessing.Checked, true, false, true, false);
+            for (var index = 0; index < _audioClips.Count; index++)
+            {
+                var audioClip = _audioClips[index];
+                if (index < postSubFixed.Paragraphs.Count)
+                {
+                    audioClip.Paragraph.Text = postSubFixed.Paragraphs[index].Text;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Stores the text of <see cref="TranscribedSubtitle"/> on the paragraph of the clip at
+        /// <paramref name="index"/> and deletes the clip's audio file.
+        /// </summary>
+        private void SaveToAudioClip(int index)
+        {
+            var audioClip = _audioClips[index];
+
+            var sb = new StringBuilder();
+            foreach (var p in TranscribedSubtitle.Paragraphs)
+            {
+                sb.AppendLine(p.Text);
+            }
+
+            audioClip.Paragraph.Text = sb.ToString().Trim();
+
+            try
+            {
+                File.Delete(audioClip.AudioFileName);
+            }
+            catch
+            {
+                // ignore - failing to delete the clip file must not abort the batch
+            }
+        }
+
+        /// <summary>
+        /// Maps a Vosk model name (or a text starting with a language code) to the two letter
+        /// language code of a known Vosk model; returns "en" when nothing matches.
+        /// </summary>
+        internal static string GetLanguage(string text)
+        {
+            var languageCodeList = VoskModel.Models.Select(p => p.TwoLetterLanguageCode);
+            foreach (var languageCode in languageCodeList)
+            {
+                if (text.Contains("model-" + languageCode) || text.Contains("model-small-" + languageCode) || text.StartsWith(languageCode, StringComparison.OrdinalIgnoreCase))
+                {
+                    return languageCode;
+                }
+
+                if (languageCode == "jp" && (text.Contains("model-ja") || text.Contains("model-small-ja")))
+                {
+                    return languageCode;
+                }
+            }
+
+            return "en";
+        }
+
+        /// <summary>
+        /// Closes the dialog when idle; requests cancellation while a transcription is running.
+        /// </summary>
+        private void buttonCancel_Click(object sender, EventArgs e)
+        {
+            if (buttonGenerate.Enabled)
+            {
+                DialogResult = DialogResult.Cancel;
+            }
+            else
+            {
+                _cancel = true;
+            }
+        }
+
+        /// <summary>
+        /// Starts the "whisper" command line tool for one audio file.
+        /// </summary>
+        /// <param name="waveFileName">Path of the audio file to transcribe.</param>
+        /// <param name="model">Name of the whisper model.</param>
+        /// <param name="language">Language passed to whisper's --language option.</param>
+        /// <param name="dataReceivedHandler">Optional handler that receives both standard output and standard error.</param>
+        /// <returns>The started process.</returns>
+        private Process GetWhisperProcess(string waveFileName, string model, string language, DataReceivedEventHandler dataReceivedHandler = null)
+        {
+            // TODO: check that whisper is installed before starting it.
+            // whisper --model tiny.en --language English --fp16 False a.wav
+            var parameters = $"--model {model} --language \"{language}\" --fp16 False \"{waveFileName}\"";
+            var process = new Process { StartInfo = new ProcessStartInfo("whisper", parameters) { WindowStyle = ProcessWindowStyle.Hidden, CreateNoWindow = true } };
+
+            textBoxLog.AppendText("Calling whisper with : whisper " + parameters + Environment.NewLine);
+
+            if (dataReceivedHandler != null)
+            {
+                // Output can only be redirected when the process is not started through the shell.
+                process.StartInfo.UseShellExecute = false;
+                process.StartInfo.RedirectStandardOutput = true;
+                process.StartInfo.RedirectStandardError = true;
+                process.OutputDataReceived += dataReceivedHandler;
+                process.ErrorDataReceived += dataReceivedHandler;
+            }
+
+            process.Start();
+
+            if (dataReceivedHandler != null)
+            {
+                process.BeginOutputReadLine();
+                process.BeginErrorReadLine();
+            }
+
+            return process;
+        }
+
+        /// <summary>
+        /// Opens the Whisper project website.
+        /// </summary>
+        private void linkLabelWhisperWebsite_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
+        {
+            UiUtil.OpenUrl("https://github.com/openai/whisper");
+        }
+
+        /// <summary>
+        /// Saves the selected model, language and post-processing choice to the settings.
+        /// </summary>
+        private void AudioToText_FormClosing(object sender, FormClosingEventArgs e)
+        {
+            if (comboBoxModels.SelectedItem is WhisperModel model)
+            {
+                Configuration.Settings.Tools.WhisperModel = model.Name;
+            }
+
+            if (comboBoxLanguages.SelectedItem is WhisperLanguage language)
+            {
+                Configuration.Settings.Tools.WhisperLanguageCode = language.Code;
+            }
+
+            Configuration.Settings.Tools.VoskPostProcessing = checkBoxUsePostProcessing.Checked;
+        }
+
+        /// <summary>
+        /// Keyboard shortcuts: F2 toggles the log, Escape closes the idle dialog, the help key opens the website.
+        /// </summary>
+        private void AudioToText_KeyDown(object sender, KeyEventArgs e)
+        {
+            if (e.KeyCode == Keys.F2)
+            {
+                if (textBoxLog.Visible)
+                {
+                    textBoxLog.Visible = false;
+                }
+                else
+                {
+                    textBoxLog.Visible = true;
+                    textBoxLog.BringToFront();
+                }
+
+                e.SuppressKeyPress = true;
+            }
+            else if (e.KeyCode == Keys.Escape && buttonGenerate.Enabled)
+            {
+                DialogResult = DialogResult.Cancel;
+                e.SuppressKeyPress = true;
+            }
+            else if (e.KeyData == UiUtil.HelpKeys)
+            {
+                linkLabelWhisperWebsite_LinkClicked(null, null);
+                e.SuppressKeyPress = true;
+            }
+        }
+
+        /// <summary>
+        /// Opens the folder that holds the whisper models.
+        /// </summary>
+        private void linkLabelOpenModelFolder_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
+        {
+            UiUtil.OpenFolder(WhisperModel.ModelFolder);
+        }
+
+        private void timer1_Tick(object sender, EventArgs e)
+        {
+            // Intentionally empty: the handler is wired up by the designer.
+        }
+
+        /// <summary>
+        /// Formats a remaining time for display: seconds below one minute, whole minutes above
+        /// five minutes, minutes and seconds otherwise.
+        /// </summary>
+        /// <param name="estimatedTotalMs">Remaining time in milliseconds.</param>
+        public static string ToProgressTime(float estimatedTotalMs)
+        {
+            var timeCode = new TimeCode(estimatedTotalMs);
+            if (timeCode.TotalSeconds < 60)
+            {
+                return string.Format(LanguageSettings.Current.GenerateVideoWithBurnedInSubs.TimeRemainingSeconds, (int)Math.Round(timeCode.TotalSeconds));
+            }
+
+            if (timeCode.TotalSeconds / 60 > 5)
+            {
+                return string.Format(LanguageSettings.Current.GenerateVideoWithBurnedInSubs.TimeRemainingMinutes, (int)Math.Round(timeCode.TotalSeconds / 60));
+            }
+
+            return string.Format(LanguageSettings.Current.GenerateVideoWithBurnedInSubs.TimeRemainingMinutesAndSeconds, timeCode.Minutes + timeCode.Hours * 60, timeCode.Seconds);
+        }
+
+        /// <summary>
+        /// Shows the whisper model download dialog and reloads the model list afterwards.
+        /// </summary>
+        private void buttonDownload_Click(object sender, EventArgs e)
+        {
+            using (var form = new WhisperModelDownload { AutoClose = true })
+            {
+                form.ShowDialog(this);
+
+                // Reload with the Whisper model list (not the Vosk one) so the combo box keeps holding WhisperModel items.
+                WhisperAudioToText.FillModels(comboBoxModels, form.LastDownloadedModel);
+            }
+        }
+
+        /// <summary>
+        /// Sets the dialog height and makes sure the input file list is visible.
+        /// </summary>
+        private void ShowHideBatchMode()
+        {
+            Height = checkBoxUsePostProcessing.Bottom + progressBar1.Height + buttonCancel.Height + 450;
+            listViewInputFiles.Visible = true;
+        }
+
+        private void AudioToText_Load(object sender, EventArgs e)
+        {
+            ShowHideBatchMode();
+
+            // -2 is the ListView auto-size value for a column width (see ColumnHeader.Width).
+            listViewInputFiles.Columns[0].Width = -2;
+        }
+
+        private void AudioToTextSelectedLines_Shown(object sender, EventArgs e)
+        {
+            buttonGenerate.Focus();
+        }
+
+        private void AudioToTextSelectedLines_ResizeEnd(object sender, EventArgs e)
+        {
+            listViewInputFiles.AutoSizeLastColumn();
+        }
+    }
+}
diff --git a/src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.resx
b/src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.resx new file mode 100644 index 000000000..1f666f268 --- /dev/null +++ b/src/ui/Forms/AudioToText/WhisperAudioToTextSelectedLines.resx @@ -0,0 +1,123 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + 17, 17 + + \ No newline at end of file diff --git a/src/ui/Forms/SpeechRecognition/WhisperModelDownload.Designer.cs b/src/ui/Forms/AudioToText/WhisperModelDownload.Designer.cs similarity index 99% rename from src/ui/Forms/SpeechRecognition/WhisperModelDownload.Designer.cs rename to src/ui/Forms/AudioToText/WhisperModelDownload.Designer.cs index 6d9dac8cc..37daefb15 100644 --- a/src/ui/Forms/SpeechRecognition/WhisperModelDownload.Designer.cs +++ b/src/ui/Forms/AudioToText/WhisperModelDownload.Designer.cs @@ -1,4 +1,4 @@ -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { sealed partial class WhisperModelDownload { diff --git a/src/ui/Forms/SpeechRecognition/WhisperModelDownload.cs b/src/ui/Forms/AudioToText/WhisperModelDownload.cs similarity index 97% rename from src/ui/Forms/SpeechRecognition/WhisperModelDownload.cs rename to src/ui/Forms/AudioToText/WhisperModelDownload.cs index c0f7c8611..5455c65e1 100644 --- a/src/ui/Forms/SpeechRecognition/WhisperModelDownload.cs +++ b/src/ui/Forms/AudioToText/WhisperModelDownload.cs @@ -1,13 +1,13 @@ -using Nikse.SubtitleEdit.Core.AudioToText; -using Nikse.SubtitleEdit.Core.Common; -using Nikse.SubtitleEdit.Logic; -using System; +using System; using System.IO; using System.Linq; using System.Threading; using System.Windows.Forms; +using Nikse.SubtitleEdit.Core.AudioToText; 
+using Nikse.SubtitleEdit.Core.Common; +using Nikse.SubtitleEdit.Logic; -namespace Nikse.SubtitleEdit.Forms.SpeechRecognition +namespace Nikse.SubtitleEdit.Forms.AudioToText { public sealed partial class WhisperModelDownload : Form { diff --git a/src/ui/Forms/SpeechRecognition/WhisperModelDownload.resx b/src/ui/Forms/AudioToText/WhisperModelDownload.resx similarity index 100% rename from src/ui/Forms/SpeechRecognition/WhisperModelDownload.resx rename to src/ui/Forms/AudioToText/WhisperModelDownload.resx diff --git a/src/ui/Forms/Main.cs b/src/ui/Forms/Main.cs index 821f37599..2f08faa8d 100644 --- a/src/ui/Forms/Main.cs +++ b/src/ui/Forms/Main.cs @@ -43,7 +43,7 @@ using System.Text.RegularExpressions; using System.Threading.Tasks; using System.Windows.Forms; using Nikse.SubtitleEdit.Core.AudioToText; -using Nikse.SubtitleEdit.Forms.SpeechRecognition; +using Nikse.SubtitleEdit.Forms.AudioToText; namespace Nikse.SubtitleEdit.Forms { @@ -203,7 +203,7 @@ namespace Nikse.SubtitleEdit.Forms private ListBox _intellisenceList; private ListBox _intellisenceListOriginal; private bool _updateSelectedCountStatusBar; - private Dictate _dictateForm; + private VoskDictate _dictateForm; private object _dictateTextBox; private bool _hasCurrentVosk; @@ -1744,7 +1744,8 @@ namespace Nikse.SubtitleEdit.Forms openSecondSubtitleToolStripMenuItem.Text = _language.Menu.Video.OpenSecondSubtitle; generateBlankVideoToolStripMenuItem.Text = _language.Menu.Video.GenerateBlankVideo; generateVideoWithHardcodedSubtitleToolStripMenuItem.Text = _language.Menu.Video.GenerateVideoWithBurnedInSub; - videoaudioToTextToolStripMenuItem.Text = _language.Menu.Video.VideoAudioToText; + videoaudioToTextToolStripMenuItem.Text = string.Format(_language.Menu.Video.VideoAudioToTextX, "Vosk/Kaldi"); + audioToTextWhisperTolStripMenuItem.Text = string.Format(_language.Menu.Video.VideoAudioToTextX, "Whisper"); smpteTimeModedropFrameToolStripMenuItem.Text = _language.Menu.Video.SmptTimeMode; 
toolStripMenuItemImportChapters.Text = _language.Menu.Video.ImportChaptersFromVideo; @@ -8781,11 +8782,14 @@ namespace Nikse.SubtitleEdit.Forms toolStripMenuItemSelectedLines.DropDownItems.Insert(0, audio); var audioClip = new ToolStripMenuItem(LanguageSettings.Current.Main.Menu.ContextMenu.ExtractAudio); UiUtil.FixFonts(audioClip); - var audioToText = new ToolStripMenuItem(LanguageSettings.Current.Main.Menu.Video.VideoAudioToText); - UiUtil.FixFonts(audioToText); + var audioToTextWhisper = new ToolStripMenuItem(string.Format(LanguageSettings.Current.Main.Menu.Video.VideoAudioToTextX, "Whisper")); + UiUtil.FixFonts(audioToTextWhisper); + var audioToTextVosk = new ToolStripMenuItem(string.Format(LanguageSettings.Current.Main.Menu.Video.VideoAudioToTextX, "Vosk/Kaldi")); + UiUtil.FixFonts(audioToTextVosk); audio.DropDownItems.Insert(0, audioClip); + audio.DropDownItems.Insert(0, audioToTextWhisper); + audio.DropDownItems.Insert(0, audioToTextVosk); - audio.DropDownItems.Insert(0, audioToText); audioClip.Click += (senderNew, eNew) => { if (!RequireFfmpegOk()) @@ -8797,7 +8801,7 @@ namespace Nikse.SubtitleEdit.Forms UiUtil.OpenFolder(Path.GetDirectoryName(audioClips[0].AudioFileName)); }; - audioToText.Click += (senderNew, eNew) => + audioToTextWhisper.Click += (senderNew, eNew) => { if (!RequireFfmpegOk()) { @@ -8805,11 +8809,41 @@ namespace Nikse.SubtitleEdit.Forms } var audioClips = GetAudioClips(); - using (var form = new AudioToTextSelectedLines(audioClips, this)) + using (var form = new WhisperAudioToTextSelectedLines(audioClips, this)) { if (form.ShowDialog(this) == DialogResult.OK) { - MakeHistoryForUndo(string.Format(_language.BeforeX, LanguageSettings.Current.Main.Menu.Video.VideoAudioToText)); + MakeHistoryForUndo(string.Format(_language.BeforeX, string.Format(LanguageSettings.Current.Main.Menu.Video.VideoAudioToTextX, "Whisper"))); + SubtitleListview1.BeginUpdate(); + foreach (var ac in audioClips) + { + var p = _subtitle.Paragraphs.FirstOrDefault(pa => 
pa.Id == ac.Paragraph.Id); + if (p != null) + { + p.Text = ac.Paragraph.Text; + var idx = _subtitle.Paragraphs.IndexOf(p); + SubtitleListview1.SetText(idx, p.Text); + } + } + SubtitleListview1.EndUpdate(); + RefreshSelectedParagraph(); + } + } + }; + + audioToTextVosk.Click += (senderNew, eNew) => + { + if (!RequireFfmpegOk()) + { + return; + } + + var audioClips = GetAudioClips(); + using (var form = new VoskAudioToTextSelectedLines(audioClips, this)) + { + if (form.ShowDialog(this) == DialogResult.OK) + { + MakeHistoryForUndo(string.Format(_language.BeforeX, string.Format(LanguageSettings.Current.Main.Menu.Video.VideoAudioToTextX, "Vosk/Kaldi"))); SubtitleListview1.BeginUpdate(); foreach (var ac in audioClips) { @@ -10748,7 +10782,7 @@ namespace Nikse.SubtitleEdit.Forms if (_dictateForm == null || string.IsNullOrEmpty(Configuration.Settings.Tools.VoskModel)) { _dictateForm?.Dispose(); - _dictateForm = new Dictate(); + _dictateForm = new VoskDictate(); if (_dictateForm.ShowDialog(this) != DialogResult.OK) { return; @@ -22473,7 +22507,7 @@ namespace Nikse.SubtitleEdit.Forms audioVisualizer.Invalidate(); } - if (_dictateForm != null && Dictate.RecordingOn) + if (_dictateForm != null && VoskDictate.RecordingOn) { pictureBoxRecord.Invalidate(); } @@ -24495,10 +24529,10 @@ namespace Nikse.SubtitleEdit.Forms private void PictureBoxRecord_Paint(object sender, PaintEventArgs e) { - if (_dictateForm != null && Dictate.RecordingOn) + if (_dictateForm != null && VoskDictate.RecordingOn) { - var pct = Dictate.RecordingVolumePercent; - var len = pictureBoxRecord.Height - (int)Math.Round(Dictate.RecordingVolumePercent * pictureBoxRecord.Height / 100.0); + var pct = VoskDictate.RecordingVolumePercent; + var len = pictureBoxRecord.Height - (int)Math.Round(VoskDictate.RecordingVolumePercent * pictureBoxRecord.Height / 100.0); using (var pen = new Pen(Color.DodgerBlue, 5)) { e.Graphics.DrawLine(pen, pictureBoxRecord.Width - 6, pictureBoxRecord.Height - 1, pictureBoxRecord.Width - 
6, len); @@ -34406,7 +34440,7 @@ namespace Nikse.SubtitleEdit.Forms CloseVideoToolStripMenuItemClick(sender, e); } - using (var form = new AudioToText(oldVideoFileName, _videoAudioTrackNumber, this)) + using (var form = new VoskAudioToText(oldVideoFileName, _videoAudioTrackNumber, this)) { var result = form.ShowDialog(this); @@ -34577,7 +34611,7 @@ namespace Nikse.SubtitleEdit.Forms CloseVideoToolStripMenuItemClick(sender, e); } - using (var form = new AudioToTextWhisper(oldVideoFileName, _videoAudioTrackNumber, this)) + using (var form = new WhisperAudioToText(oldVideoFileName, _videoAudioTrackNumber, this)) { var result = form.ShowDialog(this); diff --git a/src/ui/Logic/Language.cs b/src/ui/Logic/Language.cs index bf042d658..8e7ce3315 100644 --- a/src/ui/Logic/Language.cs +++ b/src/ui/Logic/Language.cs @@ -1813,7 +1813,7 @@ namespace Nikse.SubtitleEdit.Logic GenerateTextFromVideo = "Generate text from video...", GenerateBlankVideo = "Generate blank video...", GenerateVideoWithBurnedInSub = "Generate video with burned-in sub...", - VideoAudioToText = "Audio to text...", + VideoAudioToTextX = "Audio to text ({0})...", ImportChaptersFromVideo = "Import chapters from video", GenerateImportShotChanges = "Generate/import shot changes...", RemoveOrExportShotChanges = "Remove/export shot changes...", diff --git a/src/ui/Logic/LanguageDeserializer.cs b/src/ui/Logic/LanguageDeserializer.cs index 6e111593c..c34d9b4d6 100644 --- a/src/ui/Logic/LanguageDeserializer.cs +++ b/src/ui/Logic/LanguageDeserializer.cs @@ -475,15 +475,27 @@ namespace Nikse.SubtitleEdit.Logic case "AudioToText/Info": language.AudioToText.Info = reader.Value; break; + case "AudioToText/WhisperInfo": + language.AudioToText.WhisperInfo = reader.Value; + break; case "AudioToText/VoskWebsite": language.AudioToText.VoskWebsite = reader.Value; break; + case "AudioToText/WhisperWebsite": + language.AudioToText.WhisperWebsite = reader.Value; + break; case "AudioToText/Models": language.AudioToText.Models = 
reader.Value; break; + case "AudioToText/LanguagesAndModels": + language.AudioToText.LanguagesAndModels = reader.Value; + break; case "AudioToText/ChooseModel": language.AudioToText.ChooseModel = reader.Value; break; + case "AudioToText/ChooseLanguage": + language.AudioToText.ChooseLanguage = reader.Value; + break; case "AudioToText/OpenModelsFolder": language.AudioToText.OpenModelsFolder = reader.Value; break; @@ -505,6 +517,9 @@ namespace Nikse.SubtitleEdit.Logic case "AudioToText/BatchMode": language.AudioToText.BatchMode = reader.Value; break; + case "AudioToText/KeepPartialTranscription": + language.AudioToText.KeepPartialTranscription = reader.Value; + break; case "AssaAttachments/Title": language.AssaAttachments.Title = reader.Value; break; @@ -4207,8 +4222,8 @@ namespace Nikse.SubtitleEdit.Logic case "Main/Menu/Video/GenerateVideoWithBurnedInSub": language.Main.Menu.Video.GenerateVideoWithBurnedInSub = reader.Value; break; - case "Main/Menu/Video/VideoAudioToText": - language.Main.Menu.Video.VideoAudioToText = reader.Value; + case "Main/Menu/Video/VideoAudioToTextX": + language.Main.Menu.Video.VideoAudioToTextX = reader.Value; break; case "Main/Menu/Video/ImportChaptersFromVideo": language.Main.Menu.Video.ImportChaptersFromVideo = reader.Value; diff --git a/src/ui/Logic/LanguageStructure.cs b/src/ui/Logic/LanguageStructure.cs index f5d6b3fbb..f18faba87 100644 --- a/src/ui/Logic/LanguageStructure.cs +++ b/src/ui/Logic/LanguageStructure.cs @@ -1659,7 +1659,7 @@ public string GenerateTextFromVideo { get; set; } public string GenerateBlankVideo { get; set; } public string GenerateVideoWithBurnedInSub { get; set; } - public string VideoAudioToText { get; set; } + public string VideoAudioToTextX { get; set; } public string ImportChaptersFromVideo { get; set; } public string GenerateImportShotChanges { get; set; } public string RemoveOrExportShotChanges { get; set; } diff --git a/src/ui/SubtitleEdit.csproj b/src/ui/SubtitleEdit.csproj index 4500accbc..644711a61 
100644 --- a/src/ui/SubtitleEdit.csproj +++ b/src/ui/SubtitleEdit.csproj @@ -207,23 +207,29 @@ AudioClipsGet.cs - + Form - + + WhisperAudioToTextSelectedLines.cs + + + Form + + WhisperModelDownload.cs - + Form - - AudioToTextSelectedLines.cs + + VoskAudioToTextSelectedLines.cs - + Form - - AudioToText.cs + + VoskAudioToText.cs Form @@ -285,11 +291,11 @@ AddWaveformBatch.cs - + Form - - AudioToTextModelDownload.cs + + VoskModelDownload.cs Form @@ -297,17 +303,17 @@ ConvertColorsToDialog.cs - + Form - - AudioToTextWhisper.cs + + WhisperAudioToText.cs - + Form - - Dictate.cs + + VoskDictate.cs Form @@ -1489,14 +1495,17 @@ AudioClipsGet.cs - + + WhisperAudioToTextSelectedLines.cs + + WhisperModelDownload.cs - - AudioToTextSelectedLines.cs + + VoskAudioToTextSelectedLines.cs - - AudioToText.cs + + VoskAudioToText.cs BatchConvertMkvEnding.cs @@ -1528,17 +1537,17 @@ AddWaveformBatch.cs - - AudioToTextModelDownload.cs + + VoskModelDownload.cs ConvertColorsToDialog.cs - - AudioToTextWhisper.cs + + WhisperAudioToText.cs - - Dictate.cs + + VoskDictate.cs DownloadVosk.cs