Refactor audio-to-text folder structure

This commit is contained in:
niksedk 2022-10-03 18:54:55 +02:00
parent 71eb9691f4
commit 07c966d19b
27 changed files with 1060 additions and 103 deletions

View File

@ -176,9 +176,13 @@ Note: Do check free disk space.</WaveFileMalformed>
<AudioToText>
<Title>Audio to text</Title>
<Info>Generate text from audio via Vosk/Kaldi speech recognition</Info>
<WhisperInfo>Generate text from audio via Whisper speech recognition</WhisperInfo>
<VoskWebsite>Vosk website</VoskWebsite>
<WhisperWebsite>Whisper website</WhisperWebsite>
<Models>Models</Models>
<LanguagesAndModels>Languages and models</LanguagesAndModels>
<ChooseModel>Choose model</ChooseModel>
<ChooseLanguage>Choose language</ChooseLanguage>
<OpenModelsFolder>Open models folder</OpenModelsFolder>
<LoadingVoskModel>Loading Vosk speech recognition model...</LoadingVoskModel>
<Transcribing>Transcribing audio to text...</Transcribing>
@ -186,6 +190,7 @@ Note: Do check free disk space.</WaveFileMalformed>
<XFilesSavedToVideoSourceFolder>{0} files saved to video source folder</XFilesSavedToVideoSourceFolder>
<UsePostProcessing>Use post-processing (line merge, fix casing, punctuation, and more)</UsePostProcessing>
<BatchMode>Batch mode</BatchMode>
<KeepPartialTranscription>Keep partial transcription</KeepPartialTranscription>
</AudioToText>
<AssaAttachments>
<Title>Advanced Sub Station Alpha attachments</Title>
@ -1265,7 +1270,7 @@ To use an API key, go to "Options -&gt; Settings -&gt; Tools" to enter your Goog
<GenerateTextFromVideo>Generate text from video...</GenerateTextFromVideo>
<GenerateBlankVideo>Generate blank video...</GenerateBlankVideo>
<GenerateVideoWithBurnedInSub>Generate video with burned-in sub...</GenerateVideoWithBurnedInSub>
<VideoAudioToText>Audio to text...</VideoAudioToText>
<VideoAudioToTextX>Audio to text ({0})...</VideoAudioToTextX>
<ImportChaptersFromVideo>Import chapters from video</ImportChaptersFromVideo>
<GenerateImportShotChanges>Generate/import shot changes...</GenerateImportShotChanges>
<RemoveOrExportShotChanges>Remove/export shot changes...</RemoveOrExportShotChanges>

View File

@ -1,6 +1,6 @@
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
sealed partial class AudioToText
sealed partial class VoskAudioToText
{
/// <summary>
/// Required designer variable.

View File

@ -13,9 +13,9 @@ using Nikse.SubtitleEdit.Core.SubtitleFormats;
using Nikse.SubtitleEdit.Logic;
using Vosk;
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
public sealed partial class AudioToText : Form
public sealed partial class VoskAudioToText : Form
{
private readonly string _videoFileName;
private readonly int _audioTrackNumber;
@ -34,7 +34,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
public Subtitle TranscribedSubtitle { get; private set; }
public AudioToText(string videoFileName, int audioTrackNumber, Form parentForm)
public VoskAudioToText(string videoFileName, int audioTrackNumber, Form parentForm)
{
UiUtil.PreInitialize(this);
InitializeComponent();
@ -611,7 +611,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
private void buttonDownload_Click(object sender, EventArgs e)
{
using (var form = new AudioToTextModelDownload { AutoClose = true })
using (var form = new VoskModelDownload { AutoClose = true })
{
form.ShowDialog(this);
FillModels(comboBoxModels, form.LastDownloadedModel);

View File

@ -1,6 +1,6 @@
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
sealed partial class AudioToTextSelectedLines
sealed partial class VoskAudioToTextSelectedLines
{
/// <summary>
/// Required designer variable.

View File

@ -1,6 +1,5 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
@ -10,9 +9,9 @@ using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic;
using Vosk;
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
public sealed partial class AudioToTextSelectedLines : Form
public sealed partial class VoskAudioToTextSelectedLines : Form
{
private readonly string _voskFolder;
private bool _cancel;
@ -26,7 +25,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
public Subtitle TranscribedSubtitle { get; private set; }
public AudioToTextSelectedLines(List<AudioClipsGet.AudioClip> audioClips, Form parentForm)
public VoskAudioToTextSelectedLines(List<AudioClipsGet.AudioClip> audioClips, Form parentForm)
{
UiUtil.PreInitialize(this);
InitializeComponent();
@ -48,7 +47,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing;
_voskFolder = Path.Combine(Configuration.DataDirectory, "Vosk");
AudioToText.FillModels(comboBoxModels, string.Empty);
VoskAudioToText.FillModels(comboBoxModels, string.Empty);
textBoxLog.Visible = false;
textBoxLog.Dock = DockStyle.Fill;
@ -232,7 +231,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
if (rec.AcceptWaveform(buffer, bytesRead))
{
var res = rec.Result();
var results = AudioToText.ParseJsonToResult(res);
var results = VoskAudioToText.ParseJsonToResult(res);
list.AddRange(results);
}
else
@ -250,7 +249,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
}
var finalResult = rec.FinalResult();
var finalResults = AudioToText.ParseJsonToResult(finalResult);
var finalResults = VoskAudioToText.ParseJsonToResult(finalResult);
list.AddRange(finalResults);
timer1.Stop();
return list;
@ -344,10 +343,10 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
private void buttonDownload_Click(object sender, EventArgs e)
{
using (var form = new AudioToTextModelDownload { AutoClose = true })
using (var form = new VoskModelDownload { AutoClose = true })
{
form.ShowDialog(this);
AudioToText.FillModels(comboBoxModels, form.LastDownloadedModel);
VoskAudioToText.FillModels(comboBoxModels, form.LastDownloadedModel);
}
}

View File

@ -1,6 +1,6 @@
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
partial class Dictate
partial class VoskDictate
{
/// <summary>
/// Required designer variable.

View File

@ -1,17 +1,17 @@
using NAudio.Wave;
using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic;
using System;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Windows.Forms;
using NAudio.Wave;
using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic;
using Vosk;
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
public partial class Dictate : Form
public partial class VoskDictate : Form
{
private static WaveFileWriter _waveFile;
private static Model _model;
@ -21,7 +21,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
public static bool RecordingOn { get; set; }
public static double RecordingVolumePercent { get; set; }
public Dictate()
public VoskDictate()
{
UiUtil.PreInitialize(this);
InitializeComponent();
@ -33,7 +33,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
buttonOK.Text = LanguageSettings.Current.General.Ok;
buttonCancel.Text = LanguageSettings.Current.General.Cancel;
UiUtil.FixLargeFonts(this, buttonOK);
AudioToText.FillModels(comboBoxModels, string.Empty);
VoskAudioToText.FillModels(comboBoxModels, string.Empty);
checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing;
}
@ -82,7 +82,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
if (rec.AcceptWaveform(buffer, bytesRead))
{
var res = rec.Result();
var results = AudioToText.ParseJsonToResult(res);
var results = VoskAudioToText.ParseJsonToResult(res);
list.AddRange(results);
}
else
@ -93,7 +93,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
}
var finalResult = rec.FinalResult();
var finalResults = AudioToText.ParseJsonToResult(finalResult);
var finalResults = VoskAudioToText.ParseJsonToResult(finalResult);
list.AddRange(finalResults);
try
@ -175,10 +175,10 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
private void buttonDownload_Click(object sender, EventArgs e)
{
using (var form = new AudioToTextModelDownload { AutoClose = true })
using (var form = new VoskModelDownload { AutoClose = true })
{
form.ShowDialog(this);
AudioToText.FillModels(comboBoxModels, form.LastDownloadedModel);
VoskAudioToText.FillModels(comboBoxModels, form.LastDownloadedModel);
}
}

View File

@ -1,6 +1,6 @@
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
sealed partial class AudioToTextModelDownload
sealed partial class VoskModelDownload
{
/// <summary>
/// Required designer variable.

View File

@ -7,15 +7,15 @@ using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic;
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
public sealed partial class AudioToTextModelDownload : Form
public sealed partial class VoskModelDownload : Form
{
public bool AutoClose { get; internal set; }
public string LastDownloadedModel { get; internal set; }
private readonly CancellationTokenSource _cancellationTokenSource;
public AudioToTextModelDownload()
public VoskModelDownload()
{
UiUtil.PreInitialize(this);
InitializeComponent();

View File

@ -1,6 +1,6 @@
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
sealed partial class AudioToTextWhisper
sealed partial class WhisperAudioToText
{
/// <summary>
/// Required designer variable.

View File

@ -1,8 +1,4 @@
using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.SubtitleFormats;
using Nikse.SubtitleEdit.Logic;
using System;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
@ -11,10 +7,14 @@ using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.SubtitleFormats;
using Nikse.SubtitleEdit.Logic;
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
public sealed partial class AudioToTextWhisper : Form
public sealed partial class WhisperAudioToText : Form
{
private readonly string _videoFileName;
private readonly int _audioTrackNumber;
@ -31,7 +31,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
public Subtitle TranscribedSubtitle { get; private set; }
public AudioToTextWhisper(string videoFileName, int audioTrackNumber, Form parentForm)
public WhisperAudioToText(string videoFileName, int audioTrackNumber, Form parentForm)
{
UiUtil.PreInitialize(this);
InitializeComponent();
@ -299,7 +299,6 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
var process = GetWhisperProcess(waveFileName, model.Name, comboBoxLanguages.Text, OutputHandler);
ShowProgressBar();
progressBar1.Style = ProgressBarStyle.Marquee;
double seconds = 0;
buttonCancel.Visible = true;
try
{

View File

@ -0,0 +1,327 @@
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
/// <summary>
/// Designer half of the Whisper "audio to text" form for selected lines: declares the controls,
/// lays them out and wires their event handlers. The handlers themselves are not defined in this
/// file (presumably in the non-designer part of this partial class).
/// </summary>
sealed partial class WhisperAudioToTextSelectedLines
{
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
private void InitializeComponent()
{
this.components = new System.ComponentModel.Container();
this.buttonCancel = new System.Windows.Forms.Button();
this.buttonGenerate = new System.Windows.Forms.Button();
this.progressBar1 = new System.Windows.Forms.ProgressBar();
this.labelProgress = new System.Windows.Forms.Label();
this.textBoxLog = new System.Windows.Forms.TextBox();
this.labelInfo = new System.Windows.Forms.Label();
this.groupBoxModels = new System.Windows.Forms.GroupBox();
this.buttonDownload = new System.Windows.Forms.Button();
this.linkLabelOpenModelsFolder = new System.Windows.Forms.LinkLabel();
this.labelModel = new System.Windows.Forms.Label();
this.comboBoxModels = new System.Windows.Forms.ComboBox();
this.linkLabeWhisperWebSite = new System.Windows.Forms.LinkLabel();
this.labelTime = new System.Windows.Forms.Label();
this.timer1 = new System.Windows.Forms.Timer(this.components);
this.checkBoxUsePostProcessing = new System.Windows.Forms.CheckBox();
this.groupBoxInputFiles = new System.Windows.Forms.GroupBox();
this.listViewInputFiles = new System.Windows.Forms.ListView();
this.columnHeaderFileName = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader()));
this.labelChooseLanguage = new System.Windows.Forms.Label();
this.comboBoxLanguages = new System.Windows.Forms.ComboBox();
this.groupBoxModels.SuspendLayout();
this.groupBoxInputFiles.SuspendLayout();
this.SuspendLayout();
//
// buttonCancel
//
this.buttonCancel.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
this.buttonCancel.DialogResult = System.Windows.Forms.DialogResult.Cancel;
this.buttonCancel.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonCancel.Location = new System.Drawing.Point(622, 427);
this.buttonCancel.Name = "buttonCancel";
this.buttonCancel.Size = new System.Drawing.Size(75, 23);
this.buttonCancel.TabIndex = 6;
this.buttonCancel.Text = "C&ancel";
this.buttonCancel.UseVisualStyleBackColor = true;
this.buttonCancel.Click += new System.EventHandler(this.buttonCancel_Click);
//
// buttonGenerate
//
this.buttonGenerate.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
this.buttonGenerate.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonGenerate.Location = new System.Drawing.Point(491, 427);
this.buttonGenerate.Name = "buttonGenerate";
this.buttonGenerate.Size = new System.Drawing.Size(125, 23);
this.buttonGenerate.TabIndex = 5;
this.buttonGenerate.Text = "&Generate";
this.buttonGenerate.UseVisualStyleBackColor = true;
this.buttonGenerate.Click += new System.EventHandler(this.ButtonGenerate_Click);
//
// progressBar1
//
this.progressBar1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.progressBar1.Location = new System.Drawing.Point(12, 427);
this.progressBar1.Name = "progressBar1";
this.progressBar1.Size = new System.Drawing.Size(473, 12);
this.progressBar1.TabIndex = 4;
this.progressBar1.Visible = false;
//
// labelProgress
//
this.labelProgress.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)));
this.labelProgress.AutoSize = true;
this.labelProgress.Location = new System.Drawing.Point(12, 409);
this.labelProgress.Name = "labelProgress";
this.labelProgress.Size = new System.Drawing.Size(70, 13);
// NOTE(review): same TabIndex value as progressBar1 above.
this.labelProgress.TabIndex = 4;
this.labelProgress.Text = "labelProgress";
//
// textBoxLog
//
this.textBoxLog.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.textBoxLog.Location = new System.Drawing.Point(529, 9);
this.textBoxLog.Multiline = true;
this.textBoxLog.Name = "textBoxLog";
this.textBoxLog.ScrollBars = System.Windows.Forms.ScrollBars.Both;
this.textBoxLog.Size = new System.Drawing.Size(168, 258);
this.textBoxLog.TabIndex = 0;
//
// labelInfo
//
this.labelInfo.AutoSize = true;
this.labelInfo.Location = new System.Drawing.Point(12, 9);
this.labelInfo.Name = "labelInfo";
this.labelInfo.Size = new System.Drawing.Size(275, 13);
this.labelInfo.TabIndex = 1;
this.labelInfo.Text = "Generate text from audio via Whisper speech recognition";
//
// groupBoxModels
//
this.groupBoxModels.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.groupBoxModels.Controls.Add(this.labelChooseLanguage);
this.groupBoxModels.Controls.Add(this.comboBoxLanguages);
this.groupBoxModels.Controls.Add(this.buttonDownload);
this.groupBoxModels.Controls.Add(this.linkLabelOpenModelsFolder);
this.groupBoxModels.Controls.Add(this.labelModel);
this.groupBoxModels.Controls.Add(this.comboBoxModels);
this.groupBoxModels.Location = new System.Drawing.Point(15, 66);
this.groupBoxModels.Name = "groupBoxModels";
this.groupBoxModels.Size = new System.Drawing.Size(682, 82);
this.groupBoxModels.TabIndex = 1;
this.groupBoxModels.TabStop = false;
this.groupBoxModels.Text = "Models";
//
// buttonDownload
//
this.buttonDownload.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonDownload.Location = new System.Drawing.Point(503, 51);
this.buttonDownload.Name = "buttonDownload";
this.buttonDownload.Size = new System.Drawing.Size(28, 23);
this.buttonDownload.TabIndex = 1;
this.buttonDownload.Text = "...";
this.buttonDownload.UseVisualStyleBackColor = true;
this.buttonDownload.Click += new System.EventHandler(this.buttonDownload_Click);
//
// linkLabelOpenModelsFolder
//
this.linkLabelOpenModelsFolder.AutoSize = true;
this.linkLabelOpenModelsFolder.Location = new System.Drawing.Point(539, 59);
this.linkLabelOpenModelsFolder.Name = "linkLabelOpenModelsFolder";
this.linkLabelOpenModelsFolder.Size = new System.Drawing.Size(98, 13);
this.linkLabelOpenModelsFolder.TabIndex = 2;
this.linkLabelOpenModelsFolder.TabStop = true;
this.linkLabelOpenModelsFolder.Text = "Open models folder";
this.linkLabelOpenModelsFolder.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelOpenModelFolder_LinkClicked);
//
// labelModel
//
this.labelModel.AutoSize = true;
this.labelModel.Location = new System.Drawing.Point(254, 37);
this.labelModel.Name = "labelModel";
this.labelModel.Size = new System.Drawing.Size(167, 13);
this.labelModel.TabIndex = 0;
this.labelModel.Text = "Choose speech recognition model";
//
// comboBoxModels
//
this.comboBoxModels.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxModels.FormattingEnabled = true;
this.comboBoxModels.Location = new System.Drawing.Point(257, 53);
this.comboBoxModels.Name = "comboBoxModels";
this.comboBoxModels.Size = new System.Drawing.Size(240, 21);
this.comboBoxModels.TabIndex = 0;
//
// linkLabeWhisperWebSite
//
// NOTE(review): the identifier is spelled "linkLabe..." (missing 'l'); the same spelling is used
// by the field declaration below, so any rename has to be done everywhere at once.
this.linkLabeWhisperWebSite.AutoSize = true;
this.linkLabeWhisperWebSite.Location = new System.Drawing.Point(12, 26);
this.linkLabeWhisperWebSite.Name = "linkLabeWhisperWebSite";
this.linkLabeWhisperWebSite.Size = new System.Drawing.Size(85, 13);
this.linkLabeWhisperWebSite.TabIndex = 0;
this.linkLabeWhisperWebSite.TabStop = true;
this.linkLabeWhisperWebSite.Text = "Whisper website";
this.linkLabeWhisperWebSite.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelWhisperWebsite_LinkClicked);
//
// labelTime
//
this.labelTime.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)));
this.labelTime.AutoSize = true;
this.labelTime.Location = new System.Drawing.Point(12, 442);
this.labelTime.Name = "labelTime";
this.labelTime.Size = new System.Drawing.Size(88, 13);
this.labelTime.TabIndex = 6;
this.labelTime.Text = "Remaining time...";
//
// timer1
//
this.timer1.Interval = 1000;
this.timer1.Tick += new System.EventHandler(this.timer1_Tick);
//
// checkBoxUsePostProcessing
//
this.checkBoxUsePostProcessing.AutoSize = true;
this.checkBoxUsePostProcessing.Location = new System.Drawing.Point(15, 162);
this.checkBoxUsePostProcessing.Name = "checkBoxUsePostProcessing";
this.checkBoxUsePostProcessing.Size = new System.Drawing.Size(312, 17);
this.checkBoxUsePostProcessing.TabIndex = 2;
this.checkBoxUsePostProcessing.Text = "Use post-processing (line merge, fix casing, and punctuation)";
this.checkBoxUsePostProcessing.UseVisualStyleBackColor = true;
//
// groupBoxInputFiles
//
this.groupBoxInputFiles.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.groupBoxInputFiles.Controls.Add(this.listViewInputFiles);
this.groupBoxInputFiles.Location = new System.Drawing.Point(15, 200);
this.groupBoxInputFiles.Name = "groupBoxInputFiles";
this.groupBoxInputFiles.Size = new System.Drawing.Size(682, 185);
this.groupBoxInputFiles.TabIndex = 3;
this.groupBoxInputFiles.TabStop = false;
this.groupBoxInputFiles.Text = "Input files";
//
// listViewInputFiles
//
this.listViewInputFiles.AllowDrop = true;
this.listViewInputFiles.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.listViewInputFiles.Columns.AddRange(new System.Windows.Forms.ColumnHeader[] {
this.columnHeaderFileName});
this.listViewInputFiles.FullRowSelect = true;
this.listViewInputFiles.HideSelection = false;
this.listViewInputFiles.Location = new System.Drawing.Point(6, 18);
this.listViewInputFiles.Name = "listViewInputFiles";
this.listViewInputFiles.Size = new System.Drawing.Size(670, 150);
this.listViewInputFiles.TabIndex = 0;
this.listViewInputFiles.UseCompatibleStateImageBehavior = false;
this.listViewInputFiles.View = System.Windows.Forms.View.Details;
//
// columnHeaderFileName
//
this.columnHeaderFileName.Text = "File name";
this.columnHeaderFileName.Width = 455;
//
// labelChooseLanguage
//
this.labelChooseLanguage.AutoSize = true;
this.labelChooseLanguage.Location = new System.Drawing.Point(3, 37);
this.labelChooseLanguage.Name = "labelChooseLanguage";
this.labelChooseLanguage.Size = new System.Drawing.Size(90, 13);
this.labelChooseLanguage.TabIndex = 6;
this.labelChooseLanguage.Text = "Choose language";
//
// comboBoxLanguages
//
this.comboBoxLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxLanguages.FormattingEnabled = true;
this.comboBoxLanguages.Location = new System.Drawing.Point(6, 53);
this.comboBoxLanguages.Name = "comboBoxLanguages";
this.comboBoxLanguages.Size = new System.Drawing.Size(194, 21);
this.comboBoxLanguages.TabIndex = 7;
//
// WhisperAudioToTextSelectedLines
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(709, 464);
this.Controls.Add(this.groupBoxInputFiles);
this.Controls.Add(this.checkBoxUsePostProcessing);
this.Controls.Add(this.labelTime);
this.Controls.Add(this.linkLabeWhisperWebSite);
this.Controls.Add(this.groupBoxModels);
this.Controls.Add(this.labelInfo);
this.Controls.Add(this.labelProgress);
this.Controls.Add(this.progressBar1);
this.Controls.Add(this.buttonCancel);
this.Controls.Add(this.buttonGenerate);
this.Controls.Add(this.textBoxLog);
this.KeyPreview = true;
this.MinimumSize = new System.Drawing.Size(720, 450);
this.Name = "WhisperAudioToTextSelectedLines";
this.ShowIcon = false;
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "Audio to text";
this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.AudioToText_FormClosing);
this.Load += new System.EventHandler(this.AudioToText_Load);
this.Shown += new System.EventHandler(this.AudioToTextSelectedLines_Shown);
this.ResizeEnd += new System.EventHandler(this.AudioToTextSelectedLines_ResizeEnd);
this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.AudioToText_KeyDown);
this.groupBoxModels.ResumeLayout(false);
this.groupBoxModels.PerformLayout();
this.groupBoxInputFiles.ResumeLayout(false);
this.ResumeLayout(false);
this.PerformLayout();
}
#endregion
// Control fields created and configured by InitializeComponent.
private System.Windows.Forms.Button buttonCancel;
private System.Windows.Forms.Button buttonGenerate;
private System.Windows.Forms.ProgressBar progressBar1;
private System.Windows.Forms.Label labelProgress;
private System.Windows.Forms.TextBox textBoxLog;
private System.Windows.Forms.Label labelInfo;
private System.Windows.Forms.GroupBox groupBoxModels;
private System.Windows.Forms.LinkLabel linkLabeWhisperWebSite;
private System.Windows.Forms.Label labelModel;
private System.Windows.Forms.ComboBox comboBoxModels;
private System.Windows.Forms.LinkLabel linkLabelOpenModelsFolder;
private System.Windows.Forms.Label labelTime;
private System.Windows.Forms.Timer timer1;
private System.Windows.Forms.CheckBox checkBoxUsePostProcessing;
private System.Windows.Forms.Button buttonDownload;
private System.Windows.Forms.GroupBox groupBoxInputFiles;
private System.Windows.Forms.ListView listViewInputFiles;
private System.Windows.Forms.ColumnHeader columnHeaderFileName;
private System.Windows.Forms.Label labelChooseLanguage;
private System.Windows.Forms.ComboBox comboBoxLanguages;
}
}

View File

@ -0,0 +1,446 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic;
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
public sealed partial class WhisperAudioToTextSelectedLines : Form
{
// Set by the Cancel button while a transcription is running; polled by the wait loop in TranscribeViaWhisper.
private bool _cancel;
// 1-based number of the clip currently being processed by GenerateBatch.
private int _batchFileNumber;
// Clips to transcribe; each clip's Paragraph.Text receives the transcribed text.
private readonly List<AudioClipsGet.AudioClip> _audioClips;
// Form whose window handle is used for taskbar progress updates.
private readonly Form _parentForm;
// Matches lines that start with "[dd:dd.ddd --> dd:dd.ddd]" ('.' or ',' before the last three digits);
// presumably minutes:seconds.milliseconds - TODO confirm against the Whisper output format.
private readonly Regex _timeRegex = new Regex(@"^\[\d\d:\d\d[\.,]\d\d\d --> \d\d:\d\d[\.,]\d\d\d\]", RegexOptions.Compiled);
// Results collected by OutputHandler for the clip currently being transcribed.
private List<ResultText> _resultList;
// Language code resolved in GenerateBatch and passed to Utilities.AutoBreakLine by OutputHandler.
private string _languageCode;
/// <summary>
/// Post-processed subtitle of the most recently transcribed clip (set by GenerateBatch).
/// </summary>
public Subtitle TranscribedSubtitle { get; private set; }
/// <summary>
/// Creates the form, localizes its controls, pre-selects the configured Whisper language,
/// fills the model list and lists the audio clips to transcribe.
/// </summary>
/// <param name="audioClips">Audio clips to transcribe; one list row is added per clip file name.</param>
/// <param name="parentForm">Form whose handle is used for taskbar progress updates.</param>
public WhisperAudioToTextSelectedLines(List<AudioClipsGet.AudioClip> audioClips, Form parentForm)
{
    UiUtil.PreInitialize(this);
    InitializeComponent();
    UiUtil.FixFonts(this);
    UiUtil.FixLargeFonts(this, buttonGenerate);
    _parentForm = parentForm;

    // Replace the design-time texts with localized ones.
    var language = LanguageSettings.Current;
    Text = language.AudioToText.Title;
    labelInfo.Text = language.AudioToText.WhisperInfo;
    groupBoxModels.Text = language.AudioToText.LanguagesAndModels;
    labelModel.Text = language.AudioToText.ChooseModel;
    // The language label was previously left with its hard-coded design-time text.
    labelChooseLanguage.Text = language.AudioToText.ChooseLanguage;
    linkLabelOpenModelsFolder.Text = language.AudioToText.OpenModelsFolder;
    checkBoxUsePostProcessing.Text = language.AudioToText.UsePostProcessing;
    buttonGenerate.Text = language.Watermark.Generate;
    buttonCancel.Text = language.General.Cancel;
    groupBoxInputFiles.Text = language.BatchConvert.Input;
    linkLabeWhisperWebSite.Text = language.AudioToText.WhisperWebsite;
    columnHeaderFileName.Text = language.JoinSubtitles.FileName;

    checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing;

    // Offer all Whisper languages and pre-select the configured one, falling back to English.
    comboBoxLanguages.Items.Clear();
    comboBoxLanguages.Items.AddRange(WhisperLanguage.Languages.ToArray<object>());
    var lang = WhisperLanguage.Languages.FirstOrDefault(p => p.Code == Configuration.Settings.Tools.WhisperLanguageCode);
    comboBoxLanguages.Text = lang != null ? lang.ToString() : "English";

    WhisperAudioToText.FillModels(comboBoxModels, string.Empty);

    // The log box stays hidden but is docked so that it fills the form if it is shown.
    textBoxLog.Visible = false;
    textBoxLog.Dock = DockStyle.Fill;
    labelProgress.Text = string.Empty;
    labelTime.Text = string.Empty;
    listViewInputFiles.Visible = true;
    _audioClips = audioClips;
    progressBar1.Maximum = 100;
    foreach (var audioClip in audioClips)
    {
        listViewInputFiles.Items.Add(audioClip.AudioFileName);
    }
}
/// <summary>
/// Starts the batch transcription. When no model is available the model download
/// dialog is opened instead; when there are no input files nothing happens.
/// </summary>
private void ButtonGenerate_Click(object sender, EventArgs e)
{
    var hasModels = comboBoxModels.Items.Count != 0;
    if (!hasModels)
    {
        // Nothing to transcribe with - let the user fetch a model first.
        buttonDownload_Click(null, null);
        return;
    }

    var hasInputFiles = listViewInputFiles.Items.Count != 0;
    if (hasInputFiles)
    {
        GenerateBatch();
        // Clear the taskbar progress indicator once the batch has returned.
        TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
    }
}
/// <summary>
/// Resets the progress bar to 0 of 100, makes it visible on top of other controls
/// and places it just below the progress label.
/// </summary>
private void ShowProgressBar()
{
    var bar = progressBar1;
    bar.Maximum = 100;
    bar.Value = 0;
    bar.Visible = true;
    bar.BringToFront();
    bar.Refresh();

    const int gapBelowLabel = 3;
    bar.Top = labelProgress.Bottom + gapBelowLabel;
}
/// <summary>
/// Transcribes every listed clip in order, stores each clip's text via
/// <c>SaveToAudioClip</c>, runs a final post-fix pass over all clips and closes the
/// dialog with <c>DialogResult.OK</c>. Returns early, without the post-fix pass,
/// when the user cancels.
/// </summary>
private void GenerateBatch()
{
    _languageCode = GetLanguage(comboBoxLanguages.Text);

    // Lock the inputs for the whole run; none of these change between clips.
    groupBoxInputFiles.Enabled = false;
    comboBoxLanguages.Enabled = false;
    comboBoxModels.Enabled = false;
    buttonGenerate.Enabled = false;
    buttonDownload.Enabled = false;
    _batchFileNumber = 0;

    // Post-process with the language resolved from the language list. The code was previously
    // derived from the model combo box text, which holds a Whisper model entry rather than a
    // language, so it did not follow the user's language choice.
    var postProcessor = new AudioToTextPostProcessor(_languageCode)
    {
        ParagraphMaxChars = Configuration.Settings.General.SubtitleLineMaximumLength * 2,
    };

    textBoxLog.AppendText("Batch mode" + Environment.NewLine);
    foreach (ListViewItem lvi in listViewInputFiles.Items)
    {
        _batchFileNumber++;

        // Highlight the clip that is being processed.
        listViewInputFiles.SelectedIndices.Clear();
        lvi.Selected = true;
        ShowProgressBar();

        // The list rows hold the clips' audio file names, which are passed straight to Whisper.
        var waveFileName = lvi.Text;
        textBoxLog.AppendText("Wav file name: " + waveFileName + Environment.NewLine);
        progressBar1.Style = ProgressBarStyle.Blocks;

        var transcript = TranscribeViaWhisper(waveFileName);
        if (_cancel)
        {
            // TranscribeViaWhisper returns null on cancel, so the flag is checked before the result is used.
            TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
            groupBoxInputFiles.Enabled = true;
            return;
        }

        TranscribedSubtitle = postProcessor.Generate(transcript, checkBoxUsePostProcessing.Checked, true, true, true, true);
        SaveToAudioClip(_batchFileNumber - 1);
        TaskbarList.SetProgressValue(_parentForm.Handle, _batchFileNumber, listViewInputFiles.Items.Count);
    }

    progressBar1.Value = 100;
    labelTime.Text = string.Empty;
    PostFix(postProcessor);
    DialogResult = DialogResult.OK;
}
/// <summary>
/// Runs Whisper on one wave file and waits, while pumping UI messages, until the process exits.
/// </summary>
/// <param name="waveFileName">Path of the audio file to transcribe.</param>
/// <returns>
/// The results collected by <c>OutputHandler</c>; an empty list when no model is selected;
/// <c>null</c> when the user cancelled (the process is killed and the dialog result is set to Cancel).
/// </returns>
public List<ResultText> TranscribeViaWhisper(string waveFileName)
{
    // SelectedItem is null when nothing is selected, whereas Items[SelectedIndex]
    // would throw ArgumentOutOfRangeException for SelectedIndex == -1.
    var model = comboBoxModels.SelectedItem as WhisperModel;
    if (model == null)
    {
        return new List<ResultText>();
    }

    labelProgress.Text = string.Format(LanguageSettings.Current.AudioToText.TranscribingXOfY, _batchFileNumber, listViewInputFiles.Items.Count);
    labelProgress.Refresh();
    Application.DoEvents();

    _resultList = new List<ResultText>();
    var process = GetWhisperProcess(waveFileName, model.Name, comboBoxLanguages.Text, OutputHandler);
    ShowProgressBar();
    progressBar1.Style = ProgressBarStyle.Marquee;
    buttonCancel.Visible = true;
    try
    {
        process.PriorityClass = ProcessPriorityClass.Normal;
    }
    catch
    {
        // ignored - setting the priority is best effort
    }

    _cancel = false;
    labelProgress.Text = LanguageSettings.Current.AudioToText.Transcribing;

    // Poll instead of blocking so the form stays responsive and Cancel can be clicked.
    while (!process.HasExited)
    {
        Application.DoEvents();
        System.Threading.Thread.Sleep(100);
        Refresh();
        if (_cancel)
        {
            try
            {
                process.Kill();
            }
            catch (InvalidOperationException)
            {
                // The process exited between the HasExited check and Kill - nothing left to stop.
            }

            progressBar1.Visible = false;
            buttonCancel.Visible = false;
            DialogResult = DialogResult.Cancel;
            return null;
        }
    }

    // Short grace period after exit before the collected results are handed back.
    Application.DoEvents();
    System.Threading.Thread.Sleep(100);
    return _resultList;
}
/// <summary>
/// Receives output from the Whisper process and turns every line that starts with a
/// "[start --> end]" time range into a <c>ResultText</c> appended to <c>_resultList</c>.
/// Lines without such a prefix are ignored.
/// </summary>
private void OutputHandler(object sendingProcess, DataReceivedEventArgs outLine)
{
    var data = outLine.Data;
    if (string.IsNullOrWhiteSpace(data))
    {
        return;
    }

    foreach (var outputLine in data.SplitToLines())
    {
        if (!_timeRegex.IsMatch(outputLine))
        {
            continue;
        }

        // Fixed offsets into the "[dd:dd.ddd --> dd:dd.ddd]" prefix matched by _timeRegex.
        var startText = outputLine.Substring(1, 10);
        var endText = outputLine.Substring(14, 10);
        var spokenText = outputLine.Remove(0, 25).Trim();

        var result = new ResultText
        {
            Start = GetSeconds(startText),
            End = GetSeconds(endText),
            Text = Utilities.AutoBreakLine(spokenText, _languageCode),
        };
        _resultList.Add(result);
    }
}
/// <summary>
/// Parses a time code string to milliseconds via <c>TimeCode.ParseToMilliseconds</c>
/// and returns the value expressed in seconds.
/// </summary>
private static decimal GetSeconds(string timeCode)
{
    var milliseconds = TimeCode.ParseToMilliseconds(timeCode);
    var seconds = milliseconds / 1000.0;
    return (decimal)seconds;
}
/// <summary>
/// Runs the post-processor over the texts of all audio clips as one subtitle and copies
/// the resulting texts back to the clips by position.
/// </summary>
/// <param name="postProcessor">Post-processor used to generate the fixed subtitle.</param>
private void PostFix(AudioToTextPostProcessor postProcessor)
{
    // Collect the clip paragraphs into a temporary subtitle.
    var collected = new Subtitle();
    foreach (var clip in _audioClips)
    {
        collected.Paragraphs.Add(clip.Paragraph);
    }

    var processed = postProcessor.Generate(collected, checkBoxUsePostProcessing.Checked, true, false, true, false);

    // Only positions present in both lists are written back.
    var pairCount = Math.Min(_audioClips.Count, processed.Paragraphs.Count);
    for (var i = 0; i < pairCount; i++)
    {
        _audioClips[i].Paragraph.Text = processed.Paragraphs[i].Text;
    }
}
/// <summary>
/// Stores the transcribed text (one paragraph per line, trimmed) on the audio clip at
/// <paramref name="index"/> and then tries to delete the clip's audio file.
/// </summary>
/// <param name="index">Zero-based position of the clip in <c>_audioClips</c>.</param>
private void SaveToAudioClip(int index)
{
    var clip = _audioClips[index];

    var lines = TranscribedSubtitle.Paragraphs.Select(paragraph => paragraph.Text);
    clip.Paragraph.Text = string.Join(Environment.NewLine, lines).Trim();

    try
    {
        File.Delete(clip.AudioFileName);
    }
    catch
    {
        // best effort - a file that cannot be deleted is left behind
    }
}
/// <summary>
/// Finds the first two-letter language code from <c>VoskModel.Models</c> that the given text
/// refers to, either as "model-xx" / "model-small-xx" or as a prefix of the text.
/// </summary>
/// <param name="text">Text to inspect.</param>
/// <returns>The matching language code, or "en" when nothing matches.</returns>
internal static string GetLanguage(string text)
{
    foreach (var voskModel in VoskModel.Models)
    {
        var code = voskModel.TwoLetterLanguageCode;

        var namedInModel = text.Contains("model-" + code) || text.Contains("model-small-" + code);
        if (namedInModel || text.StartsWith(code, StringComparison.OrdinalIgnoreCase))
        {
            return code;
        }

        // The code "jp" is also accepted when the text uses "ja" in the model name.
        var isJapaneseAlias = text.Contains("model-ja") || text.Contains("model-small-ja");
        if (code == "jp" && isJapaneseAlias)
        {
            return code;
        }
    }

    return "en";
}
/// <summary>
/// Cancel button: requests cancellation through the <c>_cancel</c> flag while the generate
/// button is disabled, otherwise closes the dialog with a Cancel result.
/// </summary>
private void buttonCancel_Click(object sender, EventArgs e)
{
    if (!buttonGenerate.Enabled)
    {
        // Picked up by the polling loop in TranscribeViaWhisper.
        _cancel = true;
        return;
    }

    DialogResult = DialogResult.Cancel;
}
/// <summary>
/// Creates and starts a hidden "whisper" process for the given wave file and writes the
/// command line to the log text box.
/// </summary>
/// <param name="waveFileName">Wave file passed (quoted) as the last argument.</param>
/// <param name="model">Value for the --model argument.</param>
/// <param name="language">Value for the --language argument (quoted).</param>
/// <param name="dataReceivedHandler">
/// Optional handler; when given, standard output and standard error are redirected and both
/// streams are delivered to this handler.
/// </param>
/// <returns>The started process.</returns>
private Process GetWhisperProcess(string waveFileName, string model, string language, DataReceivedEventHandler dataReceivedHandler = null)
{
    // TODO: check that whisper can actually be started before launching it.
    // Example command line: whisper --model tiny.en --language English --fp16 False a.wav
    var arguments = $"--model {model} --language \"{language}\" --fp16 False \"{waveFileName}\"";
    var captureOutput = dataReceivedHandler != null;

    var whisper = new Process
    {
        StartInfo = new ProcessStartInfo("whisper", arguments)
        {
            WindowStyle = ProcessWindowStyle.Hidden,
            CreateNoWindow = true,
        },
    };
    textBoxLog.AppendText("Calling whisper with : whisper " + arguments + Environment.NewLine);

    if (captureOutput)
    {
        // Redirection requires starting the process without the shell.
        whisper.StartInfo.UseShellExecute = false;
        whisper.StartInfo.RedirectStandardOutput = true;
        whisper.StartInfo.RedirectStandardError = true;
        whisper.OutputDataReceived += dataReceivedHandler;
        whisper.ErrorDataReceived += dataReceivedHandler;
    }

    whisper.Start();

    if (captureOutput)
    {
        whisper.BeginOutputReadLine();
        whisper.BeginErrorReadLine();
    }

    return whisper;
}
/// <summary>
/// Opens the Whisper project page. Also invoked directly with null arguments from the
/// key handler, so neither parameter is used.
/// </summary>
private void linkLabelWhisperWebsite_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
{
    const string whisperUrl = "https://github.com/openai/whisper";
    UiUtil.OpenUrl(whisperUrl);
}
/// <summary>
/// Persists the current model, language and post-processing choices to the tool settings
/// when the form closes.
/// </summary>
private void AudioToText_FormClosing(object sender, FormClosingEventArgs e)
{
    var selectedModel = comboBoxModels.SelectedItem as WhisperModel;
    if (selectedModel != null)
    {
        Configuration.Settings.Tools.WhisperModel = selectedModel.Name;
    }

    var selectedLanguage = comboBoxLanguages.SelectedItem as WhisperLanguage;
    if (selectedLanguage != null)
    {
        Configuration.Settings.Tools.WhisperLanguageCode = selectedLanguage.Code;
    }

    // NOTE(review): the post-processing flag is stored in the Vosk setting even though
    // this form saves Whisper choices - confirm that sharing the setting is intended.
    Configuration.Settings.Tools.VoskPostProcessing = checkBoxUsePostProcessing.Checked;
}
/// <summary>
/// Keyboard shortcuts for the form: F2 toggles the log text box, Escape cancels the dialog
/// (only while the generate button is enabled) and the help key opens the Whisper website.
/// Handled keys are suppressed.
/// </summary>
private void AudioToText_KeyDown(object sender, KeyEventArgs e)
{
    var handled = true;

    if (e.KeyCode == Keys.F2)
    {
        // Toggle the log; when it becomes visible make sure it is on top.
        var showLog = !textBoxLog.Visible;
        textBoxLog.Visible = showLog;
        if (showLog)
        {
            textBoxLog.BringToFront();
        }
    }
    else if (e.KeyCode == Keys.Escape && buttonGenerate.Enabled)
    {
        DialogResult = DialogResult.Cancel;
    }
    else if (e.KeyData == UiUtil.HelpKeys)
    {
        linkLabelWhisperWebsite_LinkClicked(null, null);
    }
    else
    {
        handled = false;
    }

    if (handled)
    {
        e.SuppressKeyPress = true;
    }
}
/// <summary>
/// Opens the Whisper model folder via <c>UiUtil.OpenFolder</c>.
/// </summary>
private void linkLabelOpenModelFolder_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
{
    var modelFolder = WhisperModel.ModelFolder;
    UiUtil.OpenFolder(modelFolder);
}
// Tick handler with an intentionally empty body: no periodic work is done here.
// NOTE(review): presumably still subscribed to timer1.Tick in the designer file -
// confirm before removing this method.
private void timer1_Tick(object sender, EventArgs e)
{
}
/// <summary>
/// Formats an estimated duration as a localized "time remaining" text.
/// Under one minute the text is in seconds, above five minutes in whole minutes,
/// and in between as minutes and seconds.
/// </summary>
/// <param name="estimatedTotalMs">Estimated duration in milliseconds.</param>
/// <returns>The formatted, localized text.</returns>
public static string ToProgressTime(float estimatedTotalMs)
{
    var remaining = new TimeCode(estimatedTotalMs);
    var totalSeconds = remaining.TotalSeconds;

    if (totalSeconds < 60)
    {
        var seconds = (int)Math.Round(totalSeconds);
        return string.Format(LanguageSettings.Current.GenerateVideoWithBurnedInSubs.TimeRemainingSeconds, seconds);
    }

    var totalMinutes = totalSeconds / 60;
    if (totalMinutes > 5)
    {
        var minutes = (int)Math.Round(totalMinutes);
        return string.Format(LanguageSettings.Current.GenerateVideoWithBurnedInSubs.TimeRemainingMinutes, minutes);
    }

    // Hours are folded into the minute count for the minutes-and-seconds form.
    var wholeMinutes = remaining.Minutes + remaining.Hours * 60;
    return string.Format(LanguageSettings.Current.GenerateVideoWithBurnedInSubs.TimeRemainingMinutesAndSeconds, wholeMinutes, remaining.Seconds);
}
/// <summary>
/// Shows the Whisper model download dialog and afterwards refills the model combo box,
/// passing along the model that was downloaded last.
/// </summary>
private void buttonDownload_Click(object sender, EventArgs e)
{
    using (var downloadDialog = new WhisperModelDownload { AutoClose = true })
    {
        downloadDialog.ShowDialog(this);

        // NOTE(review): the combo box is refilled through the Vosk form's FillModels although
        // the rest of this form works with WhisperModel items - confirm this is the intended helper.
        var lastDownloaded = downloadDialog.LastDownloadedModel;
        VoskAudioToText.FillModels(comboBoxModels, lastDownloaded);
    }
}
/// <summary>
/// Sizes the form to make room for the input file list and shows the list.
/// Despite the name, nothing is hidden here: the list is always made visible.
/// </summary>
private void ShowHideBatchMode()
{
    const int extraHeight = 450;
    var controlsHeight = progressBar1.Height + buttonCancel.Height;

    Height = checkBoxUsePostProcessing.Bottom + controlsHeight + extraHeight;
    listViewInputFiles.Visible = true;
}
/// <summary>
/// Form load: applies the batch layout and auto-sizes the first column of the input file list.
/// </summary>
private void AudioToText_Load(object sender, EventArgs e)
{
    // A ColumnHeader width of -2 asks the list view to auto-size the column to its heading.
    const int autoSizeToHeading = -2;

    ShowHideBatchMode();

    var firstColumn = listViewInputFiles.Columns[0];
    firstColumn.Width = autoSizeToHeading;
}
/// <summary>
/// Gives the generate button the input focus once the form has been shown.
/// </summary>
private void AudioToTextSelectedLines_Shown(object sender, EventArgs e) => buttonGenerate.Focus();
/// <summary>
/// Re-fits the last column of the input file list after the form has been resized.
/// </summary>
private void AudioToTextSelectedLines_ResizeEnd(object sender, EventArgs e) => listViewInputFiles.AutoSizeLastColumn();
}
}

View File

@ -0,0 +1,123 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that supports
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<metadata name="timer1.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<value>17, 17</value>
</metadata>
</root>

View File

@ -1,4 +1,4 @@
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
sealed partial class WhisperModelDownload
{

View File

@ -1,13 +1,13 @@
using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic;
using System;
using System;
using System.IO;
using System.Linq;
using System.Threading;
using System.Windows.Forms;
using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic;
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
namespace Nikse.SubtitleEdit.Forms.AudioToText
{
public sealed partial class WhisperModelDownload : Form
{

View File

@ -43,7 +43,7 @@ using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Forms.SpeechRecognition;
using Nikse.SubtitleEdit.Forms.AudioToText;
namespace Nikse.SubtitleEdit.Forms
{
@ -203,7 +203,7 @@ namespace Nikse.SubtitleEdit.Forms
private ListBox _intellisenceList;
private ListBox _intellisenceListOriginal;
private bool _updateSelectedCountStatusBar;
private Dictate _dictateForm;
private VoskDictate _dictateForm;
private object _dictateTextBox;
private bool _hasCurrentVosk;
@ -1744,7 +1744,8 @@ namespace Nikse.SubtitleEdit.Forms
openSecondSubtitleToolStripMenuItem.Text = _language.Menu.Video.OpenSecondSubtitle;
generateBlankVideoToolStripMenuItem.Text = _language.Menu.Video.GenerateBlankVideo;
generateVideoWithHardcodedSubtitleToolStripMenuItem.Text = _language.Menu.Video.GenerateVideoWithBurnedInSub;
videoaudioToTextToolStripMenuItem.Text = _language.Menu.Video.VideoAudioToText;
videoaudioToTextToolStripMenuItem.Text = string.Format(_language.Menu.Video.VideoAudioToTextX, "Vosk/Kaldi");
audioToTextWhisperTolStripMenuItem.Text = string.Format(_language.Menu.Video.VideoAudioToTextX, "Whisper");
smpteTimeModedropFrameToolStripMenuItem.Text = _language.Menu.Video.SmptTimeMode;
toolStripMenuItemImportChapters.Text = _language.Menu.Video.ImportChaptersFromVideo;
@ -8781,11 +8782,14 @@ namespace Nikse.SubtitleEdit.Forms
toolStripMenuItemSelectedLines.DropDownItems.Insert(0, audio);
var audioClip = new ToolStripMenuItem(LanguageSettings.Current.Main.Menu.ContextMenu.ExtractAudio);
UiUtil.FixFonts(audioClip);
var audioToText = new ToolStripMenuItem(LanguageSettings.Current.Main.Menu.Video.VideoAudioToText);
UiUtil.FixFonts(audioToText);
var audioToTextWhisper = new ToolStripMenuItem(string.Format(LanguageSettings.Current.Main.Menu.Video.VideoAudioToTextX, "Whisper"));
UiUtil.FixFonts(audioToTextWhisper);
var audioToTextVosk = new ToolStripMenuItem(string.Format(LanguageSettings.Current.Main.Menu.Video.VideoAudioToTextX, "Vosk/Kaldi"));
UiUtil.FixFonts(audioToTextVosk);
audio.DropDownItems.Insert(0, audioClip);
audio.DropDownItems.Insert(0, audioToTextWhisper);
audio.DropDownItems.Insert(0, audioToTextVosk);
audio.DropDownItems.Insert(0, audioToText);
audioClip.Click += (senderNew, eNew) =>
{
if (!RequireFfmpegOk())
@ -8797,7 +8801,7 @@ namespace Nikse.SubtitleEdit.Forms
UiUtil.OpenFolder(Path.GetDirectoryName(audioClips[0].AudioFileName));
};
audioToText.Click += (senderNew, eNew) =>
audioToTextWhisper.Click += (senderNew, eNew) =>
{
if (!RequireFfmpegOk())
{
@ -8805,11 +8809,41 @@ namespace Nikse.SubtitleEdit.Forms
}
var audioClips = GetAudioClips();
using (var form = new AudioToTextSelectedLines(audioClips, this))
using (var form = new WhisperAudioToTextSelectedLines(audioClips, this))
{
if (form.ShowDialog(this) == DialogResult.OK)
{
MakeHistoryForUndo(string.Format(_language.BeforeX, LanguageSettings.Current.Main.Menu.Video.VideoAudioToText));
MakeHistoryForUndo(string.Format(_language.BeforeX, string.Format(LanguageSettings.Current.Main.Menu.Video.VideoAudioToTextX, "Whisper")));
SubtitleListview1.BeginUpdate();
foreach (var ac in audioClips)
{
var p = _subtitle.Paragraphs.FirstOrDefault(pa => pa.Id == ac.Paragraph.Id);
if (p != null)
{
p.Text = ac.Paragraph.Text;
var idx = _subtitle.Paragraphs.IndexOf(p);
SubtitleListview1.SetText(idx, p.Text);
}
}
SubtitleListview1.EndUpdate();
RefreshSelectedParagraph();
}
}
};
audioToTextVosk.Click += (senderNew, eNew) =>
{
if (!RequireFfmpegOk())
{
return;
}
var audioClips = GetAudioClips();
using (var form = new VoskAudioToTextSelectedLines(audioClips, this))
{
if (form.ShowDialog(this) == DialogResult.OK)
{
MakeHistoryForUndo(string.Format(_language.BeforeX, string.Format(LanguageSettings.Current.Main.Menu.Video.VideoAudioToTextX, "Vosk/Kaldi")));
SubtitleListview1.BeginUpdate();
foreach (var ac in audioClips)
{
@ -10748,7 +10782,7 @@ namespace Nikse.SubtitleEdit.Forms
if (_dictateForm == null || string.IsNullOrEmpty(Configuration.Settings.Tools.VoskModel))
{
_dictateForm?.Dispose();
_dictateForm = new Dictate();
_dictateForm = new VoskDictate();
if (_dictateForm.ShowDialog(this) != DialogResult.OK)
{
return;
@ -22473,7 +22507,7 @@ namespace Nikse.SubtitleEdit.Forms
audioVisualizer.Invalidate();
}
if (_dictateForm != null && Dictate.RecordingOn)
if (_dictateForm != null && VoskDictate.RecordingOn)
{
pictureBoxRecord.Invalidate();
}
@ -24495,10 +24529,10 @@ namespace Nikse.SubtitleEdit.Forms
private void PictureBoxRecord_Paint(object sender, PaintEventArgs e)
{
if (_dictateForm != null && Dictate.RecordingOn)
if (_dictateForm != null && VoskDictate.RecordingOn)
{
var pct = Dictate.RecordingVolumePercent;
var len = pictureBoxRecord.Height - (int)Math.Round(Dictate.RecordingVolumePercent * pictureBoxRecord.Height / 100.0);
var pct = VoskDictate.RecordingVolumePercent;
var len = pictureBoxRecord.Height - (int)Math.Round(VoskDictate.RecordingVolumePercent * pictureBoxRecord.Height / 100.0);
using (var pen = new Pen(Color.DodgerBlue, 5))
{
e.Graphics.DrawLine(pen, pictureBoxRecord.Width - 6, pictureBoxRecord.Height - 1, pictureBoxRecord.Width - 6, len);
@ -34406,7 +34440,7 @@ namespace Nikse.SubtitleEdit.Forms
CloseVideoToolStripMenuItemClick(sender, e);
}
using (var form = new AudioToText(oldVideoFileName, _videoAudioTrackNumber, this))
using (var form = new VoskAudioToText(oldVideoFileName, _videoAudioTrackNumber, this))
{
var result = form.ShowDialog(this);
@ -34577,7 +34611,7 @@ namespace Nikse.SubtitleEdit.Forms
CloseVideoToolStripMenuItemClick(sender, e);
}
using (var form = new AudioToTextWhisper(oldVideoFileName, _videoAudioTrackNumber, this))
using (var form = new WhisperAudioToText(oldVideoFileName, _videoAudioTrackNumber, this))
{
var result = form.ShowDialog(this);

View File

@ -1813,7 +1813,7 @@ namespace Nikse.SubtitleEdit.Logic
GenerateTextFromVideo = "Generate text from video...",
GenerateBlankVideo = "Generate blank video...",
GenerateVideoWithBurnedInSub = "Generate video with burned-in sub...",
VideoAudioToText = "Audio to text...",
VideoAudioToTextX = "Audio to text ({0})...",
ImportChaptersFromVideo = "Import chapters from video",
GenerateImportShotChanges = "Generate/import shot changes...",
RemoveOrExportShotChanges = "Remove/export shot changes...",

View File

@ -475,15 +475,27 @@ namespace Nikse.SubtitleEdit.Logic
case "AudioToText/Info":
language.AudioToText.Info = reader.Value;
break;
case "AudioToText/WhisperInfo":
language.AudioToText.WhisperInfo = reader.Value;
break;
case "AudioToText/VoskWebsite":
language.AudioToText.VoskWebsite = reader.Value;
break;
case "AudioToText/WhisperWebsite":
language.AudioToText.WhisperWebsite = reader.Value;
break;
case "AudioToText/Models":
language.AudioToText.Models = reader.Value;
break;
case "AudioToText/LanguagesAndModels":
language.AudioToText.LanguagesAndModels = reader.Value;
break;
case "AudioToText/ChooseModel":
language.AudioToText.ChooseModel = reader.Value;
break;
case "AudioToText/ChooseLanguage":
language.AudioToText.ChooseLanguage = reader.Value;
break;
case "AudioToText/OpenModelsFolder":
language.AudioToText.OpenModelsFolder = reader.Value;
break;
@ -505,6 +517,9 @@ namespace Nikse.SubtitleEdit.Logic
case "AudioToText/BatchMode":
language.AudioToText.BatchMode = reader.Value;
break;
case "AudioToText/KeepPartialTranscription":
language.AudioToText.KeepPartialTranscription = reader.Value;
break;
case "AssaAttachments/Title":
language.AssaAttachments.Title = reader.Value;
break;
@ -4207,8 +4222,8 @@ namespace Nikse.SubtitleEdit.Logic
case "Main/Menu/Video/GenerateVideoWithBurnedInSub":
language.Main.Menu.Video.GenerateVideoWithBurnedInSub = reader.Value;
break;
case "Main/Menu/Video/VideoAudioToText":
language.Main.Menu.Video.VideoAudioToText = reader.Value;
case "Main/Menu/Video/VideoAudioToTextX":
language.Main.Menu.Video.VideoAudioToTextX = reader.Value;
break;
case "Main/Menu/Video/ImportChaptersFromVideo":
language.Main.Menu.Video.ImportChaptersFromVideo = reader.Value;

View File

@ -1659,7 +1659,7 @@
public string GenerateTextFromVideo { get; set; }
public string GenerateBlankVideo { get; set; }
public string GenerateVideoWithBurnedInSub { get; set; }
public string VideoAudioToText { get; set; }
public string VideoAudioToTextX { get; set; }
public string ImportChaptersFromVideo { get; set; }
public string GenerateImportShotChanges { get; set; }
public string RemoveOrExportShotChanges { get; set; }

View File

@ -207,23 +207,29 @@
<Compile Include="Forms\AudioClipsGet.Designer.cs">
<DependentUpon>AudioClipsGet.cs</DependentUpon>
</Compile>
<Compile Include="Forms\SpeechRecognition\WhisperModelDownload.cs">
<Compile Include="Forms\AudioToText\WhisperAudioToTextSelectedLines.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\SpeechRecognition\WhisperModelDownload.Designer.cs">
<Compile Include="Forms\AudioToText\WhisperAudioToTextSelectedLines.Designer.cs">
<DependentUpon>WhisperAudioToTextSelectedLines.cs</DependentUpon>
</Compile>
<Compile Include="Forms\AudioToText\WhisperModelDownload.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\AudioToText\WhisperModelDownload.Designer.cs">
<DependentUpon>WhisperModelDownload.cs</DependentUpon>
</Compile>
<Compile Include="Forms\SpeechRecognition\AudioToTextSelectedLines.cs">
<Compile Include="Forms\AudioToText\VoskAudioToTextSelectedLines.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\SpeechRecognition\AudioToTextSelectedLines.Designer.cs">
<DependentUpon>AudioToTextSelectedLines.cs</DependentUpon>
<Compile Include="Forms\AudioToText\VoskAudioToTextSelectedLines.Designer.cs">
<DependentUpon>VoskAudioToTextSelectedLines.cs</DependentUpon>
</Compile>
<Compile Include="Forms\SpeechRecognition\AudioToText.cs">
<Compile Include="Forms\AudioToText\VoskAudioToText.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\SpeechRecognition\AudioToText.Designer.cs">
<DependentUpon>AudioToText.cs</DependentUpon>
<Compile Include="Forms\AudioToText\VoskAudioToText.Designer.cs">
<DependentUpon>VoskAudioToText.cs</DependentUpon>
</Compile>
<Compile Include="Forms\BatchConvertMkvEnding.cs">
<SubType>Form</SubType>
@ -285,11 +291,11 @@
<Compile Include="Forms\AddWaveformBatch.Designer.cs">
<DependentUpon>AddWaveformBatch.cs</DependentUpon>
</Compile>
<Compile Include="Forms\SpeechRecognition\AudioToTextModelDownload.cs">
<Compile Include="Forms\AudioToText\VoskModelDownload.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\SpeechRecognition\AudioToTextModelDownload.Designer.cs">
<DependentUpon>AudioToTextModelDownload.cs</DependentUpon>
<Compile Include="Forms\AudioToText\VoskModelDownload.Designer.cs">
<DependentUpon>VoskModelDownload.cs</DependentUpon>
</Compile>
<Compile Include="Forms\ConvertColorsToDialog.cs">
<SubType>Form</SubType>
@ -297,17 +303,17 @@
<Compile Include="Forms\ConvertColorsToDialog.Designer.cs">
<DependentUpon>ConvertColorsToDialog.cs</DependentUpon>
</Compile>
<Compile Include="Forms\SpeechRecognition\AudioToTextWhisper.cs">
<Compile Include="Forms\AudioToText\WhisperAudioToText.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\SpeechRecognition\AudioToTextWhisper.Designer.cs">
<DependentUpon>AudioToTextWhisper.cs</DependentUpon>
<Compile Include="Forms\AudioToText\WhisperAudioToText.Designer.cs">
<DependentUpon>WhisperAudioToText.cs</DependentUpon>
</Compile>
<Compile Include="Forms\SpeechRecognition\Dictate.cs">
<Compile Include="Forms\AudioToText\VoskDictate.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\SpeechRecognition\Dictate.Designer.cs">
<DependentUpon>Dictate.cs</DependentUpon>
<Compile Include="Forms\AudioToText\VoskDictate.Designer.cs">
<DependentUpon>VoskDictate.cs</DependentUpon>
</Compile>
<Compile Include="Forms\DownloadVosk.cs">
<SubType>Form</SubType>
@ -1489,14 +1495,17 @@
<EmbeddedResource Include="Forms\AudioClipsGet.resx">
<DependentUpon>AudioClipsGet.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\SpeechRecognition\WhisperModelDownload.resx">
<EmbeddedResource Include="Forms\AudioToText\WhisperAudioToTextSelectedLines.resx">
<DependentUpon>WhisperAudioToTextSelectedLines.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\AudioToText\WhisperModelDownload.resx">
<DependentUpon>WhisperModelDownload.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\SpeechRecognition\AudioToTextSelectedLines.resx">
<DependentUpon>AudioToTextSelectedLines.cs</DependentUpon>
<EmbeddedResource Include="Forms\AudioToText\VoskAudioToTextSelectedLines.resx">
<DependentUpon>VoskAudioToTextSelectedLines.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\SpeechRecognition\AudioToText.resx">
<DependentUpon>AudioToText.cs</DependentUpon>
<EmbeddedResource Include="Forms\AudioToText\VoskAudioToText.resx">
<DependentUpon>VoskAudioToText.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\BatchConvertMkvEnding.resx">
<DependentUpon>BatchConvertMkvEnding.cs</DependentUpon>
@ -1528,17 +1537,17 @@
<EmbeddedResource Include="Forms\AddWaveformBatch.resx">
<DependentUpon>AddWaveformBatch.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\SpeechRecognition\AudioToTextModelDownload.resx">
<DependentUpon>AudioToTextModelDownload.cs</DependentUpon>
<EmbeddedResource Include="Forms\AudioToText\VoskModelDownload.resx">
<DependentUpon>VoskModelDownload.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\ConvertColorsToDialog.resx">
<DependentUpon>ConvertColorsToDialog.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\SpeechRecognition\AudioToTextWhisper.resx">
<DependentUpon>AudioToTextWhisper.cs</DependentUpon>
<EmbeddedResource Include="Forms\AudioToText\WhisperAudioToText.resx">
<DependentUpon>WhisperAudioToText.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\SpeechRecognition\Dictate.resx">
<DependentUpon>Dictate.cs</DependentUpon>
<EmbeddedResource Include="Forms\AudioToText\VoskDictate.resx">
<DependentUpon>VoskDictate.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\DownloadVosk.resx">
<DependentUpon>DownloadVosk.cs</DependentUpon>