Testing naudio record

This commit is contained in:
niksedk 2022-09-18 18:35:18 +02:00
parent ec8020621d
commit 480152a2df
9 changed files with 315 additions and 70 deletions

View File

@ -2561,6 +2561,7 @@ can edit in same subtitle file (collaboration)</Information>
<MainTextBoxAutoBreak>Auto break text</MainTextBoxAutoBreak> <MainTextBoxAutoBreak>Auto break text</MainTextBoxAutoBreak>
<MainTextBoxAutoBreakFromPos>Break at first space from cursor position</MainTextBoxAutoBreakFromPos> <MainTextBoxAutoBreakFromPos>Break at first space from cursor position</MainTextBoxAutoBreakFromPos>
<MainTextBoxAutoBreakFromPosAndGoToNext>Break at first space from cursor position and go to next</MainTextBoxAutoBreakFromPosAndGoToNext> <MainTextBoxAutoBreakFromPosAndGoToNext>Break at first space from cursor position and go to next</MainTextBoxAutoBreakFromPosAndGoToNext>
<MainTextBoxDictate>Dictate</MainTextBoxDictate>
<MainTextBoxUnbreak>Unbreak text</MainTextBoxUnbreak> <MainTextBoxUnbreak>Unbreak text</MainTextBoxUnbreak>
<MainTextBoxUnbreakNoSpace>Unbreak without space (CJK)</MainTextBoxUnbreakNoSpace> <MainTextBoxUnbreakNoSpace>Unbreak without space (CJK)</MainTextBoxUnbreakNoSpace>
<MainTextBoxAssaIntellisense>Show ASSA tag helper</MainTextBoxAssaIntellisense> <MainTextBoxAssaIntellisense>Show ASSA tag helper</MainTextBoxAssaIntellisense>

View File

@ -179,6 +179,8 @@ COPY /Y /V "..\..\DLLs\Interop.QuartzTypeLib.dll" "temp_zip\"
COPY /Y /V "System.Net.Http.Extensions.dll" "temp_zip\" COPY /Y /V "System.Net.Http.Extensions.dll" "temp_zip\"
COPY /Y /V "Newtonsoft.Json.dll" "temp_zip\" COPY /Y /V "Newtonsoft.Json.dll" "temp_zip\"
COPY /Y /V "System.Net.Http.Primitives.dll" "temp_zip\" COPY /Y /V "System.Net.Http.Primitives.dll" "temp_zip\"
COPY /Y /V "NAudio.Core.dll" "temp_zip\"
COPY /Y /V "NAudio.WinMM.dll" "temp_zip\"
COPY /Y /V "SubtitleEdit.exe" "temp_zip\" COPY /Y /V "SubtitleEdit.exe" "temp_zip\"
COPY /Y /V "Languages\*.xml" "temp_zip\Languages\" COPY /Y /V "Languages\*.xml" "temp_zip\Languages\"
COPY /Y /V "..\..\..\..\Dictionaries\*.*" "temp_zip\Dictionaries\" COPY /Y /V "..\..\..\..\Dictionaries\*.*" "temp_zip\Dictionaries\"

View File

@ -202,6 +202,9 @@ namespace Nikse.SubtitleEdit.Forms
private ListBox _intellisenceList; private ListBox _intellisenceList;
private ListBox _intellisenceListOriginal; private ListBox _intellisenceListOriginal;
private bool _updateSelectedCountStatusBar; private bool _updateSelectedCountStatusBar;
private Dictate _dictateForm;
private object _dictateTextBox;
private bool _hasCurrentVosk;
public bool IsMenuOpen { get; private set; } public bool IsMenuOpen { get; private set; }
@ -10699,12 +10702,55 @@ namespace Nikse.SubtitleEdit.Forms
else if (e.KeyData == _shortcuts.MainTextBoxRecord) else if (e.KeyData == _shortcuts.MainTextBoxRecord)
{ {
e.SuppressKeyPress = true; e.SuppressKeyPress = true;
using (var form = new Dictate())
if (_dictateTextBox != null && _dictateForm != null)
{ {
if (form.ShowDialog(this) != DialogResult.OK) return; // already recording
}
if (!RequireFfmpegOk())
{
return;
}
var voskFolder = Path.Combine(Configuration.DataDirectory, "Vosk");
if (Configuration.IsRunningOnWindows && !HasCurrentVosk(voskFolder))
{
if (MessageBox.Show(string.Format(LanguageSettings.Current.Settings.DownloadX, "libvosk"), "Subtitle Edit", MessageBoxButtons.YesNoCancel) != DialogResult.Yes)
{ {
return; return;
} }
using (var form = new DownloadVosk())
{
if (form.ShowDialog(this) != DialogResult.OK)
{
return;
}
}
}
if (_subtitle?.GetParagraphOrDefault(_subtitleListViewIndex) == null || !(sender is TextBox || sender is AdvancedTextBox))
{
return;
}
if (_dictateForm == null || string.IsNullOrEmpty(Configuration.Settings.Tools.VoskModel))
{
_dictateForm?.Dispose();
_dictateForm = new Dictate();
if (_dictateForm.ShowDialog(this) != DialogResult.OK)
{
return;
}
_dictateTextBox = null;
}
else
{
_dictateTextBox = sender;
_dictateForm.Record();
ShowStatus("Recording...");
} }
} }
@ -12945,6 +12991,8 @@ namespace Nikse.SubtitleEdit.Forms
// ignore // ignore
} }
} }
_dictateForm?.Dispose();
} }
private void SaveListViewWidths() private void SaveListViewWidths()
@ -17281,7 +17329,7 @@ namespace Nikse.SubtitleEdit.Forms
{ {
SetPlayRateAndPlay(200, false); SetPlayRateAndPlay(200, false);
} }
else else
{ {
SetPlayRateAndPlay(100, false); SetPlayRateAndPlay(100, false);
} }
@ -29830,6 +29878,28 @@ namespace Nikse.SubtitleEdit.Forms
_mainAdjustStartDownEndUpAndGoToNextParagraph = null; _mainAdjustStartDownEndUpAndGoToNextParagraph = null;
} }
else if (_dictateTextBox != null && _dictateForm != null)
{
Application.DoEvents();
Cursor = Cursors.WaitCursor;
ShowStatus(LanguageSettings.Current.AudioToText.Transcribing);
var text = _dictateForm.RecordingToText();
Cursor = Cursors.Default;
if (!string.IsNullOrWhiteSpace(text))
{
if (_dictateTextBox is TextBox tb)
{
tb.Paste(text);
}
else if (_dictateTextBox is AdvancedTextBox atb)
{
atb.SelectedText = text;
}
}
ShowStatus(string.Empty);
_dictateTextBox = null;
}
} }
private void ToolStripMenuItemSurroundWithMusicSymbolsClick(object sender, EventArgs e) private void ToolStripMenuItemSurroundWithMusicSymbolsClick(object sender, EventArgs e)
@ -34306,7 +34376,7 @@ namespace Nikse.SubtitleEdit.Forms
private bool HasCurrentVosk(string voskFolder) private bool HasCurrentVosk(string voskFolder)
{ {
if (Configuration.IsRunningOnLinux) if (Configuration.IsRunningOnLinux || _hasCurrentVosk)
{ {
return true; return true;
} }
@ -34322,7 +34392,9 @@ namespace Nikse.SubtitleEdit.Forms
? "1cc13d8e2ffd3ad7ca76941c99e8ad00567d0b8135878c3a80fb938054cf98bde1f692647e6d19df7526c98aa5ad975d72dba20bf1759baedba5c753a14480bb" ? "1cc13d8e2ffd3ad7ca76941c99e8ad00567d0b8135878c3a80fb938054cf98bde1f692647e6d19df7526c98aa5ad975d72dba20bf1759baedba5c753a14480bb"
: "77479a934650b40968d54dcf71fce17237c59b62b6c64ad3d6b5433486b76b6202eb956e93597ba466c67aa0d553db7b2863e0aeb8856a6dd29a3aba3a14bf66"; : "77479a934650b40968d54dcf71fce17237c59b62b6c64ad3d6b5433486b76b6202eb956e93597ba466c67aa0d553db7b2863e0aeb8856a6dd29a3aba3a14bf66";
var hash = Utilities.GetSha512Hash(FileUtil.ReadAllBytesShared(voskDll)); var hash = Utilities.GetSha512Hash(FileUtil.ReadAllBytesShared(voskDll));
return currentVoskDllSha512Hash == hash;
_hasCurrentVosk = currentVoskDllSha512Hash == hash;
return _hasCurrentVosk;
} }
private void Main_MouseDown(object sender, MouseEventArgs e) private void Main_MouseDown(object sender, MouseEventArgs e)
@ -34359,7 +34431,7 @@ namespace Nikse.SubtitleEdit.Forms
{ {
Cursor = Cursors.WaitCursor; Cursor = Cursors.WaitCursor;
var timeCode = new TimeCode(mediaPlayer.CurrentPosition * 1000.0 + 1000).ToHHMMSS(); var timeCode = new TimeCode(mediaPlayer.CurrentPosition * 1000.0 + 1000).ToHHMMSS();
var colorMatrix = "bt601:bt709"; // ffmpeg bug with assa color? var colorMatrix = "bt601:bt709"; // ffmpeg bug with assa color?
var bmpFileName = VideoPreviewGenerator.GetScreenShot(_videoFileName, timeCode, colorMatrix); var bmpFileName = VideoPreviewGenerator.GetScreenShot(_videoFileName, timeCode, colorMatrix);
using (var bmp = new Bitmap(bmpFileName)) using (var bmp = new Bitmap(bmpFileName))
{ {

View File

@ -64,7 +64,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing; checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing;
_voskFolder = Path.Combine(Configuration.DataDirectory, "Vosk"); _voskFolder = Path.Combine(Configuration.DataDirectory, "Vosk");
FillModels(string.Empty); FillModels(comboBoxModels, string.Empty);
textBoxLog.Visible = false; textBoxLog.Visible = false;
textBoxLog.Dock = DockStyle.Fill; textBoxLog.Dock = DockStyle.Fill;
@ -86,11 +86,12 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
labelFC.Text = string.Empty; labelFC.Text = string.Empty;
} }
private void FillModels(string lastDownloadedModel) public static void FillModels(ComboBox comboBoxModels, string lastDownloadedModel)
{ {
var voskFolder = Path.Combine(Configuration.DataDirectory, "Vosk");
var selectName = string.IsNullOrEmpty(lastDownloadedModel) ? Configuration.Settings.Tools.VoskModel : lastDownloadedModel; var selectName = string.IsNullOrEmpty(lastDownloadedModel) ? Configuration.Settings.Tools.VoskModel : lastDownloadedModel;
comboBoxModels.Items.Clear(); comboBoxModels.Items.Clear();
foreach (var directory in Directory.GetDirectories(_voskFolder)) foreach (var directory in Directory.GetDirectories(voskFolder))
{ {
var name = Path.GetFileName(directory); var name = Path.GetFileName(directory);
if (!File.Exists(Path.Combine(directory, "final.mdl")) && !File.Exists(Path.Combine(directory, "am", "final.mdl"))) if (!File.Exists(Path.Combine(directory, "final.mdl")) && !File.Exists(Path.Combine(directory, "am", "final.mdl")))
@ -338,7 +339,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
return list; return list;
} }
private static List<ResultText> ParseJsonToResult(string result) public static List<ResultText> ParseJsonToResult(string result)
{ {
var list = new List<ResultText>(); var list = new List<ResultText>();
var jsonParser = new SeJsonParser(); var jsonParser = new SeJsonParser();
@ -597,7 +598,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
using (var form = new AudioToTextModelDownload { AutoClose = true }) using (var form = new AudioToTextModelDownload { AutoClose = true })
{ {
form.ShowDialog(this); form.ShowDialog(this);
FillModels(form.LastDownloadedModel); FillModels(comboBoxModels, form.LastDownloadedModel);
} }
} }

View File

@ -49,7 +49,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing; checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing;
_voskFolder = Path.Combine(Configuration.DataDirectory, "Vosk"); _voskFolder = Path.Combine(Configuration.DataDirectory, "Vosk");
FillModels(string.Empty); AudioToText.FillModels(comboBoxModels, string.Empty);
textBoxLog.Visible = false; textBoxLog.Visible = false;
textBoxLog.Dock = DockStyle.Fill; textBoxLog.Dock = DockStyle.Fill;
@ -64,31 +64,6 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
} }
} }
/// <summary>
/// Rebuilds the model combo box from the sub-folders of the Vosk folder that contain
/// a "final.mdl" file (directly or inside an "am" sub-folder).
/// </summary>
/// <param name="lastDownloadedModel">
/// Name of the model to select; when null or empty the model name stored in the
/// settings is selected instead. If neither is found, the first entry is selected.
/// </param>
private void FillModels(string lastDownloadedModel)
{
    var selectName = string.IsNullOrEmpty(lastDownloadedModel) ? Configuration.Settings.Tools.VoskModel : lastDownloadedModel;
    comboBoxModels.Items.Clear();

    // Directory.GetDirectories throws DirectoryNotFoundException for a missing folder;
    // a missing folder just means there is nothing to list.
    if (!Directory.Exists(_voskFolder))
    {
        return;
    }

    foreach (var directory in Directory.GetDirectories(_voskFolder))
    {
        var name = Path.GetFileName(directory);
        if (!File.Exists(Path.Combine(directory, "final.mdl")) && !File.Exists(Path.Combine(directory, "am", "final.mdl")))
        {
            continue;
        }

        comboBoxModels.Items.Add(name);
        if (name == selectName)
        {
            comboBoxModels.SelectedIndex = comboBoxModels.Items.Count - 1;
        }
    }

    // Fall back to the first model when the preferred one is not present.
    if (comboBoxModels.SelectedIndex < 0 && comboBoxModels.Items.Count > 0)
    {
        comboBoxModels.SelectedIndex = 0;
    }
}
private void ButtonGenerate_Click(object sender, EventArgs e) private void ButtonGenerate_Click(object sender, EventArgs e)
{ {
if (comboBoxModels.Items.Count == 0) if (comboBoxModels.Items.Count == 0)
@ -258,7 +233,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
if (rec.AcceptWaveform(buffer, bytesRead)) if (rec.AcceptWaveform(buffer, bytesRead))
{ {
var res = rec.Result(); var res = rec.Result();
var results = ParseJsonToResult(res); var results = AudioToText.ParseJsonToResult(res);
list.AddRange(results); list.AddRange(results);
} }
else else
@ -276,36 +251,12 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
} }
var finalResult = rec.FinalResult(); var finalResult = rec.FinalResult();
var finalResults = ParseJsonToResult(finalResult); var finalResults = AudioToText.ParseJsonToResult(finalResult);
list.AddRange(finalResults); list.AddRange(finalResults);
timer1.Stop(); timer1.Stop();
return list; return list;
} }
/// <summary>
/// Extracts the word entries from a Vosk JSON result string.
/// </summary>
/// <param name="result">JSON text containing a "result" array.</param>
/// <returns>
/// One <see cref="ResultText"/> per array element that has a non-blank "word" and
/// parseable "conf", "start" and "end" decimal values; other elements are skipped.
/// </returns>
private static List<ResultText> ParseJsonToResult(string result)
{
    var words = new List<ResultText>();
    var parser = new SeJsonParser();
    foreach (var element in parser.GetArrayElementsByName(result, "result"))
    {
        var confText = parser.GetFirstObject(element, "conf");
        var startText = parser.GetFirstObject(element, "start");
        var endText = parser.GetFirstObject(element, "end");
        var wordText = parser.GetFirstObject(element, "word");

        if (string.IsNullOrWhiteSpace(wordText))
        {
            continue;
        }

        // Numbers are parsed culture-invariantly; any unparseable value drops the entry.
        if (!decimal.TryParse(confText, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var confidence))
        {
            continue;
        }

        if (!decimal.TryParse(startText, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var startSeconds))
        {
            continue;
        }

        if (!decimal.TryParse(endText, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var endSeconds))
        {
            continue;
        }

        words.Add(new ResultText { Confidence = confidence, Text = wordText, Start = startSeconds, End = endSeconds });
    }

    return words;
}
private void buttonCancel_Click(object sender, EventArgs e) private void buttonCancel_Click(object sender, EventArgs e)
{ {
if (buttonGenerate.Enabled) if (buttonGenerate.Enabled)
@ -397,7 +348,7 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
using (var form = new AudioToTextModelDownload { AutoClose = true }) using (var form = new AudioToTextModelDownload { AutoClose = true })
{ {
form.ShowDialog(this); form.ShowDialog(this);
FillModels(form.LastDownloadedModel); AudioToText.FillModels(comboBoxModels, form.LastDownloadedModel);
} }
} }

View File

@ -30,13 +30,20 @@
{ {
this.buttonCancel = new System.Windows.Forms.Button(); this.buttonCancel = new System.Windows.Forms.Button();
this.buttonOK = new System.Windows.Forms.Button(); this.buttonOK = new System.Windows.Forms.Button();
this.groupBoxModels = new System.Windows.Forms.GroupBox();
this.buttonDownload = new System.Windows.Forms.Button();
this.linkLabelOpenModelsFolder = new System.Windows.Forms.LinkLabel();
this.labelModel = new System.Windows.Forms.Label();
this.comboBoxModels = new System.Windows.Forms.ComboBox();
this.checkBoxUsePostProcessing = new System.Windows.Forms.CheckBox();
this.groupBoxModels.SuspendLayout();
this.SuspendLayout(); this.SuspendLayout();
// //
// buttonCancel // buttonCancel
// //
this.buttonCancel.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); this.buttonCancel.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
this.buttonCancel.ImeMode = System.Windows.Forms.ImeMode.NoControl; this.buttonCancel.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonCancel.Location = new System.Drawing.Point(713, 415); this.buttonCancel.Location = new System.Drawing.Point(539, 137);
this.buttonCancel.Name = "buttonCancel"; this.buttonCancel.Name = "buttonCancel";
this.buttonCancel.Size = new System.Drawing.Size(75, 23); this.buttonCancel.Size = new System.Drawing.Size(75, 23);
this.buttonCancel.TabIndex = 6; this.buttonCancel.TabIndex = 6;
@ -48,7 +55,7 @@
// //
this.buttonOK.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); this.buttonOK.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
this.buttonOK.ImeMode = System.Windows.Forms.ImeMode.NoControl; this.buttonOK.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonOK.Location = new System.Drawing.Point(632, 415); this.buttonOK.Location = new System.Drawing.Point(458, 137);
this.buttonOK.Name = "buttonOK"; this.buttonOK.Name = "buttonOK";
this.buttonOK.Size = new System.Drawing.Size(75, 23); this.buttonOK.Size = new System.Drawing.Size(75, 23);
this.buttonOK.TabIndex = 5; this.buttonOK.TabIndex = 5;
@ -56,21 +63,95 @@
this.buttonOK.UseVisualStyleBackColor = true; this.buttonOK.UseVisualStyleBackColor = true;
this.buttonOK.Click += new System.EventHandler(this.buttonOK_Click); this.buttonOK.Click += new System.EventHandler(this.buttonOK_Click);
// //
// groupBoxModels
//
this.groupBoxModels.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.groupBoxModels.Controls.Add(this.buttonDownload);
this.groupBoxModels.Controls.Add(this.linkLabelOpenModelsFolder);
this.groupBoxModels.Controls.Add(this.labelModel);
this.groupBoxModels.Controls.Add(this.comboBoxModels);
this.groupBoxModels.Location = new System.Drawing.Point(12, 12);
this.groupBoxModels.Name = "groupBoxModels";
this.groupBoxModels.Size = new System.Drawing.Size(602, 82);
this.groupBoxModels.TabIndex = 7;
this.groupBoxModels.TabStop = false;
this.groupBoxModels.Text = "Models";
//
// buttonDownload
//
this.buttonDownload.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonDownload.Location = new System.Drawing.Point(265, 43);
this.buttonDownload.Name = "buttonDownload";
this.buttonDownload.Size = new System.Drawing.Size(28, 23);
this.buttonDownload.TabIndex = 2;
this.buttonDownload.Text = "...";
this.buttonDownload.UseVisualStyleBackColor = true;
this.buttonDownload.Click += new System.EventHandler(this.buttonDownload_Click);
//
// linkLabelOpenModelsFolder
//
this.linkLabelOpenModelsFolder.AutoSize = true;
this.linkLabelOpenModelsFolder.Location = new System.Drawing.Point(301, 51);
this.linkLabelOpenModelsFolder.Name = "linkLabelOpenModelsFolder";
this.linkLabelOpenModelsFolder.Size = new System.Drawing.Size(98, 13);
this.linkLabelOpenModelsFolder.TabIndex = 3;
this.linkLabelOpenModelsFolder.TabStop = true;
this.linkLabelOpenModelsFolder.Text = "Open models folder";
this.linkLabelOpenModelsFolder.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelOpenModelsFolder_LinkClicked);
//
// labelModel
//
this.labelModel.AutoSize = true;
this.labelModel.Location = new System.Drawing.Point(16, 29);
this.labelModel.Name = "labelModel";
this.labelModel.Size = new System.Drawing.Size(167, 13);
this.labelModel.TabIndex = 0;
this.labelModel.Text = "Choose speech recognition model";
//
// comboBoxModels
//
this.comboBoxModels.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxModels.FormattingEnabled = true;
this.comboBoxModels.Location = new System.Drawing.Point(19, 45);
this.comboBoxModels.Name = "comboBoxModels";
this.comboBoxModels.Size = new System.Drawing.Size(240, 21);
this.comboBoxModels.TabIndex = 1;
//
// checkBoxUsePostProcessing
//
this.checkBoxUsePostProcessing.AutoSize = true;
this.checkBoxUsePostProcessing.Location = new System.Drawing.Point(12, 102);
this.checkBoxUsePostProcessing.Name = "checkBoxUsePostProcessing";
this.checkBoxUsePostProcessing.Size = new System.Drawing.Size(312, 17);
this.checkBoxUsePostProcessing.TabIndex = 8;
this.checkBoxUsePostProcessing.Text = "Use post-processing (line merge, fix casing, and punctuation)";
this.checkBoxUsePostProcessing.UseVisualStyleBackColor = true;
//
// Dictate // Dictate
// //
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(800, 450); this.ClientSize = new System.Drawing.Size(626, 172);
this.Controls.Add(this.checkBoxUsePostProcessing);
this.Controls.Add(this.groupBoxModels);
this.Controls.Add(this.buttonCancel); this.Controls.Add(this.buttonCancel);
this.Controls.Add(this.buttonOK); this.Controls.Add(this.buttonOK);
this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedDialog;
this.KeyPreview = true; this.KeyPreview = true;
this.MaximizeBox = false;
this.MinimizeBox = false;
this.Name = "Dictate"; this.Name = "Dictate";
this.ShowIcon = false; this.ShowIcon = false;
this.ShowInTaskbar = false; this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent; this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "Dictate"; this.Text = "Dictate";
this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.Dictate_FormClosing);
this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.Dictate_KeyDown); this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.Dictate_KeyDown);
this.groupBoxModels.ResumeLayout(false);
this.groupBoxModels.PerformLayout();
this.ResumeLayout(false); this.ResumeLayout(false);
this.PerformLayout();
} }
@ -78,5 +159,11 @@
private System.Windows.Forms.Button buttonCancel; private System.Windows.Forms.Button buttonCancel;
private System.Windows.Forms.Button buttonOK; private System.Windows.Forms.Button buttonOK;
private System.Windows.Forms.GroupBox groupBoxModels;
private System.Windows.Forms.Button buttonDownload;
private System.Windows.Forms.LinkLabel linkLabelOpenModelsFolder;
private System.Windows.Forms.Label labelModel;
private System.Windows.Forms.ComboBox comboBoxModels;
private System.Windows.Forms.CheckBox checkBoxUsePostProcessing;
} }
} }

View File

@ -1,19 +1,126 @@
using System; using NAudio.Wave;
using System.Windows.Forms; using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic; using Nikse.SubtitleEdit.Logic;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Windows.Forms;
using Vosk;
namespace Nikse.SubtitleEdit.Forms.SpeechRecognition namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
{ {
public partial class Dictate : Form public partial class Dictate : Form
{ {
private static WaveFileWriter _waveFile;
private static Model _model;
private WaveInEvent _waveSource;
public string WaveFileName { get; set; }
public static bool DataRecorded { get; set; }
// Creates the dictate dialog: applies the localized captions, fills the model
// combo box and restores the post-processing choice from the settings.
public Dictate() public Dictate()
{ {
UiUtil.PreInitialize(this); UiUtil.PreInitialize(this);
InitializeComponent(); InitializeComponent();
UiUtil.FixFonts(this); UiUtil.FixFonts(this);
// Captions come from the current language settings.
groupBoxModels.Text = LanguageSettings.Current.AudioToText.Models;
labelModel.Text = LanguageSettings.Current.AudioToText.ChooseModel;
linkLabelOpenModelsFolder.Text = LanguageSettings.Current.AudioToText.OpenModelsFolder;
checkBoxUsePostProcessing.Text = LanguageSettings.Current.AudioToText.UsePostProcessing;
buttonOK.Text = LanguageSettings.Current.General.Ok; buttonOK.Text = LanguageSettings.Current.General.Ok;
buttonCancel.Text = LanguageSettings.Current.General.Cancel; buttonCancel.Text = LanguageSettings.Current.General.Cancel;
UiUtil.FixLargeFonts(this, buttonOK); UiUtil.FixLargeFonts(this, buttonOK);
// An empty "last downloaded" name makes FillModels select the model stored in the settings.
AudioToText.FillModels(comboBoxModels, string.Empty);
checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing;
}
/// <summary>
/// Starts capturing audio (sample rate 16000, one channel) into a new, uniquely named
/// wave file in the temp folder. The path is stored in <see cref="WaveFileName"/> and
/// <see cref="DataRecorded"/> is reset to false before capture starts.
/// </summary>
/// <remarks>
/// If the writer cannot be created or capture cannot be started, the capture device,
/// the writer and the temp file are released and the original exception is rethrown.
/// </remarks>
public void Record()
{
    _waveSource = new WaveInEvent { WaveFormat = new WaveFormat(16000, 1) };
    _waveSource.DataAvailable += WaveSourceDataAvailable;
    WaveFileName = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.wav");
    DataRecorded = false;

    WaveFileWriter writer = null;
    try
    {
        writer = new WaveFileWriter(WaveFileName, _waveSource.WaveFormat);
        _waveFile = writer;
        _waveSource.StartRecording();
    }
    catch
    {
        // A failed start must not leave the device open, the file locked or a temp file behind.
        _waveSource.DataAvailable -= WaveSourceDataAvailable;
        _waveSource.Dispose();
        writer?.Dispose();

        var failedFileName = WaveFileName;
        WaveFileName = null;
        try
        {
            File.Delete(failedFileName);
        }
        catch
        {
            // ignore - best-effort cleanup, the original exception is what matters
        }

        throw;
    }
}
/// <summary>
/// Stops the current recording and runs the recorded wave file through the Vosk
/// recognizer.
/// </summary>
/// <returns>
/// The recognized words joined by single spaces, or an empty string when no audio
/// data was recorded or the wave file does not exist.
/// </returns>
/// <remarks>
/// The temporary wave file is always removed (best effort) before returning, also
/// when nothing was recorded or recognition throws.
/// </remarks>
public string RecordingToText()
{
    // Null-conditional calls tolerate being invoked when no recording was started.
    _waveSource?.StopRecording();
    _waveSource?.Dispose();
    _waveFile?.Close();

    try
    {
        if (!DataRecorded || !File.Exists(WaveFileName))
        {
            return string.Empty;
        }

        var voskFolder = Path.Combine(Configuration.DataDirectory, "Vosk");

        // NOTE(review): presumably required so the native Vosk library is found - confirm.
        Directory.SetCurrentDirectory(voskFolder);
        Vosk.Vosk.SetLogLevel(0);
        if (_model == null)
        {
            // NOTE(review): the model is cached in a static field, so a different
            // combo box selection made later is not picked up here.
            _model = new Model(Path.Combine(voskFolder, comboBoxModels.Text));
        }

        var list = new List<ResultText>();

        // The recognizer is disposable; release it as soon as the final result is read.
        using (var rec = new VoskRecognizer(_model, 16000.0f))
        {
            rec.SetMaxAlternatives(0);
            rec.SetWords(true);

            var buffer = new byte[4096];
            using (var source = File.OpenRead(WaveFileName))
            {
                int bytesRead;
                while ((bytesRead = source.Read(buffer, 0, buffer.Length)) > 0)
                {
                    // Only completed utterances are collected; partial results are not needed.
                    if (rec.AcceptWaveform(buffer, bytesRead))
                    {
                        list.AddRange(AudioToText.ParseJsonToResult(rec.Result()));
                    }
                }
            }

            list.AddRange(AudioToText.ParseJsonToResult(rec.FinalResult()));
        }

        return ResultListToText(list);
    }
    finally
    {
        DeleteWaveFile();
    }
}

/// <summary>
/// Best-effort removal of the temporary wave file; clears <see cref="WaveFileName"/>.
/// </summary>
private void DeleteWaveFile()
{
    var fileName = WaveFileName;
    WaveFileName = null;
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    try
    {
        File.Delete(fileName);
    }
    catch
    {
        // ignore - a leftover temp file must not break dictation
    }
}
/// <summary>
/// Joins the text of all results with single spaces and trims surrounding whitespace.
/// </summary>
/// <param name="list">Recognized words in order.</param>
/// <returns>The combined text; empty when the list has no entries.</returns>
private static string ResultListToText(List<ResultText> list)
{
    var words = list.ConvertAll(entry => entry.Text);
    return string.Join(" ", words).Trim();
}
/// <summary>
/// Capture callback: appends the received audio bytes to the current wave file and
/// flags that data has been recorded.
/// </summary>
private static void WaveSourceDataAvailable(object sender, WaveInEventArgs e)
{
    var chunk = e.Buffer;
    var chunkLength = e.BytesRecorded;
    _waveFile.Write(chunk, 0, chunkLength);
    DataRecorded = true;
}
private void buttonOK_Click(object sender, EventArgs e) private void buttonOK_Click(object sender, EventArgs e)
@ -33,5 +140,26 @@ namespace Nikse.SubtitleEdit.Forms.SpeechRecognition
DialogResult = DialogResult.Cancel; DialogResult = DialogResult.Cancel;
} }
} }
/// <summary>
/// Shows the model download dialog, then refreshes the model combo box, passing the
/// dialog's last downloaded model name to <c>AudioToText.FillModels</c>.
/// </summary>
private void buttonDownload_Click(object sender, EventArgs e)
{
    using (var downloadForm = new AudioToTextModelDownload { AutoClose = true })
    {
        downloadForm.ShowDialog(this);
        var downloadedModel = downloadForm.LastDownloadedModel;
        AudioToText.FillModels(comboBoxModels, downloadedModel);
    }
}
/// <summary>
/// Opens the "Vosk" folder below the data directory.
/// </summary>
private void linkLabelOpenModelsFolder_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
{
    UiUtil.OpenFolder(Path.Combine(Configuration.DataDirectory, "Vosk"));
}
/// <summary>
/// Stores the selected model name and the post-processing choice in the tool
/// settings whenever the form closes.
/// </summary>
private void Dictate_FormClosing(object sender, FormClosingEventArgs e)
{
    var tools = Configuration.Settings.Tools;
    tools.VoskPostProcessing = checkBoxUsePostProcessing.Checked;
    tools.VoskModel = comboBoxModels.Text;
}
} }
} }

View File

@ -2896,7 +2896,7 @@ can edit in same subtitle file (collaboration)",
MainTextBoxAutoBreak = "Auto break text", MainTextBoxAutoBreak = "Auto break text",
MainTextBoxAutoBreakFromPos = "Break at first space from cursor position", MainTextBoxAutoBreakFromPos = "Break at first space from cursor position",
MainTextBoxAutoBreakFromPosAndGoToNext = "Break at first space from cursor position and go to next", MainTextBoxAutoBreakFromPosAndGoToNext = "Break at first space from cursor position and go to next",
MainTextBoxDictate = "Dictate", MainTextBoxDictate = "Dictate (key down=start recording, key up=end recording)",
MainTextBoxUnbreak = "Unbreak text", MainTextBoxUnbreak = "Unbreak text",
MainTextBoxUnbreakNoSpace = "Unbreak without space (CJK)", MainTextBoxUnbreakNoSpace = "Unbreak without space (CJK)",
MainTextBoxAssaIntellisense = "Show ASSA tag helper", MainTextBoxAssaIntellisense = "Show ASSA tag helper",

View File

@ -7036,6 +7036,9 @@ namespace Nikse.SubtitleEdit.Logic
case "Settings/MainTextBoxAutoBreakFromPosAndGoToNext": case "Settings/MainTextBoxAutoBreakFromPosAndGoToNext":
language.Settings.MainTextBoxAutoBreakFromPosAndGoToNext = reader.Value; language.Settings.MainTextBoxAutoBreakFromPosAndGoToNext = reader.Value;
break; break;
case "Settings/MainTextBoxDictate":
language.Settings.MainTextBoxDictate = reader.Value;
break;
case "Settings/MainTextBoxUnbreak": case "Settings/MainTextBoxUnbreak":
language.Settings.MainTextBoxUnbreak = reader.Value; language.Settings.MainTextBoxUnbreak = reader.Value;
break; break;