This commit is contained in:
niksedk 2022-07-25 19:15:20 +02:00
parent 05c6b68fa6
commit d3d3210d51
8 changed files with 1059 additions and 9 deletions

View File

@ -12,7 +12,7 @@ namespace Nikse.SubtitleEdit.Forms
{
public class AudioClip
{
public string AudioFile { get; set; }
public string AudioFileName { get; set; }
public Paragraph Paragraph { get; set; }
}
@ -38,6 +38,7 @@ namespace Nikse.SubtitleEdit.Forms
progressBar1.Value = 0;
var targetFolder = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
Directory.CreateDirectory(targetFolder);
AudioClips = new List<AudioClip>();
while (index < _paragraphs.Count && _abort == false)
{
var item = _paragraphs[index];
@ -82,7 +83,7 @@ namespace Nikse.SubtitleEdit.Forms
AudioClips.Add(new AudioClip
{
Paragraph = item,
AudioFile = targetFile,
AudioFileName = targetFile,
});
UpdateStatus(LanguageSettings.Current.AddWaveformBatch.Done);

View File

@ -175,6 +175,7 @@
this.comboBoxModels.Name = "comboBoxModels";
this.comboBoxModels.Size = new System.Drawing.Size(240, 21);
this.comboBoxModels.TabIndex = 1;
this.comboBoxModels.SelectedIndexChanged += new System.EventHandler(this.comboBoxModels_SelectedIndexChanged);
//
// linkLabelVoskWebSite
//

View File

@ -27,6 +27,8 @@ namespace Nikse.SubtitleEdit.Forms
private long _bytesWavRead;
private readonly List<string> _filesToDelete;
private readonly Form _parentForm;
private Model _model;
public Subtitle TranscribedSubtitle { get; private set; }
public AudioToText(string videoFileName, Form parentForm)
@ -247,8 +249,12 @@ namespace Nikse.SubtitleEdit.Forms
Application.DoEvents();
Directory.SetCurrentDirectory(_voskFolder);
Vosk.Vosk.SetLogLevel(0);
var model = new Model(modelFileName);
var rec = new VoskRecognizer(model, 16000.0f);
if (_model == null)
{
_model = new Model(modelFileName);
}
var rec = new VoskRecognizer(_model, 16000.0f);
rec.SetMaxAlternatives(0);
rec.SetWords(true);
var list = new List<ResultText>();
@ -619,5 +625,10 @@ namespace Nikse.SubtitleEdit.Forms
ShowHideBatchMode();
listViewInputFiles.Columns[0].Width = -2;
}
/// <summary>
/// Invalidates the cached Vosk model when another model is selected, so the next
/// transcription loads the newly chosen one.
/// </summary>
private void comboBoxModels_SelectedIndexChanged(object sender, EventArgs e)
{
    // The model wraps native resources; release them instead of waiting for finalization.
    _model?.Dispose();
    _model = null;
}
}
}

View File

@ -0,0 +1,305 @@
namespace Nikse.SubtitleEdit.Forms
{
sealed partial class AudioToTextSelectedLines
{
/// <summary>
/// Container for the components created by the designer (created in InitializeComponent).
/// </summary>
private System.ComponentModel.IContainer components = null;

/// <summary>
/// Disposes the designer component container when managed state is being released,
/// then hands over to the base form.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
    if (disposing)
    {
        components?.Dispose();
    }

    base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
/// <remarks>
/// Creates every control of the dialog, assigns layout and text properties,
/// wires the event handlers and finally adds the controls to the form.
/// The texts assigned here are designer defaults; several of them are
/// overwritten with localized strings in the form's constructor.
/// </remarks>
private void InitializeComponent()
{
// Control instantiation - the timer is registered with the components container.
this.components = new System.ComponentModel.Container();
this.buttonCancel = new System.Windows.Forms.Button();
this.buttonGenerate = new System.Windows.Forms.Button();
this.progressBar1 = new System.Windows.Forms.ProgressBar();
this.labelProgress = new System.Windows.Forms.Label();
this.textBoxLog = new System.Windows.Forms.TextBox();
this.labelInfo = new System.Windows.Forms.Label();
this.groupBoxModels = new System.Windows.Forms.GroupBox();
this.buttonDownload = new System.Windows.Forms.Button();
this.linkLabelOpenModelsFolder = new System.Windows.Forms.LinkLabel();
this.labelModel = new System.Windows.Forms.Label();
this.comboBoxModels = new System.Windows.Forms.ComboBox();
this.linkLabelVoskWebSite = new System.Windows.Forms.LinkLabel();
this.labelTime = new System.Windows.Forms.Label();
this.timer1 = new System.Windows.Forms.Timer(this.components);
this.checkBoxUsePostProcessing = new System.Windows.Forms.CheckBox();
this.groupBoxInputFiles = new System.Windows.Forms.GroupBox();
this.listViewInputFiles = new System.Windows.Forms.ListView();
this.columnHeaderFileName = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader()));
// Layout is suspended while properties are assigned and resumed at the end of the method.
this.groupBoxModels.SuspendLayout();
this.groupBoxInputFiles.SuspendLayout();
this.SuspendLayout();
//
// buttonCancel
//
this.buttonCancel.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
this.buttonCancel.DialogResult = System.Windows.Forms.DialogResult.Cancel;
this.buttonCancel.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonCancel.Location = new System.Drawing.Point(622, 427);
this.buttonCancel.Name = "buttonCancel";
this.buttonCancel.Size = new System.Drawing.Size(75, 23);
this.buttonCancel.TabIndex = 8;
this.buttonCancel.Text = "C&ancel";
this.buttonCancel.UseVisualStyleBackColor = true;
this.buttonCancel.Click += new System.EventHandler(this.buttonCancel_Click);
//
// buttonGenerate
//
this.buttonGenerate.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
this.buttonGenerate.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonGenerate.Location = new System.Drawing.Point(373, 427);
this.buttonGenerate.Name = "buttonGenerate";
this.buttonGenerate.Size = new System.Drawing.Size(125, 23);
this.buttonGenerate.TabIndex = 6;
this.buttonGenerate.Text = "&Generate";
this.buttonGenerate.UseVisualStyleBackColor = true;
this.buttonGenerate.Click += new System.EventHandler(this.ButtonGenerate_Click);
//
// progressBar1
//
this.progressBar1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.progressBar1.Location = new System.Drawing.Point(12, 427);
this.progressBar1.Name = "progressBar1";
this.progressBar1.Size = new System.Drawing.Size(355, 12);
this.progressBar1.TabIndex = 5;
// Starts hidden; the form code makes it visible once processing begins.
this.progressBar1.Visible = false;
//
// labelProgress
//
this.labelProgress.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)));
this.labelProgress.AutoSize = true;
this.labelProgress.Location = new System.Drawing.Point(12, 409);
this.labelProgress.Name = "labelProgress";
this.labelProgress.Size = new System.Drawing.Size(70, 13);
this.labelProgress.TabIndex = 4;
this.labelProgress.Text = "labelProgress";
//
// textBoxLog
//
this.textBoxLog.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.textBoxLog.Location = new System.Drawing.Point(529, 9);
this.textBoxLog.Multiline = true;
this.textBoxLog.Name = "textBoxLog";
this.textBoxLog.ScrollBars = System.Windows.Forms.ScrollBars.Both;
this.textBoxLog.Size = new System.Drawing.Size(168, 258);
this.textBoxLog.TabIndex = 0;
//
// labelInfo
//
this.labelInfo.AutoSize = true;
this.labelInfo.Location = new System.Drawing.Point(12, 9);
this.labelInfo.Name = "labelInfo";
this.labelInfo.Size = new System.Drawing.Size(288, 13);
this.labelInfo.TabIndex = 1;
this.labelInfo.Text = "Generate text from audio via Vosk/Kaldi speech recognition";
//
// groupBoxModels
//
this.groupBoxModels.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.groupBoxModels.Controls.Add(this.buttonDownload);
this.groupBoxModels.Controls.Add(this.linkLabelOpenModelsFolder);
this.groupBoxModels.Controls.Add(this.labelModel);
this.groupBoxModels.Controls.Add(this.comboBoxModels);
this.groupBoxModels.Location = new System.Drawing.Point(15, 66);
this.groupBoxModels.Name = "groupBoxModels";
this.groupBoxModels.Size = new System.Drawing.Size(682, 82);
this.groupBoxModels.TabIndex = 3;
this.groupBoxModels.TabStop = false;
this.groupBoxModels.Text = "Models";
//
// buttonDownload
//
this.buttonDownload.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonDownload.Location = new System.Drawing.Point(265, 43);
this.buttonDownload.Name = "buttonDownload";
this.buttonDownload.Size = new System.Drawing.Size(28, 23);
this.buttonDownload.TabIndex = 2;
this.buttonDownload.Text = "...";
this.buttonDownload.UseVisualStyleBackColor = true;
this.buttonDownload.Click += new System.EventHandler(this.buttonDownload_Click);
//
// linkLabelOpenModelsFolder
//
this.linkLabelOpenModelsFolder.AutoSize = true;
this.linkLabelOpenModelsFolder.Location = new System.Drawing.Point(301, 51);
this.linkLabelOpenModelsFolder.Name = "linkLabelOpenModelsFolder";
this.linkLabelOpenModelsFolder.Size = new System.Drawing.Size(98, 13);
this.linkLabelOpenModelsFolder.TabIndex = 0;
this.linkLabelOpenModelsFolder.TabStop = true;
this.linkLabelOpenModelsFolder.Text = "Open models folder";
this.linkLabelOpenModelsFolder.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelOpenModelFolder_LinkClicked);
//
// labelModel
//
this.labelModel.AutoSize = true;
this.labelModel.Location = new System.Drawing.Point(16, 29);
this.labelModel.Name = "labelModel";
this.labelModel.Size = new System.Drawing.Size(167, 13);
this.labelModel.TabIndex = 0;
this.labelModel.Text = "Choose speech recognition model";
//
// comboBoxModels
//
this.comboBoxModels.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxModels.FormattingEnabled = true;
this.comboBoxModels.Location = new System.Drawing.Point(19, 45);
this.comboBoxModels.Name = "comboBoxModels";
this.comboBoxModels.Size = new System.Drawing.Size(240, 21);
this.comboBoxModels.TabIndex = 1;
// Selection changes reset the cached speech model in the form code.
this.comboBoxModels.SelectedIndexChanged += new System.EventHandler(this.comboBoxModels_SelectedIndexChanged);
//
// linkLabelVoskWebSite
//
this.linkLabelVoskWebSite.AutoSize = true;
this.linkLabelVoskWebSite.Location = new System.Drawing.Point(12, 26);
this.linkLabelVoskWebSite.Name = "linkLabelVoskWebSite";
this.linkLabelVoskWebSite.Size = new System.Drawing.Size(70, 13);
this.linkLabelVoskWebSite.TabIndex = 2;
this.linkLabelVoskWebSite.TabStop = true;
this.linkLabelVoskWebSite.Text = "Vosk website";
this.linkLabelVoskWebSite.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelVoskWebsite_LinkClicked);
//
// labelTime
//
this.labelTime.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)));
this.labelTime.AutoSize = true;
this.labelTime.Location = new System.Drawing.Point(12, 442);
this.labelTime.Name = "labelTime";
this.labelTime.Size = new System.Drawing.Size(88, 13);
this.labelTime.TabIndex = 6;
this.labelTime.Text = "Remaining time...";
//
// timer1
//
// Ticks once per second (interval is in milliseconds).
this.timer1.Interval = 1000;
this.timer1.Tick += new System.EventHandler(this.timer1_Tick);
//
// checkBoxUsePostProcessing
//
this.checkBoxUsePostProcessing.AutoSize = true;
this.checkBoxUsePostProcessing.Location = new System.Drawing.Point(15, 162);
this.checkBoxUsePostProcessing.Name = "checkBoxUsePostProcessing";
this.checkBoxUsePostProcessing.Size = new System.Drawing.Size(312, 17);
this.checkBoxUsePostProcessing.TabIndex = 4;
this.checkBoxUsePostProcessing.Text = "Use post-processing (line merge, fix casing, and punctuation)";
this.checkBoxUsePostProcessing.UseVisualStyleBackColor = true;
//
// groupBoxInputFiles
//
this.groupBoxInputFiles.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.groupBoxInputFiles.Controls.Add(this.listViewInputFiles);
this.groupBoxInputFiles.Location = new System.Drawing.Point(15, 200);
this.groupBoxInputFiles.Name = "groupBoxInputFiles";
this.groupBoxInputFiles.Size = new System.Drawing.Size(682, 185);
this.groupBoxInputFiles.TabIndex = 5;
this.groupBoxInputFiles.TabStop = false;
this.groupBoxInputFiles.Text = "Input files";
//
// listViewInputFiles
//
this.listViewInputFiles.AllowDrop = true;
this.listViewInputFiles.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.listViewInputFiles.Columns.AddRange(new System.Windows.Forms.ColumnHeader[] {
this.columnHeaderFileName});
this.listViewInputFiles.FullRowSelect = true;
this.listViewInputFiles.HideSelection = false;
this.listViewInputFiles.Location = new System.Drawing.Point(6, 18);
this.listViewInputFiles.Name = "listViewInputFiles";
this.listViewInputFiles.Size = new System.Drawing.Size(670, 150);
this.listViewInputFiles.TabIndex = 0;
this.listViewInputFiles.UseCompatibleStateImageBehavior = false;
this.listViewInputFiles.View = System.Windows.Forms.View.Details;
//
// columnHeaderFileName
//
this.columnHeaderFileName.Text = "File name";
this.columnHeaderFileName.Width = 455;
//
// AudioToTextSelectedLines
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(709, 464);
this.Controls.Add(this.groupBoxInputFiles);
this.Controls.Add(this.checkBoxUsePostProcessing);
this.Controls.Add(this.labelTime);
this.Controls.Add(this.linkLabelVoskWebSite);
this.Controls.Add(this.groupBoxModels);
this.Controls.Add(this.labelInfo);
this.Controls.Add(this.labelProgress);
this.Controls.Add(this.progressBar1);
this.Controls.Add(this.buttonCancel);
this.Controls.Add(this.buttonGenerate);
this.Controls.Add(this.textBoxLog);
this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedDialog;
// KeyPreview lets the form-level KeyDown handler see keys before the focused control does.
this.KeyPreview = true;
this.MaximizeBox = false;
this.MinimizeBox = false;
this.MinimumSize = new System.Drawing.Size(575, 250);
this.Name = "AudioToTextSelectedLines";
this.ShowIcon = false;
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "Audio to text";
this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.AudioToText_FormClosing);
this.Load += new System.EventHandler(this.AudioToText_Load);
this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.AudioToText_KeyDown);
this.groupBoxModels.ResumeLayout(false);
this.groupBoxModels.PerformLayout();
this.groupBoxInputFiles.ResumeLayout(false);
this.ResumeLayout(false);
this.PerformLayout();
}
#endregion
// Control fields for the dialog; each is instantiated and configured in InitializeComponent().
private System.Windows.Forms.Button buttonCancel;
private System.Windows.Forms.Button buttonGenerate;
private System.Windows.Forms.ProgressBar progressBar1;
private System.Windows.Forms.Label labelProgress;
private System.Windows.Forms.TextBox textBoxLog;
private System.Windows.Forms.Label labelInfo;
private System.Windows.Forms.GroupBox groupBoxModels;
private System.Windows.Forms.LinkLabel linkLabelVoskWebSite;
private System.Windows.Forms.Label labelModel;
private System.Windows.Forms.ComboBox comboBoxModels;
private System.Windows.Forms.LinkLabel linkLabelOpenModelsFolder;
private System.Windows.Forms.Label labelTime;
private System.Windows.Forms.Timer timer1;
private System.Windows.Forms.CheckBox checkBoxUsePostProcessing;
private System.Windows.Forms.Button buttonDownload;
private System.Windows.Forms.GroupBox groupBoxInputFiles;
private System.Windows.Forms.ListView listViewInputFiles;
private System.Windows.Forms.ColumnHeader columnHeaderFileName;
}
}

View File

@ -0,0 +1,583 @@
using Nikse.SubtitleEdit.Core.AudioToText;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using Vosk;
namespace Nikse.SubtitleEdit.Forms
{
public sealed partial class AudioToTextSelectedLines : Form
{
// Folder "Vosk" below the application data directory; models are looked up here.
private readonly string _voskFolder;
// Set by the Cancel button while work is running; polled by the extraction and transcription loops.
private bool _cancel;
// Set to true in the constructor; selects the "X of Y" progress text and the taller layout.
private bool _batchMode;
// 1-based number of the list item currently being processed.
private int _batchFileNumber;
// DateTime.UtcNow.Ticks captured when the current transcription started (used for the time estimate).
private long _startTicks;
// Size in bytes of the wav file being transcribed, and how many of those bytes were read so far.
private long _bytesWavTotal;
private long _bytesWavRead;
// Temporary wav files that are removed when the form closes.
private readonly List<string> _filesToDelete;
// Clips handed to the constructor; index-aligned with the rows of listViewInputFiles.
private readonly List<AudioClipsGet.AudioClip> _audioClips;
// Window whose handle is used for taskbar progress reporting.
private readonly Form _parentForm;
// Lazily created Vosk model; reset to null when another model is selected.
private Model _model;
// Subtitle produced by the post-processor for the most recently transcribed clip.
public Subtitle TranscribedSubtitle { get; private set; }
/// <summary>
/// Creates the dialog for transcribing the given audio clips (one per selected subtitle line).
/// </summary>
/// <param name="audioClips">Clips to transcribe; each clip's audio file name is listed in the input list.</param>
/// <param name="parentForm">Form whose window handle receives taskbar progress updates.</param>
public AudioToTextSelectedLines(List<AudioClipsGet.AudioClip> audioClips, Form parentForm)
{
    UiUtil.PreInitialize(this);
    InitializeComponent();
    UiUtil.FixFonts(this);
    UiUtil.FixLargeFonts(this, buttonGenerate);
    _parentForm = parentForm;

    // Replace the designer default texts with the localized ones.
    Text = LanguageSettings.Current.AudioToText.Title;
    labelInfo.Text = LanguageSettings.Current.AudioToText.Info;
    groupBoxModels.Text = LanguageSettings.Current.AudioToText.Models;
    labelModel.Text = LanguageSettings.Current.AudioToText.ChooseModel;
    linkLabelOpenModelsFolder.Text = LanguageSettings.Current.AudioToText.OpenModelsFolder;
    checkBoxUsePostProcessing.Text = LanguageSettings.Current.AudioToText.UsePostProcessing;
    buttonGenerate.Text = LanguageSettings.Current.Watermark.Generate;
    buttonCancel.Text = LanguageSettings.Current.General.Cancel;
    groupBoxInputFiles.Text = LanguageSettings.Current.BatchConvert.Input;
    columnHeaderFileName.Text = LanguageSettings.Current.JoinSubtitles.FileName;

    checkBoxUsePostProcessing.Checked = Configuration.Settings.Tools.VoskPostProcessing;
    _voskFolder = Path.Combine(Configuration.DataDirectory, "Vosk");
    FillModels();

    // The log text box stays hidden until toggled from the keyboard handler.
    textBoxLog.Visible = false;
    textBoxLog.Dock = DockStyle.Fill;
    labelProgress.Text = string.Empty;
    labelTime.Text = string.Empty;
    _filesToDelete = new List<string>();

    // This dialog always processes a list of clips.
    _batchMode = true;
    listViewInputFiles.Visible = true;
    _audioClips = audioClips;
    foreach (var audioClip in audioClips)
    {
        listViewInputFiles.Items.Add(audioClip.AudioFileName);
    }
}
/// <summary>
/// Rebuilds the model combo box from the sub folders of the Vosk folder that contain a
/// "final.mdl" file (directly or in an "am" sub folder) and selects the model stored in
/// the settings, or the first model when that one is not present.
/// </summary>
private void FillModels()
{
    comboBoxModels.Items.Clear();

    // A missing Vosk folder simply means "no models yet"; the Generate button then
    // offers the download dialog instead of the form failing with an exception.
    if (!Directory.Exists(_voskFolder))
    {
        return;
    }

    foreach (var directory in Directory.GetDirectories(_voskFolder))
    {
        var name = Path.GetFileName(directory);
        if (!File.Exists(Path.Combine(directory, "final.mdl")) && !File.Exists(Path.Combine(directory, "am", "final.mdl")))
        {
            continue;
        }

        comboBoxModels.Items.Add(name);
        if (name == Configuration.Settings.Tools.VoskModel)
        {
            comboBoxModels.SelectedIndex = comboBoxModels.Items.Count - 1;
        }
    }

    if (comboBoxModels.SelectedIndex < 0 && comboBoxModels.Items.Count > 0)
    {
        comboBoxModels.SelectedIndex = 0;
    }
}
/// <summary>
/// Starts the batch transcription. Without any installed model the download dialog is
/// opened instead; without any input item nothing happens.
/// </summary>
private void ButtonGenerate_Click(object sender, EventArgs e)
{
    var hasModels = comboBoxModels.Items.Count != 0;
    if (!hasModels)
    {
        buttonDownload_Click(null, null);
        return;
    }

    var hasInput = listViewInputFiles.Items.Count != 0;
    if (hasInput)
    {
        GenerateBatch();

        // Clear the taskbar progress indicator once the batch has returned.
        TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
    }
}
/// <summary>
/// Processes every item of the input list in order: extracts a wav file, transcribes it
/// via Vosk, post-processes the result and writes the text back to the matching audio clip.
/// Sets <c>DialogResult</c> to OK when all items were processed; stops early when the
/// user cancels or no wav file could be produced.
/// </summary>
private void GenerateBatch()
{
    groupBoxInputFiles.Enabled = false;
    _batchFileNumber = 0;
    textBoxLog.AppendText("Batch mode" + Environment.NewLine);
    foreach (ListViewItem lvi in listViewInputFiles.Items)
    {
        _batchFileNumber++;
        var videoFileName = lvi.Text;
        listViewInputFiles.SelectedIndices.Clear();
        lvi.Selected = true;
        progressBar1.Maximum = 100;
        progressBar1.Value = 0;
        progressBar1.Visible = true;
        var modelFileName = Path.Combine(_voskFolder, comboBoxModels.Text);
        buttonGenerate.Enabled = false;
        buttonDownload.Enabled = false;

        // GenerateWavFile returns null when the extraction was cancelled; passing that on
        // to the transcription would fail on the missing file name.
        var waveFileName = GenerateWavFile(videoFileName, 0);
        if (_cancel || waveFileName == null)
        {
            AbortBatch();
            return;
        }

        textBoxLog.AppendText("Wav file name: " + waveFileName + Environment.NewLine);
        progressBar1.Style = ProgressBarStyle.Blocks;
        var transcript = TranscribeViaVosk(waveFileName, modelFileName);
        if (_cancel || transcript == null)
        {
            AbortBatch();
            return;
        }

        var postProcessor = new AudioToTextPostProcessor(GetLanguage(comboBoxModels.Text))
        {
            ParagraphMaxChars = Configuration.Settings.General.SubtitleLineMaximumLength * 2,
        };
        TranscribedSubtitle = postProcessor.Generate(transcript, checkBoxUsePostProcessing.Checked);

        // _batchFileNumber is 1-based, the clip list is 0-based.
        SaveToSourceFolder(videoFileName, _batchFileNumber - 1);
        TaskbarList.SetProgressValue(_parentForm.Handle, _batchFileNumber, listViewInputFiles.Items.Count);
    }

    progressBar1.Visible = false;
    labelTime.Text = string.Empty;
    DialogResult = DialogResult.OK;
}

/// <summary>
/// Restores the UI state after a batch run was stopped before completion.
/// </summary>
private void AbortBatch()
{
    TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
    if (!_batchMode)
    {
        DialogResult = DialogResult.Cancel;
    }

    groupBoxInputFiles.Enabled = true;
}
/// <summary>
/// Writes the transcribed text (one line per transcribed paragraph, trimmed) into the
/// paragraph of the audio clip at <paramref name="index"/> and then tries to delete the
/// clip's audio file.
/// </summary>
/// <param name="videoFileName">Not used by this method.</param>
/// <param name="index">Zero-based index into the audio clip list.</param>
private void SaveToSourceFolder(string videoFileName, int index)
{
    var clip = _audioClips[index];
    var lines = TranscribedSubtitle.Paragraphs.Select(paragraph => paragraph.Text);
    clip.Paragraph.Text = string.Join(Environment.NewLine, lines).Trim();

    try
    {
        File.Delete(clip.AudioFileName);
    }
    catch
    {
        // best effort - a clip file that cannot be removed is left behind
    }
}
/// <summary>
/// Derives a two letter language code from a Vosk model folder name.
/// </summary>
/// <param name="text">Model name, e.g. the text of the model combo box.</param>
/// <returns>
/// The first known language code that the name matches ("model-xx", "model-small-xx" or a
/// name starting with the code); "jp" is also matched via "model-ja"/"model-small-ja".
/// Falls back to "en" when nothing matches.
/// </returns>
internal static string GetLanguage(string text)
{
    foreach (var code in DownloadModel.VoskModels.Select(model => model.TwoLetterLanguageCode))
    {
        var matchesCode = text.Contains("model-" + code) ||
                          text.Contains("model-small-" + code) ||
                          text.StartsWith(code, StringComparison.OrdinalIgnoreCase);

        // Japanese models use "ja" in their folder name while the model list uses "jp".
        var matchesJapaneseAlias = !matchesCode &&
                                   code == "jp" &&
                                   (text.Contains("model-ja") || text.Contains("model-small-ja"));

        if (matchesCode || matchesJapaneseAlias)
        {
            return code;
        }
    }

    return "en";
}
/// <summary>
/// Feeds the wav file to a Vosk recognizer in 4096 byte chunks and collects the
/// recognized words, updating progress bar, labels and log while doing so.
/// </summary>
/// <param name="waveFileName">Wav file to transcribe (the recognizer is created for 16000 Hz).</param>
/// <param name="modelFileName">Folder of the Vosk model; only loaded when no model is cached.</param>
/// <returns>The recognized words, or null when the user cancelled.</returns>
public List<ResultText> TranscribeViaVosk(string waveFileName, string modelFileName)
{
    labelProgress.Text = LanguageSettings.Current.AudioToText.LoadingVoskModel;
    labelProgress.Refresh();
    Application.DoEvents();
    Directory.SetCurrentDirectory(_voskFolder);
    Vosk.Vosk.SetLogLevel(0);
    if (_model == null)
    {
        _model = new Model(modelFileName);
    }

    // The recognizer wraps a native object, so release it deterministically.
    using (var rec = new VoskRecognizer(_model, 16000.0f))
    {
        rec.SetMaxAlternatives(0);
        rec.SetWords(true);
        var list = new List<ResultText>();
        labelProgress.Text = LanguageSettings.Current.AudioToText.Transcribing;
        if (_batchMode)
        {
            labelProgress.Text = string.Format(LanguageSettings.Current.AudioToText.TranscribingXOfY, _batchFileNumber, listViewInputFiles.Items.Count);
        }
        else
        {
            TaskbarList.SetProgressValue(_parentForm.Handle, 1, 100);
        }

        labelProgress.Refresh();
        Application.DoEvents();
        var buffer = new byte[4096];
        _bytesWavTotal = new FileInfo(waveFileName).Length;
        _bytesWavRead = 0;
        _startTicks = DateTime.UtcNow.Ticks;
        timer1.Start();
        try
        {
            using (var source = File.OpenRead(waveFileName))
            {
                int bytesRead;
                while ((bytesRead = source.Read(buffer, 0, buffer.Length)) > 0)
                {
                    _bytesWavRead += bytesRead;
                    progressBar1.Value = (int)(_bytesWavRead * 100.0 / _bytesWavTotal);
                    progressBar1.Refresh();
                    Application.DoEvents();
                    if (rec.AcceptWaveform(buffer, bytesRead))
                    {
                        var res = rec.Result();
                        var results = ParseJsonToResult(res);
                        list.AddRange(results);
                    }
                    else
                    {
                        // Partial results are only written to the log.
                        var res = rec.PartialResult();
                        textBoxLog.AppendText(res.RemoveChar('\r', '\n'));
                    }

                    if (!_batchMode)
                    {
                        TaskbarList.SetProgressValue(_parentForm.Handle, Math.Max(1, progressBar1.Value), progressBar1.Maximum);
                    }

                    if (_cancel)
                    {
                        TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
                        return null;
                    }
                }
            }

            var finalResult = rec.FinalResult();
            var finalResults = ParseJsonToResult(finalResult);
            list.AddRange(finalResults);
        }
        finally
        {
            // Also stops the remaining-time timer on cancel or on an exception.
            timer1.Stop();
        }

        return list;
    }
}
/// <summary>
/// Converts the elements of the "result" array of a recognizer JSON reply into
/// <c>ResultText</c> items.
/// </summary>
/// <param name="result">JSON text returned by the recognizer.</param>
/// <returns>
/// One item per array element that has a non-blank "word" and parsable "conf", "start"
/// and "end" numbers (invariant culture); other elements are skipped.
/// </returns>
private static List<ResultText> ParseJsonToResult(string result)
{
    var words = new List<ResultText>();
    var parser = new SeJsonParser();
    const NumberStyles numberStyle = NumberStyles.AllowDecimalPoint;
    var invariant = CultureInfo.InvariantCulture;

    foreach (var element in parser.GetArrayElementsByName(result, "result"))
    {
        var confText = parser.GetFirstObject(element, "conf");
        var startText = parser.GetFirstObject(element, "start");
        var endText = parser.GetFirstObject(element, "end");
        var wordText = parser.GetFirstObject(element, "word");

        if (string.IsNullOrWhiteSpace(wordText))
        {
            continue;
        }

        if (!decimal.TryParse(confText, numberStyle, invariant, out var confidence))
        {
            continue;
        }

        if (!decimal.TryParse(startText, numberStyle, invariant, out var startSeconds))
        {
            continue;
        }

        if (!decimal.TryParse(endText, numberStyle, invariant, out var endSeconds))
        {
            continue;
        }

        words.Add(new ResultText { Confidence = confidence, Text = wordText, Start = startSeconds, End = endSeconds });
    }

    return words;
}
/// <summary>
/// Runs FFmpeg to extract the audio of <paramref name="videoFileName"/> into a temporary
/// wav file, keeping the UI responsive and showing the elapsed time while waiting.
/// </summary>
/// <param name="videoFileName">Input media file.</param>
/// <param name="audioTrackNumber">Audio track passed on to the FFmpeg command line.</param>
/// <returns>
/// Path of the temporary wav file, or null when the user cancelled or no FFmpeg process
/// could be created.
/// </returns>
private string GenerateWavFile(string videoFileName, int audioTrackNumber)
{
    var outWaveFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".wav");
    var process = GetFfmpegProcess(videoFileName, audioTrackNumber, outWaveFile);
    if (process == null)
    {
        // GetFfmpegProcess returns null when FFmpeg is not available; there is nothing to run.
        return null;
    }

    _filesToDelete.Add(outWaveFile);
    using (process)
    {
        process.Start();
        progressBar1.Style = ProgressBarStyle.Marquee;
        progressBar1.Visible = true;
        double seconds = 0;
        buttonCancel.Visible = true;
        try
        {
            process.PriorityClass = ProcessPriorityClass.Normal;
        }
        catch
        {
            // ignored
        }

        _cancel = false;
        string targetDriveLetter = null;
        if (Configuration.IsRunningOnWindows)
        {
            var root = Path.GetPathRoot(outWaveFile);
            if (root.Length > 1 && root[1] == ':')
            {
                targetDriveLetter = root.Remove(1);
            }
        }

        while (!process.HasExited)
        {
            Application.DoEvents();
            System.Threading.Thread.Sleep(100);
            seconds += 0.1;
            if (seconds < 60)
            {
                labelProgress.Text = string.Format(LanguageSettings.Current.AddWaveform.ExtractingSeconds, seconds);
            }
            else
            {
                labelProgress.Text = string.Format(LanguageSettings.Current.AddWaveform.ExtractingMinutes, (int)(seconds / 60), (int)(seconds % 60));
            }

            Refresh();
            if (_cancel)
            {
                process.Kill();
                progressBar1.Visible = false;
                buttonCancel.Visible = false;
                DialogResult = DialogResult.Cancel;
                return null;
            }

            // Roughly every ten seconds: warn when the target drive is running out of space.
            if (targetDriveLetter != null && seconds > 1 && Convert.ToInt32(seconds) % 10 == 0)
            {
                try
                {
                    var drive = new DriveInfo(targetDriveLetter);
                    if (drive.IsReady)
                    {
                        if (drive.AvailableFreeSpace < 50 * 1000000) // 50 mb
                        {
                            labelInfo.ForeColor = Color.Red;
                            labelInfo.Text = LanguageSettings.Current.AddWaveform.LowDiskSpace;
                        }
                    }
                }
                catch
                {
                    // ignored
                }
            }
        }

        return outWaveFile;
    }
}
/// <summary>
/// Builds (but does not start) the FFmpeg process that transcodes the audio of a media
/// file to a wav file.
/// </summary>
/// <param name="videoFileName">Input media file.</param>
/// <param name="audioTrackNumber">When greater than zero, added as "-map 0:a:N".</param>
/// <param name="outWaveFile">Target wav file.</param>
/// <returns>
/// The configured process, or null when running on Windows and the configured FFmpeg
/// executable does not exist.
/// </returns>
private static Process GetFfmpegProcess(string videoFileName, int audioTrackNumber, string outWaveFile)
{
    var configuredFfmpeg = Configuration.Settings.General.FFmpegLocation;
    if (Configuration.IsRunningOnWindows && !File.Exists(configuredFfmpeg))
    {
        return null;
    }

    // Command line template:
    //   -i       input file
    //   -vn      no video output
    //   -ar      audio sampling frequency (16000 here)
    //   -ac      number of audio channels (1 here)
    //   -ab      audio bit rate
    //   -vol 448 boosts the volume (256 is normal)
    //   -f wav   output format
    //   {2}      optional stream selection, e.g. "-map 0:a:1" for the second audio stream
    const string commandLineTemplate = "-i \"{0}\" -vn -ar 16000 -ac 1 -ab 128 -vol 448 -f wav {2} \"{1}\"";

    var streamSelection = audioTrackNumber > 0
        ? $"-map 0:a:{audioTrackNumber}"
        : string.Empty;

    // On non-Windows systems "ffmpeg" is started by name instead of by configured path.
    var executable = Configuration.IsRunningOnWindows ? configuredFfmpeg : "ffmpeg";
    var arguments = string.Format(commandLineTemplate, videoFileName, outWaveFile, streamSelection);

    var startInfo = new ProcessStartInfo(executable, arguments)
    {
        WindowStyle = ProcessWindowStyle.Hidden,
        CreateNoWindow = true,
    };
    return new Process { StartInfo = startInfo };
}
/// <summary>
/// While work is running (Generate button disabled) this only raises the cancel flag;
/// otherwise the dialog result is set to Cancel.
/// </summary>
private void buttonCancel_Click(object sender, EventArgs e)
{
    var isWorking = !buttonGenerate.Enabled;
    if (isWorking)
    {
        _cancel = true;
        return;
    }

    DialogResult = DialogResult.Cancel;
}
/// <summary>
/// Opens the Vosk models web page.
/// </summary>
private void linkLabelVoskWebsite_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e) =>
    UiUtil.OpenUrl("https://alphacephei.com/vosk/models");
/// <summary>
/// Stores the selected model and the post-processing choice in the settings and removes
/// the temporary files created by this dialog (best effort).
/// </summary>
private void AudioToText_FormClosing(object sender, FormClosingEventArgs e)
{
    var toolSettings = Configuration.Settings.Tools;
    toolSettings.VoskModel = comboBoxModels.Text;
    toolSettings.VoskPostProcessing = checkBoxUsePostProcessing.Checked;

    void DeleteQuietly(string path)
    {
        try
        {
            if (!File.Exists(path))
            {
                return;
            }

            File.Delete(path);
        }
        catch
        {
            // a temporary file that cannot be deleted is left behind
        }
    }

    foreach (var temporaryFile in _filesToDelete)
    {
        DeleteQuietly(temporaryFile);
    }
}
/// <summary>
/// Form-level keyboard shortcuts: F2 toggles the log text box, Escape closes the dialog
/// when no work is running, and the help key opens the Vosk web page.
/// </summary>
private void AudioToText_KeyDown(object sender, KeyEventArgs e)
{
    if (e.KeyCode == Keys.F2)
    {
        // Toggle the log. The branches used to be swapped, so the log could never be shown.
        if (textBoxLog.Visible)
        {
            textBoxLog.Visible = false;
        }
        else
        {
            textBoxLog.Visible = true;
            textBoxLog.BringToFront();
        }

        e.SuppressKeyPress = true;
    }
    else if (e.KeyCode == Keys.Escape && buttonGenerate.Enabled)
    {
        DialogResult = DialogResult.Cancel;
        e.SuppressKeyPress = true;
    }
    else if (e.KeyData == UiUtil.HelpKeys)
    {
        linkLabelVoskWebsite_LinkClicked(null, null);
        e.SuppressKeyPress = true;
    }
}
/// <summary>
/// Opens the Vosk folder (where the models are looked up).
/// </summary>
private void linkLabelOpenModelFolder_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e) =>
    UiUtil.OpenFolder(_voskFolder);
/// <summary>
/// Updates the remaining-time label by extrapolating the time spent so far over the
/// bytes of the wav file that are still unread. Does nothing until at least one byte
/// was read.
/// </summary>
private void timer1_Tick(object sender, EventArgs e)
{
    var hasProgress = _bytesWavRead > 0 && _bytesWavTotal > 0;
    if (!hasProgress)
    {
        return;
    }

    long elapsedMs = (DateTime.UtcNow.Ticks - _startTicks) / TimeSpan.TicksPerMillisecond;
    float msPerByte = (float)elapsedMs / _bytesWavRead;
    float projectedTotalMs = msPerByte * _bytesWavTotal;
    labelTime.Text = ToProgressTime(projectedTotalMs - elapsedMs);
}
/// <summary>
/// Formats a duration in milliseconds as a localized "time remaining" text.
/// </summary>
/// <param name="estimatedTotalMs">Duration in milliseconds.</param>
/// <returns>
/// A seconds text below one minute, a minutes-only text above five minutes, and a
/// minutes-and-seconds text in between.
/// </returns>
public static string ToProgressTime(float estimatedTotalMs)
{
    var remaining = new TimeCode(estimatedTotalMs);
    var texts = LanguageSettings.Current.GenerateVideoWithBurnedInSubs;

    if (remaining.TotalSeconds < 60)
    {
        var wholeSeconds = (int)Math.Round(remaining.TotalSeconds);
        return string.Format(texts.TimeRemainingSeconds, wholeSeconds);
    }

    if (remaining.TotalSeconds / 60 > 5)
    {
        var wholeMinutes = (int)Math.Round(remaining.TotalSeconds / 60);
        return string.Format(texts.TimeRemainingMinutes, wholeMinutes);
    }

    return string.Format(texts.TimeRemainingMinutesAndSeconds, remaining.Minutes + remaining.Hours * 60, remaining.Seconds);
}
/// <summary>
/// Shows the model download dialog and afterwards reloads the model list.
/// </summary>
private void buttonDownload_Click(object sender, EventArgs e)
{
    using (var downloadDialog = new AudioToTextModelDownload())
    {
        downloadDialog.AutoClose = true;
        downloadDialog.ShowDialog(this);

        // Pick up any model installed while the dialog was open.
        FillModels();
    }
}
/// <summary>
/// Lets the user pick one or more video files and appends them to the input list.
/// </summary>
private void buttonAddFile_Click(object sender, EventArgs e)
{
    using (var fileDialog = new OpenFileDialog())
    {
        fileDialog.Title = LanguageSettings.Current.General.OpenVideoFileTitle;
        fileDialog.FileName = string.Empty;
        fileDialog.Filter = UiUtil.GetVideoFileFilter(true);
        fileDialog.Multiselect = true;

        var confirmed = fileDialog.ShowDialog(this) == DialogResult.OK;
        if (confirmed)
        {
            foreach (var pickedFile in fileDialog.FileNames)
            {
                listViewInputFiles.Items.Add(pickedFile);
            }
        }
    }
}
/// <summary>
/// Removes the selected rows from the input list.
/// </summary>
private void buttonRemoveFile_Click(object sender, EventArgs e)
{
    // Walk the selection from the last selected row to the first so that removing a row
    // does not shift the positions of the rows still to be removed.
    var position = listViewInputFiles.SelectedIndices.Count;
    while (--position >= 0)
    {
        var rowIndex = listViewInputFiles.SelectedIndices[position];
        listViewInputFiles.Items.RemoveAt(rowIndex);
    }
}
/// <summary>
/// Empties the input list.
/// </summary>
private void buttonClear_Click(object sender, EventArgs e) =>
    listViewInputFiles.Items.Clear();
/// <summary>
/// Flips batch mode and adjusts the layout to the new mode.
/// </summary>
private void buttonBatchMode_Click(object sender, EventArgs e)
{
    var switchedMode = !_batchMode;
    _batchMode = switchedMode;
    ShowHideBatchMode();
}
/// <summary>
/// Resizes the form for the current mode and shows the input list only in batch mode.
/// </summary>
private void ShowHideBatchMode()
{
    // Extra height below the controls: room for the input list in batch mode,
    // just a small margin otherwise.
    var extraHeight = _batchMode ? 450 : 70;

    Height = checkBoxUsePostProcessing.Bottom + progressBar1.Height + buttonCancel.Height + extraHeight;
    listViewInputFiles.Visible = _batchMode;
}
/// <summary>
/// Applies the mode dependent layout and sets the width of the file name column when
/// the form loads.
/// </summary>
private void AudioToText_Load(object sender, EventArgs e)
{
    ShowHideBatchMode();

    // -2 is the ListView auto-size value for a column width.
    var fileNameColumn = listViewInputFiles.Columns[0];
    fileNameColumn.Width = -2;
}
/// <summary>
/// Invalidates the cached Vosk model when another model is selected, so the next
/// transcription loads the newly chosen one.
/// </summary>
private void comboBoxModels_SelectedIndexChanged(object sender, EventArgs e)
{
    // The model wraps native resources; release them instead of waiting for finalization.
    _model?.Dispose();
    _model = null;
}
}
}

View File

@ -0,0 +1,123 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<metadata name="timer1.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<value>17, 17</value>
</metadata>
</root>

View File

@ -8739,16 +8739,15 @@ namespace Nikse.SubtitleEdit.Forms
}
};
// harboe debug todo nixe fix nikolaj
var audio = new ToolStripMenuItem("Audio");
audio.Tag = "(REMOVE)";
if (SubtitleListview1.SelectedItems.Count > 0)
{
toolStripMenuItemSelectedLines.DropDownItems.Insert(0, audio);
var audioClip = new ToolStripMenuItem("Save audio as...");
var audioClip = new ToolStripMenuItem("Extract audio");
var audioToText = new ToolStripMenuItem(LanguageSettings.Current.Main.Menu.Video.VideoAudioToText);
audio.DropDownItems.Insert(0, audioClip);
// audio.DropDownItems.Insert(0, audioToText);
audio.DropDownItems.Insert(0, audioToText);
audioClip.Click += (senderNew, eNew) =>
{
if (!RequireFfmpegOk())
@ -8757,7 +8756,7 @@ namespace Nikse.SubtitleEdit.Forms
}
var audioClips = GetAudioClips();
UiUtil.OpenFolder(Path.GetDirectoryName(audioClips[0].AudioFile));
UiUtil.OpenFolder(Path.GetDirectoryName(audioClips[0].AudioFileName));
};
audioToText.Click += (senderNew, eNew) =>
{
@ -8767,6 +8766,24 @@ namespace Nikse.SubtitleEdit.Forms
}
var audioClips = GetAudioClips();
using (var form = new AudioToTextSelectedLines(audioClips, this))
{
if (form.ShowDialog(this) == DialogResult.OK)
{
SubtitleListview1.BeginUpdate();
foreach (var ac in audioClips)
{
var p = _subtitle.Paragraphs.FirstOrDefault(pa => pa.Id == ac.Paragraph.Id);
if (p != null)
{
p.Text = ac.Paragraph.Text;
var idx = _subtitle.Paragraphs.IndexOf(p);
SubtitleListview1.SetText(idx, p.Text);
}
}
SubtitleListview1.EndUpdate();
}
}
};
}
@ -9231,7 +9248,7 @@ namespace Nikse.SubtitleEdit.Forms
var selectedParagraphs = new List<Paragraph>();
foreach (var index in SubtitleListview1.GetSelectedIndices())
{
selectedParagraphs.Add(_subtitle.Paragraphs[index]);
selectedParagraphs.Add(new Paragraph(_subtitle.Paragraphs[index], false));
}
using (var form = new AudioClipsGet(selectedParagraphs, _videoFileName, _videoAudioTrackNumber))

View File

@ -186,6 +186,12 @@
<Compile Include="Forms\AudioClipsGet.Designer.cs">
<DependentUpon>AudioClipsGet.cs</DependentUpon>
</Compile>
<Compile Include="Forms\AudioToTextSelectedLines.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\AudioToTextSelectedLines.Designer.cs">
<DependentUpon>AudioToTextSelectedLines.cs</DependentUpon>
</Compile>
<Compile Include="Forms\AudioToText.cs">
<SubType>Form</SubType>
</Compile>
@ -1435,6 +1441,9 @@
<EmbeddedResource Include="Forms\AudioClipsGet.resx">
<DependentUpon>AudioClipsGet.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\AudioToTextSelectedLines.resx">
<DependentUpon>AudioToTextSelectedLines.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\AudioToText.resx">
<DependentUpon>AudioToText.cs</DependentUpon>
</EmbeddedResource>