Work on selected lines audio to text

This commit is contained in:
niksedk 2022-07-25 21:50:53 +02:00
parent d3d3210d51
commit 5aa8e0e6c7
7 changed files with 77 additions and 229 deletions

View File

@ -27,7 +27,7 @@ namespace Nikse.SubtitleEdit.Core.AudioToText
TwoLetterLanguageCode = twoLetterLanguageCode;
public Subtitle Generate(List<ResultText> resultTexts, bool usePostProcessing)
public Subtitle Generate(List<ResultText> resultTexts, bool usePostProcessing, bool addPeriods, bool mergeLines, bool fixCasing, bool fixShortDuration)
_resultTexts = resultTexts;
var subtitle = new Subtitle();
@ -44,12 +44,32 @@ namespace Nikse.SubtitleEdit.Core.AudioToText
return Generate(subtitle, usePostProcessing, true, true, true, true);
public Subtitle Generate(Subtitle subtitle, bool usePostProcessing, bool addPeriods, bool mergeLines, bool fixCasing, bool fixShortDuration)
if (usePostProcessing)
subtitle = AddPeriods(subtitle, TwoLetterLanguageCode);
subtitle = MergeShortLines(subtitle, TwoLetterLanguageCode);
subtitle = FixCasing(subtitle, TwoLetterLanguageCode);
subtitle = FixShortDuration(subtitle);
if (addPeriods)
subtitle = AddPeriods(subtitle, TwoLetterLanguageCode);
if (mergeLines)
subtitle = MergeShortLines(subtitle, TwoLetterLanguageCode);
if (fixCasing)
subtitle = FixCasing(subtitle, TwoLetterLanguageCode);
if (fixShortDuration)
subtitle = FixShortDuration(subtitle);

View File

@ -54,7 +54,7 @@ namespace Nikse.SubtitleEdit.Forms
var start = $"{item.StartTime.TotalSeconds:0.000}".Replace(",", ".");
var duration = $"{item.Duration.TotalSeconds:0.000}".Replace(",", ".");
var fFmpegWaveTranscodeSettings = "-ss " + start + " -t " + duration + " -i \"{0}\" -vn -ar 24000 -ac 2 -ab 128 -vol 448 -f wav {2} \"{1}\"";
var fFmpegWaveTranscodeSettings = "-ss " + start + " -t " + duration + " -i \"{0}\" -vn -ar 16000 -ac 1 -ab 128 -vol 448 -f wav {2} \"{1}\"";
//-ss = start time
//-t = duration
//-i indicates the input

View File

@ -148,7 +148,7 @@ namespace Nikse.SubtitleEdit.Forms
ParagraphMaxChars = Configuration.Settings.General.SubtitleLineMaximumLength * 2,
TranscribedSubtitle = postProcessor.Generate(transcript, checkBoxUsePostProcessing.Checked);
TranscribedSubtitle = postProcessor.Generate(transcript, checkBoxUsePostProcessing.Checked, true, true, true, true);
DialogResult = DialogResult.OK;
@ -190,7 +190,7 @@ namespace Nikse.SubtitleEdit.Forms
ParagraphMaxChars = Configuration.Settings.General.SubtitleLineMaximumLength * 2,
TranscribedSubtitle = postProcessor.Generate(transcript, checkBoxUsePostProcessing.Checked);
TranscribedSubtitle = postProcessor.Generate(transcript, checkBoxUsePostProcessing.Checked, true, true, true, true);
TaskbarList.SetProgressValue(_parentForm.Handle, _batchFileNumber, listViewInputFiles.Items.Count);

View File

@ -3,8 +3,6 @@ using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Logic;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Linq;
@ -18,12 +16,10 @@ namespace Nikse.SubtitleEdit.Forms
private readonly string _voskFolder;
private bool _cancel;
private bool _batchMode;
private int _batchFileNumber;
private long _startTicks;
private long _bytesWavTotal;
private long _bytesWavRead;
private readonly List<string> _filesToDelete;
private readonly List<AudioClipsGet.AudioClip> _audioClips;
private readonly Form _parentForm;
private Model _model;
@ -59,10 +55,9 @@ namespace Nikse.SubtitleEdit.Forms
textBoxLog.Dock = DockStyle.Fill;
labelProgress.Text = string.Empty;
labelTime.Text = string.Empty;
_filesToDelete = new List<string>();
_batchMode = true;
listViewInputFiles.Visible = true;
_audioClips = audioClips;
progressBar1.Maximum = 100;
foreach (var audioClip in audioClips)
@ -109,56 +104,75 @@ namespace Nikse.SubtitleEdit.Forms
TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
private void GenerateBatch()
groupBoxInputFiles.Enabled = false;
_batchFileNumber = 0;
textBoxLog.AppendText("Batch mode" + Environment.NewLine);
var postProcessor = new AudioToTextPostProcessor(GetLanguage(comboBoxModels.Text))
ParagraphMaxChars = Configuration.Settings.General.SubtitleLineMaximumLength * 2,
progressBar1.Visible = true;
foreach (ListViewItem lvi in listViewInputFiles.Items)
var videoFileName = lvi.Text;
lvi.Selected = true;
progressBar1.Maximum = 100;
progressBar1.Value = 0;
progressBar1.Visible = true;
var modelFileName = Path.Combine(_voskFolder, comboBoxModels.Text);
buttonGenerate.Enabled = false;
buttonDownload.Enabled = false;
var waveFileName = GenerateWavFile(videoFileName, 0);
var waveFileName = videoFileName;
textBoxLog.AppendText("Wav file name: " + waveFileName + Environment.NewLine);
progressBar1.Style = ProgressBarStyle.Blocks;
var transcript = TranscribeViaVosk(waveFileName, modelFileName);
if (_cancel)
TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
if (!_batchMode)
DialogResult = DialogResult.Cancel;
groupBoxInputFiles.Enabled = true;
DialogResult = DialogResult.Cancel;
var postProcessor = new AudioToTextPostProcessor(GetLanguage(comboBoxModels.Text))
ParagraphMaxChars = Configuration.Settings.General.SubtitleLineMaximumLength * 2,
TranscribedSubtitle = postProcessor.Generate(transcript, checkBoxUsePostProcessing.Checked);
TranscribedSubtitle = postProcessor.Generate(transcript, checkBoxUsePostProcessing.Checked, false, true, false, false);
SaveToSourceFolder(videoFileName, _batchFileNumber - 1);
progressBar1.Value = (int)Math.Round(_batchFileNumber * 100.0 / _audioClips.Count, MidpointRounding.AwayFromZero);
SaveToAudioClip(_batchFileNumber - 1);
TaskbarList.SetProgressValue(_parentForm.Handle, _batchFileNumber, listViewInputFiles.Items.Count);
progressBar1.Visible = false;
progressBar1.Value = 100;
labelTime.Text = string.Empty;
DialogResult = DialogResult.OK;
private void SaveToSourceFolder(string videoFileName, int index)
private void PostFix(AudioToTextPostProcessor postProcessor)
var postSub = new Subtitle();
foreach (var audioClip in _audioClips)
var postSubFixed = postProcessor.Generate(postSub, checkBoxUsePostProcessing.Checked, true, false, true, false);
for (var index = 0; index < _audioClips.Count; index++)
var audioClip = _audioClips[index];
if (index < postSubFixed.Paragraphs.Count)
audioClip.Paragraph.Text = postSubFixed.Paragraphs[index].Text;
private void SaveToAudioClip(int index)
var audioClip = _audioClips[index];
@ -201,13 +215,13 @@ namespace Nikse.SubtitleEdit.Forms
public List<ResultText> TranscribeViaVosk(string waveFileName, string modelFileName)
labelProgress.Text = LanguageSettings.Current.AudioToText.LoadingVoskModel;
if (_model == null)
labelProgress.Text = LanguageSettings.Current.AudioToText.LoadingVoskModel;
_model = new Model(modelFileName);
var rec = new VoskRecognizer(_model, 16000.0f);
@ -215,15 +229,7 @@ namespace Nikse.SubtitleEdit.Forms
var list = new List<ResultText>();
labelProgress.Text = LanguageSettings.Current.AudioToText.Transcribing;
if (_batchMode)
labelProgress.Text = string.Format(LanguageSettings.Current.AudioToText.TranscribingXOfY, _batchFileNumber, listViewInputFiles.Items.Count);
TaskbarList.SetProgressValue(_parentForm.Handle, 1, 100);
labelProgress.Text = string.Format(LanguageSettings.Current.AudioToText.TranscribingXOfY, _batchFileNumber, listViewInputFiles.Items.Count);
var buffer = new byte[4096];
@ -237,9 +243,6 @@ namespace Nikse.SubtitleEdit.Forms
while ((bytesRead = source.Read(buffer, 0, buffer.Length)) > 0)
_bytesWavRead += bytesRead;
progressBar1.Value = (int)(_bytesWavRead * 100.0 / _bytesWavTotal);
if (rec.AcceptWaveform(buffer, bytesRead))
var res = rec.Result();
@ -252,11 +255,6 @@ namespace Nikse.SubtitleEdit.Forms
textBoxLog.AppendText(res.RemoveChar('\r', '\n'));
if (!_batchMode)
TaskbarList.SetProgressValue(_parentForm.Handle, Math.Max(1, progressBar1.Value), progressBar1.Maximum);
if (_cancel)
TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
@ -296,116 +294,6 @@ namespace Nikse.SubtitleEdit.Forms
return list;
private string GenerateWavFile(string videoFileName, int audioTrackNumber)
var outWaveFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".wav");
var process = GetFfmpegProcess(videoFileName, audioTrackNumber, outWaveFile);
progressBar1.Style = ProgressBarStyle.Marquee;
progressBar1.Visible = true;
double seconds = 0;
buttonCancel.Visible = true;
process.PriorityClass = ProcessPriorityClass.Normal;
// ignored
_cancel = false;
string targetDriveLetter = null;
if (Configuration.IsRunningOnWindows)
var root = Path.GetPathRoot(outWaveFile);
if (root.Length > 1 && root[1] == ':')
targetDriveLetter = root.Remove(1);
while (!process.HasExited)
seconds += 0.1;
if (seconds < 60)
labelProgress.Text = string.Format(LanguageSettings.Current.AddWaveform.ExtractingSeconds, seconds);
labelProgress.Text = string.Format(LanguageSettings.Current.AddWaveform.ExtractingMinutes, (int)(seconds / 60), (int)(seconds % 60));
if (_cancel)
progressBar1.Visible = false;
buttonCancel.Visible = false;
DialogResult = DialogResult.Cancel;
return null;
if (targetDriveLetter != null && seconds > 1 && Convert.ToInt32(seconds) % 10 == 0)
var drive = new DriveInfo(targetDriveLetter);
if (drive.IsReady)
if (drive.AvailableFreeSpace < 50 * 1000000) // 50 mb
labelInfo.ForeColor = Color.Red;
labelInfo.Text = LanguageSettings.Current.AddWaveform.LowDiskSpace;
// ignored
return outWaveFile;
private static Process GetFfmpegProcess(string videoFileName, int audioTrackNumber, string outWaveFile)
if (!File.Exists(Configuration.Settings.General.FFmpegLocation) && Configuration.IsRunningOnWindows)
return null;
var audioParameter = string.Empty;
if (audioTrackNumber > 0)
audioParameter = $"-map 0:a:{audioTrackNumber}";
const string fFmpegWaveTranscodeSettings = "-i \"{0}\" -vn -ar 16000 -ac 1 -ab 128 -vol 448 -f wav {2} \"{1}\"";
//-i indicates the input
//-vn means no video output
//-ar 16000 sets the audio sampling frequency (16000 Hz in this command)
//-ab sets the audio bit rate (128 in this command)
//-vol 448 will boost the volume... 256 is normal
//-ac 1 means 1 channel (mono)
// "-map 0:a:0" is the first audio stream, "-map 0:a:1" is the second audio stream
var exeFilePath = Configuration.Settings.General.FFmpegLocation;
if (!Configuration.IsRunningOnWindows)
exeFilePath = "ffmpeg";
var parameters = string.Format(fFmpegWaveTranscodeSettings, videoFileName, outWaveFile, audioParameter);
return new Process { StartInfo = new ProcessStartInfo(exeFilePath, parameters) { WindowStyle = ProcessWindowStyle.Hidden, CreateNoWindow = true } };
private void buttonCancel_Click(object sender, EventArgs e)
if (buttonGenerate.Enabled)
@ -427,21 +315,6 @@ namespace Nikse.SubtitleEdit.Forms
Configuration.Settings.Tools.VoskModel = comboBoxModels.Text;
Configuration.Settings.Tools.VoskPostProcessing = checkBoxUsePostProcessing.Checked;
foreach (var fileName in _filesToDelete)
if (File.Exists(fileName))
// ignore
private void AudioToText_KeyDown(object sender, KeyEventArgs e)
@ -516,57 +389,10 @@ namespace Nikse.SubtitleEdit.Forms
private void buttonAddFile_Click(object sender, EventArgs e)
using (var openFileDialog1 = new OpenFileDialog())
openFileDialog1.Title = LanguageSettings.Current.General.OpenVideoFileTitle;
openFileDialog1.FileName = string.Empty;
openFileDialog1.Filter = UiUtil.GetVideoFileFilter(true);
openFileDialog1.Multiselect = true;
if (openFileDialog1.ShowDialog(this) != DialogResult.OK)
foreach (var fileName in openFileDialog1.FileNames)
private void buttonRemoveFile_Click(object sender, EventArgs e)
for (var i = listViewInputFiles.SelectedIndices.Count - 1; i >= 0; i--)
private void buttonClear_Click(object sender, EventArgs e)
private void buttonBatchMode_Click(object sender, EventArgs e)
_batchMode = !_batchMode;
private void ShowHideBatchMode()
if (_batchMode)
Height = checkBoxUsePostProcessing.Bottom + progressBar1.Height + buttonCancel.Height + 450;
listViewInputFiles.Visible = true;
Height = checkBoxUsePostProcessing.Bottom + progressBar1.Height + buttonCancel.Height + 70;
listViewInputFiles.Visible = false;
Height = checkBoxUsePostProcessing.Bottom + progressBar1.Height + buttonCancel.Height + 450;
listViewInputFiles.Visible = true;
private void AudioToText_Load(object sender, EventArgs e)

View File

@ -8744,7 +8744,7 @@ namespace Nikse.SubtitleEdit.Forms
if (SubtitleListview1.SelectedItems.Count > 0)
toolStripMenuItemSelectedLines.DropDownItems.Insert(0, audio);
var audioClip = new ToolStripMenuItem("Extract audio");
var audioClip = new ToolStripMenuItem(LanguageSettings.Current.Main.Menu.ContextMenu.ExtractAudio);
var audioToText = new ToolStripMenuItem(LanguageSettings.Current.Main.Menu.Video.VideoAudioToText);
audio.DropDownItems.Insert(0, audioClip);
audio.DropDownItems.Insert(0, audioToText);

View File

@ -1951,7 +1951,8 @@ namespace Nikse.SubtitleEdit.Logic
EditBookmark = "Edit bookmark...",
RemoveBookmark = "Remove bookmark",
GoToSourceView = "Go to source view",
GoToListView = "Go to list view"
GoToListView = "Go to list view",
ExtractAudio = "Extract audio...",

View File

@ -1797,6 +1797,7 @@
public string RemoveBookmark { get; set; }
public string GoToSourceView { get; set; }
public string GoToListView { get; set; }
public string ExtractAudio { get; set; }
public FileMenu File { get; set; }