Update PFW + fix issue in batch mode - thx Purfview :)

work on #8143
This commit is contained in:
Nikolaj Olsson 2024-04-07 16:00:07 +02:00
parent da152d9ecd
commit b600a1787c
8 changed files with 300 additions and 10 deletions

View File

@ -138,7 +138,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
private void button1_Click(object sender, EventArgs e)
{
comboBoxWhisperExtra.Text = $"--highlight_words true --sentence --max_line_width {Configuration.Settings.General.SubtitleLineMaximumLength} --max_line_count {Configuration.Settings.General.MaxNumberOfLines}";
comboBoxWhisperExtra.Text = $"--highlight_words true --max_line_width {Configuration.Settings.General.SubtitleLineMaximumLength} --max_line_count {Configuration.Settings.General.MaxNumberOfLines}";
}
private void buttonStandardAsia_Click(object sender, EventArgs e)

View File

@ -446,7 +446,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
progressBar1.Style = ProgressBarStyle.Blocks;
timer1.Start();
var transcript = TranscribeViaWhisper(waveFileName);
var transcript = TranscribeViaWhisper(waveFileName, _videoFileName);
timer1.Stop();
if (_cancel && (transcript == null || transcript.Paragraphs.Count == 0 || MessageBox.Show(LanguageSettings.Current.AudioToText.KeepPartialTranscription, Text, MessageBoxButtons.YesNoCancel) != DialogResult.Yes))
{
@ -564,7 +564,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
_outputText.Add(string.Empty);
progressBar1.Style = ProgressBarStyle.Blocks;
var transcript = TranscribeViaWhisper(waveFileName);
var transcript = TranscribeViaWhisper(waveFileName, videoFileName);
if (_cancel)
{
TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
@ -719,7 +719,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
var fileName = Path.Combine(Utilities.GetPathAndFileNameWithoutExtension(videoFileName)) + format.Extension;
if (File.Exists(fileName))
{
fileName = $"{Path.Combine(Utilities.GetPathAndFileNameWithoutExtension(videoFileName))}.{Guid.NewGuid().ToString()}.{format.Extension}";
fileName = $"{Path.Combine(Utilities.GetPathAndFileNameWithoutExtension(videoFileName))}.{Guid.NewGuid().ToString()}{format.Extension}";
}
try
@ -761,7 +761,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
return language != null ? language.Code : "en";
}
public Subtitle TranscribeViaWhisper(string waveFileName)
public Subtitle TranscribeViaWhisper(string waveFileName, string videoFileName)
{
_showProgressPct = -1;
var model = comboBoxModels.Items[comboBoxModels.SelectedIndex] as WhisperModel;
@ -823,9 +823,9 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
var inputFile = waveFileName;
if (!_useCenterChannelOnly &&
comboBoxWhisperEngine.Text == WhisperChoice.PurfviewFasterWhisper &&
_audioTrackNumber == 0)
_audioTrackNumber <= 0)
{
inputFile = _videoFileName;
inputFile = videoFileName;
}
var process = GetWhisperProcess(inputFile, model.Name, _languageCode, checkBoxTranslateToEnglish.Checked, OutputHandler);
@ -895,7 +895,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
process.Dispose();
if (GetResultFromSrt(waveFileName, _videoFileName, out var resultTexts, _outputText, _filesToDelete))
if (GetResultFromSrt(waveFileName, videoFileName, out var resultTexts, _outputText, _filesToDelete))
{
var subtitle = new Subtitle();
subtitle.Paragraphs.AddRange(resultTexts.Select(p => new Paragraph(p.Text, (double)p.Start * 1000.0, (double)p.End * 1000.0)).ToList());

View File

@ -89,15 +89,17 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
};
private const string DownloadUrlPurfviewFasterWhisper = "https://github.com/Purfview/whisper-standalone-win/releases/download/faster-whisper/Whisper-Faster_r192.1_windows.zip";
private const string DownloadUrlPurfviewFasterWhisper = "https://github.com/Purfview/whisper-standalone-win/releases/download/faster-whisper/Whisper-Faster_r192.3_windows.zip";
private static readonly string[] Sha512HashesPurfviewFasterWhisper =
{
"94faae09146e33e0d55d9190d5b3ed79ea3bda0cf9e7f308dd47812827e4f0c24d2037718aa3c647881008eaaa04bbed033ea338aaa8333aae536bb2f2b83256", // r192.1
"7e289659ff5622cdf99144602729a714985a7f11ebdc988f3f0721fe433d72e7fb751afeece94029f602b304690ad34f8a35bf5980af5517b2718ba07c163279", // r192.3
};
private static readonly string[] OldSha512HashesPurfviewFasterWhisper =
{
"3b84e21134b7f24b81d7252fe895299d14087cccc1d8b1e7ae187c190b87b0bf7d84d3ce0a4a2fc5d2f9c436a33003663763fd6f62e7f87da1683df7fbd6d10b", // r192.2
"94faae09146e33e0d55d9190d5b3ed79ea3bda0cf9e7f308dd47812827e4f0c24d2037718aa3c647881008eaaa04bbed033ea338aaa8333aae536bb2f2b83256", // r192.1
"3dee9ece233be4e661bab7555a2b4e7d4c53d823bf2b4032bd75857554a14a04745c57112946e735dc5ab6f8ec832483444cb95a0921f18b5f736787dbbc515c", // r189.1
"e78616511a92b21cb8ac82e23cdbd06f5b9310751e5f3fa940b5c48743b69bad130aaf6d629ae07c5388326f117be8f181b125ed04aacd23f1a80d8891be889b", // r186.1
"a16e2b5460d7f4b0d45de3f0e07b231d58ad4c79d077ad6b9c84a4e2ced4bd1cd3a7d9f01689f1d847ec8ff59c8f81cb742fcf2b153291ed6f15ec8b27adb998", // r167.2
@ -396,6 +398,8 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
"628dee27ab3030798c42983d0f544668f54e7c8d1c7a433b322b9c07286eedd10666d9b1f89764a75301b334cea9c7ad8bfbfeee00a98113b4730ee5cafe8812",
"56faadc85291049b1ad912de8c20fd262288f315d881e517085a15213690f2b242d80aedb2a4c213a7aa26b6ec43d2d26fe3674354a31f816d0e4bca07d002bc",
"d53002d273287bfcfcd678d3d9f1faabbbca533ac3fa11867be0e7e365d386bf8fddf591cad41345006406cac663868dd7214d680f36906abe0f7d851d989fa2",
"0f463526879a83b938c315d8ca865db89945beb8ba9fd44e74319ba567affb0fcf223d1ee662bf8be280e736e54f44beec2f1e33aac9d537d7d7ae9ba155b049",
"78365ba55f66ac018aa8ca405bf11bcb93ce0bff44a528e9304be14f99dc4f84f08ce1679c9cf3d135dd56ade79881318833d69397ca55caa062c6214a0d4cff",
};
return oldHashes.Contains(hash);

View File

@ -19025,6 +19025,19 @@ namespace Nikse.SubtitleEdit.Forms
RunCustomSearch(Configuration.Settings.VideoControls.CustomSearchUrl5);
e.SuppressKeyPress = true;
}
else if (e.Modifiers == (Keys.Alt | Keys.Shift | Keys.Control) && e.KeyCode == Keys.T)
{
using (var form = new TextToSpeech(_subtitle, _videoFileName))
{
if (form.ShowDialog(this) != DialogResult.OK)
{
return;
}
}
e.SuppressKeyPress = true;
}
// put new entries above tabs

104
src/ui/Forms/TextToSpeech.Designer.cs generated Normal file
View File

@ -0,0 +1,104 @@
namespace Nikse.SubtitleEdit.Forms
{
    /// <summary>
    /// Designer half of the <see cref="TextToSpeech"/> form: declares the controls
    /// (OK button, description label, "Go" button and input text box) and lays them out.
    /// </summary>
    partial class TextToSpeech
    {
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing && (components != null))
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.buttonOK = new System.Windows.Forms.Button();
            this.labelDescription = new System.Windows.Forms.Label();
            this.button1 = new System.Windows.Forms.Button();
            this.textBox1 = new Nikse.SubtitleEdit.Controls.NikseTextBox();
            this.SuspendLayout();
            //
            // buttonOK
            //
            this.buttonOK.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
            this.buttonOK.ImeMode = System.Windows.Forms.ImeMode.NoControl;
            this.buttonOK.Location = new System.Drawing.Point(724, 407);
            this.buttonOK.Name = "buttonOK";
            this.buttonOK.Size = new System.Drawing.Size(75, 23);
            this.buttonOK.TabIndex = 7;
            this.buttonOK.Text = "&OK";
            this.buttonOK.UseVisualStyleBackColor = true;
            this.buttonOK.Click += new System.EventHandler(this.buttonOK_Click);
            //
            // labelDescription
            //
            this.labelDescription.AutoSize = true;
            this.labelDescription.ImeMode = System.Windows.Forms.ImeMode.NoControl;
            this.labelDescription.Location = new System.Drawing.Point(9, 33);
            this.labelDescription.Name = "labelDescription";
            this.labelDescription.Size = new System.Drawing.Size(76, 13);
            this.labelDescription.TabIndex = 9;
            this.labelDescription.Text = "Name of voice";
            //
            // button1
            //
            // Anchored top-left so the button stays directly below the text box when the
            // form is resized (it was anchored bottom-right, which made it drift away).
            this.button1.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)));
            this.button1.ImeMode = System.Windows.Forms.ImeMode.NoControl;
            this.button1.Location = new System.Drawing.Point(12, 75);
            this.button1.Name = "button1";
            this.button1.Size = new System.Drawing.Size(75, 23);
            this.button1.TabIndex = 11;
            this.button1.Text = "Go";
            this.button1.UseVisualStyleBackColor = true;
            this.button1.Click += new System.EventHandler(this.button1_Click);
            //
            // textBox1
            //
            this.textBox1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
            | System.Windows.Forms.AnchorStyles.Right)));
            this.textBox1.FocusedColor = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
            this.textBox1.Location = new System.Drawing.Point(12, 49);
            this.textBox1.Name = "textBox1";
            this.textBox1.Size = new System.Drawing.Size(229, 20);
            this.textBox1.TabIndex = 10;
            this.textBox1.Text = "Hello, how are you?";
            //
            // TextToSpeech
            //
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.ClientSize = new System.Drawing.Size(811, 442);
            this.Controls.Add(this.button1);
            this.Controls.Add(this.textBox1);
            this.Controls.Add(this.labelDescription);
            this.Controls.Add(this.buttonOK);
            this.Name = "TextToSpeech";
            // Window title; previously misspelled as "TextToSpeach".
            this.Text = "TextToSpeech";
            this.ResumeLayout(false);
            this.PerformLayout();
        }

        #endregion

        private System.Windows.Forms.Button buttonOK;
        private Controls.NikseTextBox textBox1;
        private System.Windows.Forms.Label labelDescription;
        private System.Windows.Forms.Button button1;
    }
}

View File

@ -0,0 +1,39 @@
using Nikse.SubtitleEdit.Logic;
using System;
using System.Speech.Synthesis;
using System.Windows.Forms;
namespace Nikse.SubtitleEdit.Forms
{
    /// <summary>
    /// Dialog that renders the text typed into the text box to a wave file
    /// using <see cref="SpeechSynthesizer"/>.
    /// </summary>
    public partial class TextToSpeech : Form
    {
        /// <summary>
        /// File name (without directory) of the wave file written by the "Go" button.
        /// </summary>
        private const string OutputWaveFileName = "Sample.wav";

        /// <summary>
        /// Creates the dialog, applies the common UI font fixes and localizes the OK button.
        /// </summary>
        /// <param name="subtitle">Subtitle passed in by the caller; not used by this form yet.</param>
        /// <param name="videoFileName">Video file name passed in by the caller; not used by this form yet.</param>
        public TextToSpeech(Core.Common.Subtitle subtitle, string videoFileName)
        {
            UiUtil.PreInitialize(this);
            InitializeComponent();
            UiUtil.FixFonts(this);
            buttonOK.Text = LanguageSettings.Current.General.Ok;
            UiUtil.FixLargeFonts(this, buttonOK);
        }

        /// <summary>
        /// Speaks the content of the text box into a wave file in the user's temp folder.
        /// </summary>
        /// <param name="sender">The "Go" button.</param>
        /// <param name="e">Unused event data.</param>
        private void button1_Click(object sender, EventArgs e)
        {
            var text = textBox1.Text;

            // Write to the temp folder instead of a hard-coded "C:\data" directory,
            // which does not exist (or is not writable) on most machines and made
            // SetOutputToWaveFile throw.
            var waveFileName = System.IO.Path.Combine(System.IO.Path.GetTempPath(), OutputWaveFileName);

            using (SpeechSynthesizer synth = new SpeechSynthesizer())
            {
                synth.SetOutputToWaveFile(waveFileName);
                synth.SelectVoiceByHints(VoiceGender.Male, VoiceAge.Adult);
                var builder = new PromptBuilder();
                builder.AppendText(text);

                // Speak is synchronous: the call returns once the prompt has been rendered.
                synth.Speak(builder);
            }
        }

        /// <summary>
        /// Closes the dialog with <see cref="DialogResult.OK"/>.
        /// </summary>
        /// <param name="sender">The OK button.</param>
        /// <param name="e">Unused event data.</param>
        private void buttonOK_Click(object sender, EventArgs e)
        {
            DialogResult = DialogResult.OK;
        }
    }
}

View File

@ -0,0 +1,120 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that supports
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@ -92,6 +92,7 @@
<Reference Include="System.Security.Principal.Windows, Version=5.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<HintPath>..\..\packages\System.Security.Principal.Windows.5.0.0\lib\net461\System.Security.Principal.Windows.dll</HintPath>
</Reference>
<Reference Include="System.Speech" />
<Reference Include="System.Web.Services" />
<Reference Include="System.Windows.Forms" />
<Reference Include="System.Xml" />
@ -679,6 +680,12 @@
<Compile Include="Forms\SeJobs\SeJobExport.Designer.cs">
<DependentUpon>SeJobExport.cs</DependentUpon>
</Compile>
<Compile Include="Forms\TextToSpeech.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\TextToSpeech.Designer.cs">
<DependentUpon>TextToSpeech.cs</DependentUpon>
</Compile>
<Compile Include="Forms\TimedTextPropertiesImsc11.cs">
<SubType>Form</SubType>
</Compile>
@ -1944,6 +1951,9 @@
<EmbeddedResource Include="Forms\ShotChanges\AdjustTimingViaShotChanges.resx">
<DependentUpon>AdjustTimingViaShotChanges.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\TextToSpeech.resx">
<DependentUpon>TextToSpeech.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\TimedTextPropertiesImsc11.resx">
<DependentUpon>TimedTextPropertiesImsc11.cs</DependentUpon>
</EmbeddedResource>