From 1db474deb07e3235bec4e0502716768ead4e1625 Mon Sep 17 00:00:00 2001 From: niksedk Date: Fri, 12 Apr 2024 07:04:53 +0200 Subject: [PATCH] Work on tts --- src/libse/TextToSpeech/PiperModels.cs | 2 +- src/ui/Forms/Tts/TextToSpeech.Designer.cs | 64 ++++++++++++++--------- src/ui/Forms/Tts/TextToSpeech.cs | 54 +++++++++++++++---- 3 files changed, 82 insertions(+), 38 deletions(-) diff --git a/src/libse/TextToSpeech/PiperModels.cs b/src/libse/TextToSpeech/PiperModels.cs index f1a0eb688..abbc16278 100644 --- a/src/libse/TextToSpeech/PiperModels.cs +++ b/src/libse/TextToSpeech/PiperModels.cs @@ -52,7 +52,7 @@ namespace Nikse.SubtitleEdit.Core.TextToSpeech new PiperModels("joe", "English US", "medium", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/joe/medium/en_US-joe-medium.onnx", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/joe/medium/en_US-joe-medium.onnx.json"), new PiperModels("kristin", "English US", "medium", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/kristin/medium/en_US-kristin-medium.onnx", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/kristin/medium/en_US-kristin-medium.onnx.json"), new PiperModels("kusal", "English US", "medium", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/kusal/medium/en_US-kusal-medium.onnx", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/kusal/medium/en_US-kusal-medium.onnx.json"), - new PiperModels("l2arctic", "English US", "medium", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/l2arctic/medium/en_US-l2arctic-medium.onnx", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/l2arctic/medium/en_US-l2arctic-medium.onnx.json?download=true.json"), + new PiperModels("l2arctic", "English US", "medium", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/l2arctic/medium/en_US-l2arctic-medium.onnx", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/l2arctic/medium/en_US-l2arctic-medium.onnx.json"), new PiperModels("lessac", "English US", "high", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/high/en_US-lessac-high.onnx", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/high/en_US-lessac-high.onnx.json"), new PiperModels("libritts", "English US", "high", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/libritts/high/en_US-libritts-high.onnx", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/libritts/high/en_US-libritts-high.onnx.json"), new PiperModels("ljspeech", "English US", "high", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/ljspeech/high/en_US-ljspeech-high.onnx", "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/ljspeech/high/en_US-ljspeech-high.onnx.json"), diff --git a/src/ui/Forms/Tts/TextToSpeech.Designer.cs b/src/ui/Forms/Tts/TextToSpeech.Designer.cs index 234fe175a..3814b73f6 100644 --- a/src/ui/Forms/Tts/TextToSpeech.Designer.cs +++ b/src/ui/Forms/Tts/TextToSpeech.Designer.cs @@ -37,13 +37,14 @@ this.groupBoxMsSettings = new System.Windows.Forms.GroupBox(); this.buttonTestVoice = new System.Windows.Forms.Button(); this.labelMsVoice = new System.Windows.Forms.Label(); - this.nikseComboBoxVoice = new Nikse.SubtitleEdit.Controls.NikseComboBox(); this.checkBoxAddToVideoFile = new System.Windows.Forms.CheckBox(); this.listView1 = new System.Windows.Forms.ListView(); this.columnHeader1 = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader())); this.columnHeader2 = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader())); this.contextMenuStripActors = new System.Windows.Forms.ContextMenuStrip(this.components); + this.nikseComboBoxVoice = new Nikse.SubtitleEdit.Controls.NikseComboBox(); this.nikseComboBoxEngine = new Nikse.SubtitleEdit.Controls.NikseComboBox(); + this.TextBoxTest = new Nikse.SubtitleEdit.Controls.NikseTextBox(); this.groupBoxMsSettings.SuspendLayout(); this.SuspendLayout(); // @@ -106,6 +107,7 @@ // this.groupBoxMsSettings.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) | System.Windows.Forms.AnchorStyles.Right))); + this.groupBoxMsSettings.Controls.Add(this.TextBoxTest); this.groupBoxMsSettings.Controls.Add(this.buttonTestVoice); this.groupBoxMsSettings.Controls.Add(this.labelMsVoice); this.groupBoxMsSettings.Controls.Add(this.nikseComboBoxVoice); @@ -118,7 +120,7 @@ // // buttonTestVoice // - this.buttonTestVoice.Location = new System.Drawing.Point(17, 80); + this.buttonTestVoice.Location = new System.Drawing.Point(17, 92); this.buttonTestVoice.Name = "buttonTestVoice"; this.buttonTestVoice.Size = new System.Drawing.Size(150, 23); this.buttonTestVoice.TabIndex = 17; @@ -137,30 +139,6 @@ this.labelMsVoice.TabIndex = 16; this.labelMsVoice.Text = "Voice"; // - // nikseComboBoxVoice - // - this.nikseComboBoxVoice.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right))); - this.nikseComboBoxVoice.BackColor = System.Drawing.SystemColors.Window; - this.nikseComboBoxVoice.BackColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(240)))), ((int)(((byte)(240)))), ((int)(((byte)(240))))); - this.nikseComboBoxVoice.BorderColor = System.Drawing.Color.FromArgb(((int)(((byte)(171)))), ((int)(((byte)(173)))), ((int)(((byte)(179))))); - this.nikseComboBoxVoice.BorderColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(120)))), ((int)(((byte)(120)))), ((int)(((byte)(120))))); - this.nikseComboBoxVoice.ButtonForeColor = System.Drawing.SystemColors.ControlText; - this.nikseComboBoxVoice.ButtonForeColorDown = System.Drawing.Color.Orange; - this.nikseComboBoxVoice.ButtonForeColorOver = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215))))); - this.nikseComboBoxVoice.DropDownHeight = 400; - this.nikseComboBoxVoice.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; - this.nikseComboBoxVoice.DropDownWidth = 0; - this.nikseComboBoxVoice.FormattingEnabled = false; - this.nikseComboBoxVoice.Location = new System.Drawing.Point(17, 41); - this.nikseComboBoxVoice.MaxLength = 32767; - this.nikseComboBoxVoice.Name = "nikseComboBoxVoice"; - this.nikseComboBoxVoice.SelectedIndex = -1; - this.nikseComboBoxVoice.SelectedItem = null; - this.nikseComboBoxVoice.SelectedText = ""; - this.nikseComboBoxVoice.Size = new System.Drawing.Size(351, 23); - this.nikseComboBoxVoice.TabIndex = 15; - this.nikseComboBoxVoice.UsePopupWindow = false; - // // checkBoxAddToVideoFile // this.checkBoxAddToVideoFile.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); @@ -208,6 +186,30 @@ this.contextMenuStripActors.Name = "contextMenuStripActors"; this.contextMenuStripActors.Size = new System.Drawing.Size(61, 4); // + // nikseComboBoxVoice + // + this.nikseComboBoxVoice.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right))); + this.nikseComboBoxVoice.BackColor = System.Drawing.SystemColors.Window; + this.nikseComboBoxVoice.BackColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(240)))), ((int)(((byte)(240)))), ((int)(((byte)(240))))); + this.nikseComboBoxVoice.BorderColor = System.Drawing.Color.FromArgb(((int)(((byte)(171)))), ((int)(((byte)(173)))), ((int)(((byte)(179))))); + this.nikseComboBoxVoice.BorderColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(120)))), ((int)(((byte)(120)))), ((int)(((byte)(120))))); + this.nikseComboBoxVoice.ButtonForeColor = System.Drawing.SystemColors.ControlText; + this.nikseComboBoxVoice.ButtonForeColorDown = System.Drawing.Color.Orange; + this.nikseComboBoxVoice.ButtonForeColorOver = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215))))); + this.nikseComboBoxVoice.DropDownHeight = 400; + this.nikseComboBoxVoice.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.nikseComboBoxVoice.DropDownWidth = 0; + this.nikseComboBoxVoice.FormattingEnabled = false; + this.nikseComboBoxVoice.Location = new System.Drawing.Point(17, 41); + this.nikseComboBoxVoice.MaxLength = 32767; + this.nikseComboBoxVoice.Name = "nikseComboBoxVoice"; + this.nikseComboBoxVoice.SelectedIndex = -1; + this.nikseComboBoxVoice.SelectedItem = null; + this.nikseComboBoxVoice.SelectedText = ""; + this.nikseComboBoxVoice.Size = new System.Drawing.Size(351, 23); + this.nikseComboBoxVoice.TabIndex = 15; + this.nikseComboBoxVoice.UsePopupWindow = false; + // // nikseComboBoxEngine // this.nikseComboBoxEngine.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right))); @@ -235,6 +237,15 @@ this.nikseComboBoxEngine.UsePopupWindow = false; this.nikseComboBoxEngine.SelectedIndexChanged += new System.EventHandler(this.nikseComboBoxEngine_SelectedIndexChanged); // + // TextBoxTest + // + this.TextBoxTest.FocusedColor = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215))))); + this.TextBoxTest.Location = new System.Drawing.Point(17, 121); + this.TextBoxTest.Name = "TextBoxTest"; + this.TextBoxTest.Size = new System.Drawing.Size(351, 20); + this.TextBoxTest.TabIndex = 18; + this.TextBoxTest.Text = "Hello, how are you?"; + // // TextToSpeech // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); @@ -282,5 +293,6 @@ private System.Windows.Forms.ColumnHeader columnHeader2; private System.Windows.Forms.ContextMenuStrip contextMenuStripActors; private System.Windows.Forms.Button buttonTestVoice; + private Controls.NikseTextBox TextBoxTest; } } \ No newline at end of file diff --git a/src/ui/Forms/Tts/TextToSpeech.cs b/src/ui/Forms/Tts/TextToSpeech.cs index 19ddc9dba..ef5fb056c 100644 --- a/src/ui/Forms/Tts/TextToSpeech.cs +++ b/src/ui/Forms/Tts/TextToSpeech.cs @@ -95,7 +95,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts _waveFolder = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); Directory.CreateDirectory(_waveFolder); - GenerateParagraphAudio(_subtitle); + GenerateParagraphAudio(_subtitle, true); var fileNames = FixParagraphAudioSpeed(); @@ -115,23 +115,23 @@ namespace Nikse.SubtitleEdit.Forms.Tts UiUtil.OpenFolder(_waveFolder); } - private void GenerateParagraphAudio(Subtitle subtitle) + private void GenerateParagraphAudio(Subtitle subtitle, bool showProgressBar) { if (nikseComboBoxEngine.SelectedIndex == 0) { - GenerateParagraphAudioMs(subtitle, true); + GenerateParagraphAudioMs(subtitle, showProgressBar); } else if (nikseComboBoxEngine.SelectedIndex == 1) { - GenerateParagraphAudioPiperTts(subtitle, true); + GenerateParagraphAudioPiperTts(subtitle, showProgressBar); } else if (nikseComboBoxEngine.SelectedIndex == 2) { - GenerateParagraphAudioTortoiseTts(subtitle, true); + GenerateParagraphAudioTortoiseTts(subtitle, showProgressBar); } else if (nikseComboBoxEngine.SelectedIndex == 3) { - GenerateParagraphAudioMimic3(subtitle, true); + GenerateParagraphAudioMimic3(subtitle, showProgressBar); } } @@ -182,6 +182,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts var next = _subtitle.GetParagraphOrDefault(index + 1); var pFileName = Path.Combine(_waveFolder, index + ".wav"); + //TODO: analyse audio and remove silence at start and end (ffmpeg -af silenceremove=1:0:-50dB:1:1:-50dB) var outputFileName1 = Path.Combine(_waveFolder, index + "_u.wav"); var trimProcess = VideoPreviewGenerator.TrimSilenceStartAndEnd(pFileName, outputFileName1); trimProcess.Start(); @@ -368,6 +369,17 @@ namespace Nikse.SubtitleEdit.Forms.Tts var p = subtitle.Paragraphs[index]; var outputFileName = Path.Combine(_waveFolder, index + ".wav"); + if (File.Exists(outputFileName)) + { + try + { + File.Delete(outputFileName); + } + catch + { + // ignore + } + } var voice = voices.First(x => x.ToString() == nikseComboBoxVoice.Text); if (_actorAndVoices.Count > 0 && !string.IsNullOrEmpty(p.Actor)) @@ -645,22 +657,42 @@ namespace Nikse.SubtitleEdit.Forms.Tts { try { + if (string.IsNullOrWhiteSpace(TextBoxTest.Text)) + { + return; + } + + buttonTestVoice.Enabled = false; _waveFolder = Path.GetTempPath(); - var text = "Hello there, how are you?"; + var text = TextBoxTest.Text; var sub = new Subtitle(); sub.Paragraphs.Add(new Paragraph(text, 0, 2500)); - GenerateParagraphAudio(sub); + GenerateParagraphAudio(sub, false); var waveFileName = Path.Combine(_waveFolder, "0.wav"); using (var soundPlayer = new System.Media.SoundPlayer(waveFileName)) { soundPlayer.Play(); } - File.Delete(waveFileName); + TaskDelayHelper.RunDelayed(TimeSpan.FromSeconds(30), () => + { + try + { + File.Delete(waveFileName); + } + catch + { + // ignore + } + }); } - catch + catch (Exception ex) { - // ignore + MessageBox.Show(ex.Message + Environment.NewLine + ex.StackTrace); + } + finally + { + buttonTestVoice.Enabled = true; } } }