Work on tts

This commit is contained in:
Nikolaj Olsson 2024-04-15 19:50:29 +02:00
parent 122aa406f5
commit d42a974457
4 changed files with 374 additions and 79 deletions

View File

@ -190,6 +190,8 @@ namespace Nikse.SubtitleEdit.Core.Common
public string AnthropicApiModel { get; set; } public string AnthropicApiModel { get; set; }
public int AutoTranslateDelaySeconds { get; set; } public int AutoTranslateDelaySeconds { get; set; }
public string GeminiProApiKey { get; set; } public string GeminiProApiKey { get; set; }
public string TextToSpeechEngine { get; set; }
public string TextToSpeechElevenLabsApiKey { get; set; }
public bool DisableVidoInfoViaLabel { get; set; } public bool DisableVidoInfoViaLabel { get; set; }
public bool ListViewSyntaxColorDurationSmall { get; set; } public bool ListViewSyntaxColorDurationSmall { get; set; }
public bool ListViewSyntaxColorDurationBig { get; set; } public bool ListViewSyntaxColorDurationBig { get; set; }
@ -5449,6 +5451,18 @@ $HorzAlign = Center
settings.Tools.GeminiProApiKey = subNode.InnerText; settings.Tools.GeminiProApiKey = subNode.InnerText;
} }
subNode = node.SelectSingleNode("TextToSpeechEngine");
if (subNode != null)
{
settings.Tools.TextToSpeechEngine = subNode.InnerText;
}
subNode = node.SelectSingleNode("TextToSpeechElevenLabsApiKey");
if (subNode != null)
{
settings.Tools.TextToSpeechElevenLabsApiKey = subNode.InnerText;
}
subNode = node.SelectSingleNode("TranslateViaCopyPasteAutoCopyToClipboard"); subNode = node.SelectSingleNode("TranslateViaCopyPasteAutoCopyToClipboard");
if (subNode != null) if (subNode != null)
{ {
@ -11969,6 +11983,8 @@ $HorzAlign = Center
textWriter.WriteElementString("AnthropicApiModel", settings.Tools.AnthropicApiModel); textWriter.WriteElementString("AnthropicApiModel", settings.Tools.AnthropicApiModel);
textWriter.WriteElementString("AutoTranslateDelaySeconds", settings.Tools.AutoTranslateDelaySeconds.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("AutoTranslateDelaySeconds", settings.Tools.AutoTranslateDelaySeconds.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("GeminiProApiKey", settings.Tools.GeminiProApiKey); textWriter.WriteElementString("GeminiProApiKey", settings.Tools.GeminiProApiKey);
textWriter.WriteElementString("TextToSpeechEngine", settings.Tools.TextToSpeechEngine);
textWriter.WriteElementString("TextToSpeechElevenLabsApiKey", settings.Tools.TextToSpeechElevenLabsApiKey);
textWriter.WriteElementString("DisableVidoInfoViaLabel", settings.Tools.DisableVidoInfoViaLabel.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("DisableVidoInfoViaLabel", settings.Tools.DisableVidoInfoViaLabel.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("ListViewSyntaxColorDurationSmall", settings.Tools.ListViewSyntaxColorDurationSmall.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("ListViewSyntaxColorDurationSmall", settings.Tools.ListViewSyntaxColorDurationSmall.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("ListViewSyntaxColorDurationBig", settings.Tools.ListViewSyntaxColorDurationBig.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("ListViewSyntaxColorDurationBig", settings.Tools.ListViewSyntaxColorDurationBig.ToString(CultureInfo.InvariantCulture));

View File

@ -0,0 +1,76 @@
using System.Collections.Generic;
namespace Nikse.SubtitleEdit.Core.TextToSpeech
{
/// <summary>
/// Describes one built-in ElevenLabs voice and provides the hard-coded list of known voices.
/// </summary>
public class ElevelLabModels
{
    /// <summary>Name of the voice, e.g. "Adam".</summary>
    public string Voice { get; set; }

    /// <summary>Accent/regional variant passed to the constructor, e.g. "American English".</summary>
    public string Language { get; set; }

    /// <summary>Gender of the voice, e.g. "Male" or "Female".</summary>
    public string Gender { get; set; }

    /// <summary>The voice id passed to the constructor as <c>voiceId</c>.</summary>
    public string Model { get; set; }

    /// <summary>
    /// Returns the display text for the voice in the form "Language - Voice (Gender)".
    /// </summary>
    public override string ToString()
    {
        return $"{Language} - {Voice} ({Gender})";
    }

    /// <summary>
    /// Creates a voice description.
    /// </summary>
    /// <param name="language">Base language, e.g. "English" (currently not stored).</param>
    /// <param name="voice">Name of the voice.</param>
    /// <param name="gender">Gender of the voice.</param>
    /// <param name="description">Short description of the voice (currently not stored).</param>
    /// <param name="useCase">Typical use case of the voice (currently not stored).</param>
    /// <param name="accent">Accent/regional variant; stored in <see cref="Language"/>.</param>
    /// <param name="voiceId">Voice id; stored in <see cref="Model"/>.</param>
    public ElevelLabModels(string language, string voice, string gender, string description, string useCase, string accent, string voiceId)
    {
        Voice = voice;
        Language = accent; // the accent is used as the displayed language
        Gender = gender; // was "Gender = Gender" (self-assignment), which left Gender null
        Model = voiceId;
    }

    /// <summary>
    /// Builds the list of known ElevenLabs voices.
    /// </summary>
    /// <returns>A new list with one entry per language/voice combination.</returns>
    public static List<ElevelLabModels> GetVoices()
    {
        return new List<ElevelLabModels>
        {
            new ElevelLabModels("English", "Adam", "Male", "Deep", "Narration", "American English", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("English", "Charlie", "Male", "Casual", "Conversational", "Australian English", "IKne3meq5aSn9XLyUdCD"),
            new ElevelLabModels("English", "Clyde", "Male", "War veteran", "Video games", "American English", "2EiwWnXFnvU5JabPnv8n"),
            new ElevelLabModels("English", "Dorothy", "Female", "Pleasant", "Childrens stories", "British English", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("English", "Freya", "Female", "Overhyped", "Video games", "American English", "jsCqWAovK2LkecY7zXl4"),
            new ElevelLabModels("English", "Gigi", "Female", "Childlish", "Animation", "American English", "jBpfuIE2acCO8z3wKNLl"),
            new ElevelLabModels("English", "Harry", "Male", "Anxious", "Video games", "American English", "SOYHLrjzK2X1ezoPC6cr"),
            new ElevelLabModels("English", "James", "Male", "Calm", "News", "Australian English", "ZQe5CZNOzWyzPSCn5a3c"),
            new ElevelLabModels("English", "Lily", "Female", "Raspy", "Narration", "British English", "pFZP5JQG7iQjIQuC4Bku"),
            new ElevelLabModels("English", "Rachel", "Female", "Calm", "Narration", "American English", "21m00Tcm4TlvDq8ikWAM"),

            new ElevelLabModels("Spanish", "Dorothy", "Female", "Pleasant", "News", "Chilean Spanish", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("Spanish", "Glinda", "Female", "Witch", "Video games", "Mexican Spanish", "z9fAnlkpzviPz146aGWa"),
            new ElevelLabModels("Spanish", "Grace", "Female", "gentle", "Audiobook", "Mexican Spanish", "oWAxZDx7w5VEj9dCyTzz"),
            new ElevelLabModels("Spanish", "Matilda", "Female", "Warm", "Audiobook", "Chilean Spanish", "XrExE9yKIg1WjnnlVkGX"),

            new ElevelLabModels("German", "Sarah", "Female", "Soft", "News", "Germany German", "EXAVITQu4vr4xnSDxMaL"),
            new ElevelLabModels("German", "Serena", "Female", "Pleasant", "Interactive", "Germany German", "pMsXgVXv3BLzUgSXRplE"),
            new ElevelLabModels("German", "Matilda", "Female", "Warm", "Audiobook", "Germany German", "XrExE9yKIg1WjnnlVkGX"),
            new ElevelLabModels("German", "Freya", "Female", "Overhyped", "Video games", "Germany German", "jsCqWAovK2LkecY7zXl4"),
            new ElevelLabModels("German", "Adam", "Male", "Deep", "Narration", "Germany German", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("German", "Antoni", "Male", "Well-rounded", "Narration", "Germany German", "ErXwobaYiN019PkySvjV"),

            new ElevelLabModels("French", "Adam", "Male", "Deep", "Narration", "Canadian French", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("French", "Antoni", "Male", "Well-rounded", "Narration", "Canadian French", "ErXwobaYiN019PkySvjV"),
            new ElevelLabModels("French", "Arnold", "Male", "Crisp", "Narration", "Canadian French", "VR6AewLTigWG4xSOukaG"),
            new ElevelLabModels("French", "Bill", "Male", "Strong", "documentary", "Canadian French", "pqHfZKP75CvOlQylNhV4"),
            new ElevelLabModels("French", "George", "Male", "Raspy", "Narration", "Canadian French", "JBFqnCBsd6RMkjVDRZzb"),
            new ElevelLabModels("French", "Charlotte", "Female", "Seductive", "Video games", "Canadian French", "XB0fDUnXU5powFXDhCwa"),
            new ElevelLabModels("French", "Domi", "Female", "Strong", "Narration", "Canadian French", "AZnzlk1XvdvUeBnXmlld"),
            new ElevelLabModels("French", "Dorothy", "Female", "Pleasant", "Childrens stories", "Canadian French", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("French", "Serena", "Female", "Pleasant", "Interactive", "Canadian French", "pMsXgVXv3BLzUgSXRplE"),
            new ElevelLabModels("French", "Sarah", "Female", "Soft", "News", "Canadian French", "EXAVITQu4vr4xnSDxMaL"),

            new ElevelLabModels("Polish", "Adam", "Male", "Deep", "Narration", "Poland Polish", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("Polish", "Charlie", "Male", "Casual", "Conversational", "Poland Polish", "IKne3meq5aSn9XLyUdCD"),
            new ElevelLabModels("Polish", "Clyde", "Male", "War veteran", "video games", "Poland Polish", "2EiwWnXFnvU5JabPnv8n"),
            new ElevelLabModels("Polish", "Dorothy", "Female", "Pleasant", "Childrens stories", "Poland Polish", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("Polish", "Gigi", "Female", "Childlish", "Animation", "Poland Polish", "jBpfuIE2acCO8z3wKNLl"),
            new ElevelLabModels("Polish", "Harry", "Male", "Anxious", "Video games", "Poland Polish", "SOYHLrjzK2X1ezoPC6cr"),

            new ElevelLabModels("Italian", "Adam", "Male", "Deep", "Narration", "Italy Italian", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("Italian", "Charlie", "Male", "Casual", "Conversational", "Italy Italian", "IKne3meq5aSn9XLyUdCD"),
            new ElevelLabModels("Italian", "Clyde", "Male", "War veteran", "Video games", "Italy Italian", "2EiwWnXFnvU5JabPnv8n"),
            new ElevelLabModels("Italian", "Dorothy", "Female", "Pleasant", "Childrens stories", "Italy Italian", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("Italian", "Gigi", "Female", "Childlish", "Animation", "Italy Italian", "jBpfuIE2acCO8z3wKNLl"),
            new ElevelLabModels("Italian", "Harry", "Male", "Anxious", "Video games", "Italy Italian", "SOYHLrjzK2X1ezoPC6cr"),
        };
    }
}
}

View File

@ -35,17 +35,19 @@
this.progressBar1 = new System.Windows.Forms.ProgressBar(); this.progressBar1 = new System.Windows.Forms.ProgressBar();
this.labelEngine = new System.Windows.Forms.Label(); this.labelEngine = new System.Windows.Forms.Label();
this.groupBoxMsSettings = new System.Windows.Forms.GroupBox(); this.groupBoxMsSettings = new System.Windows.Forms.GroupBox();
this.TextBoxTest = new Nikse.SubtitleEdit.Controls.NikseTextBox();
this.buttonTestVoice = new System.Windows.Forms.Button(); this.buttonTestVoice = new System.Windows.Forms.Button();
this.checkBoxAddToVideoFile = new System.Windows.Forms.CheckBox(); this.checkBoxAddToVideoFile = new System.Windows.Forms.CheckBox();
this.labelVoice = new System.Windows.Forms.Label(); this.labelVoice = new System.Windows.Forms.Label();
this.nikseComboBoxVoice = new Nikse.SubtitleEdit.Controls.NikseComboBox();
this.nikseComboBoxEngine = new Nikse.SubtitleEdit.Controls.NikseComboBox();
this.listViewActors = new System.Windows.Forms.ListView(); this.listViewActors = new System.Windows.Forms.ListView();
this.columnHeaderActor = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader())); this.columnHeaderActor = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader()));
this.columnHeaderVoice = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader())); this.columnHeaderVoice = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader()));
this.contextMenuStripActors = new System.Windows.Forms.ContextMenuStrip(this.components); this.contextMenuStripActors = new System.Windows.Forms.ContextMenuStrip(this.components);
this.labelActors = new System.Windows.Forms.Label(); this.labelActors = new System.Windows.Forms.Label();
this.labelApiKey = new System.Windows.Forms.Label();
this.nikseTextBoxApiKey = new Nikse.SubtitleEdit.Controls.NikseTextBox();
this.TextBoxTest = new Nikse.SubtitleEdit.Controls.NikseTextBox();
this.nikseComboBoxVoice = new Nikse.SubtitleEdit.Controls.NikseComboBox();
this.nikseComboBoxEngine = new Nikse.SubtitleEdit.Controls.NikseComboBox();
this.groupBoxMsSettings.SuspendLayout(); this.groupBoxMsSettings.SuspendLayout();
this.SuspendLayout(); this.SuspendLayout();
// //
@ -107,6 +109,8 @@
// //
this.groupBoxMsSettings.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) this.groupBoxMsSettings.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left))); | System.Windows.Forms.AnchorStyles.Left)));
this.groupBoxMsSettings.Controls.Add(this.labelApiKey);
this.groupBoxMsSettings.Controls.Add(this.nikseTextBoxApiKey);
this.groupBoxMsSettings.Controls.Add(this.TextBoxTest); this.groupBoxMsSettings.Controls.Add(this.TextBoxTest);
this.groupBoxMsSettings.Controls.Add(this.buttonTestVoice); this.groupBoxMsSettings.Controls.Add(this.buttonTestVoice);
this.groupBoxMsSettings.Controls.Add(this.checkBoxAddToVideoFile); this.groupBoxMsSettings.Controls.Add(this.checkBoxAddToVideoFile);
@ -121,17 +125,6 @@
this.groupBoxMsSettings.TabStop = false; this.groupBoxMsSettings.TabStop = false;
this.groupBoxMsSettings.Text = "Settings"; this.groupBoxMsSettings.Text = "Settings";
// //
// TextBoxTest
//
this.TextBoxTest.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.TextBoxTest.FocusedColor = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.TextBoxTest.Location = new System.Drawing.Point(17, 187);
this.TextBoxTest.Name = "TextBoxTest";
this.TextBoxTest.Size = new System.Drawing.Size(351, 20);
this.TextBoxTest.TabIndex = 20;
this.TextBoxTest.Text = "Hello, how are you?";
//
// buttonTestVoice // buttonTestVoice
// //
this.buttonTestVoice.Location = new System.Drawing.Point(17, 158); this.buttonTestVoice.Location = new System.Drawing.Point(17, 158);
@ -165,6 +158,81 @@
this.labelVoice.TabIndex = 16; this.labelVoice.TabIndex = 16;
this.labelVoice.Text = "Voice"; this.labelVoice.Text = "Voice";
// //
// listViewActors
//
this.listViewActors.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.listViewActors.Columns.AddRange(new System.Windows.Forms.ColumnHeader[] {
this.columnHeaderActor,
this.columnHeaderVoice});
this.listViewActors.ContextMenuStrip = this.contextMenuStripActors;
this.listViewActors.FullRowSelect = true;
this.listViewActors.GridLines = true;
this.listViewActors.HideSelection = false;
this.listViewActors.Location = new System.Drawing.Point(412, 42);
this.listViewActors.Name = "listViewActors";
this.listViewActors.Size = new System.Drawing.Size(430, 375);
this.listViewActors.TabIndex = 40;
this.listViewActors.UseCompatibleStateImageBehavior = false;
this.listViewActors.View = System.Windows.Forms.View.Details;
//
// columnHeaderActor
//
this.columnHeaderActor.Text = "Actor";
this.columnHeaderActor.Width = 200;
//
// columnHeaderVoice
//
this.columnHeaderVoice.Text = "Voice";
this.columnHeaderVoice.Width = 200;
//
// contextMenuStripActors
//
this.contextMenuStripActors.Name = "contextMenuStripActors";
this.contextMenuStripActors.Size = new System.Drawing.Size(61, 4);
//
// labelActors
//
this.labelActors.AutoSize = true;
this.labelActors.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.labelActors.Location = new System.Drawing.Point(412, 20);
this.labelActors.Name = "labelActors";
this.labelActors.Size = new System.Drawing.Size(170, 13);
this.labelActors.TabIndex = 19;
this.labelActors.Text = "Right-click to assign actor to voice";
//
// labelApiKey
//
this.labelApiKey.AutoSize = true;
this.labelApiKey.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.labelApiKey.Location = new System.Drawing.Point(20, 242);
this.labelApiKey.Name = "labelApiKey";
this.labelApiKey.Size = new System.Drawing.Size(44, 13);
this.labelApiKey.TabIndex = 28;
this.labelApiKey.Text = "API key";
//
// nikseTextBoxApiKey
//
this.nikseTextBoxApiKey.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.nikseTextBoxApiKey.FocusedColor = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.nikseTextBoxApiKey.Location = new System.Drawing.Point(17, 258);
this.nikseTextBoxApiKey.Name = "nikseTextBoxApiKey";
this.nikseTextBoxApiKey.Size = new System.Drawing.Size(351, 20);
this.nikseTextBoxApiKey.TabIndex = 27;
//
// TextBoxTest
//
this.TextBoxTest.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.TextBoxTest.FocusedColor = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.TextBoxTest.Location = new System.Drawing.Point(17, 187);
this.TextBoxTest.Name = "TextBoxTest";
this.TextBoxTest.Size = new System.Drawing.Size(351, 20);
this.TextBoxTest.TabIndex = 20;
this.TextBoxTest.Text = "Hello, how are you?";
//
// nikseComboBoxVoice // nikseComboBoxVoice
// //
this.nikseComboBoxVoice.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) this.nikseComboBoxVoice.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
@ -218,50 +286,6 @@
this.nikseComboBoxEngine.UsePopupWindow = false; this.nikseComboBoxEngine.UsePopupWindow = false;
this.nikseComboBoxEngine.SelectedIndexChanged += new System.EventHandler(this.nikseComboBoxEngine_SelectedIndexChanged); this.nikseComboBoxEngine.SelectedIndexChanged += new System.EventHandler(this.nikseComboBoxEngine_SelectedIndexChanged);
// //
// listViewActors
//
this.listViewActors.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.listViewActors.Columns.AddRange(new System.Windows.Forms.ColumnHeader[] {
this.columnHeaderActor,
this.columnHeaderVoice});
this.listViewActors.ContextMenuStrip = this.contextMenuStripActors;
this.listViewActors.FullRowSelect = true;
this.listViewActors.GridLines = true;
this.listViewActors.HideSelection = false;
this.listViewActors.Location = new System.Drawing.Point(412, 42);
this.listViewActors.Name = "listViewActors";
this.listViewActors.Size = new System.Drawing.Size(430, 375);
this.listViewActors.TabIndex = 40;
this.listViewActors.UseCompatibleStateImageBehavior = false;
this.listViewActors.View = System.Windows.Forms.View.Details;
//
// columnHeaderActor
//
this.columnHeaderActor.Text = "Actor";
this.columnHeaderActor.Width = 200;
//
// columnHeaderVoice
//
this.columnHeaderVoice.Text = "Voice";
this.columnHeaderVoice.Width = 200;
//
// contextMenuStripActors
//
this.contextMenuStripActors.Name = "contextMenuStripActors";
this.contextMenuStripActors.Size = new System.Drawing.Size(61, 4);
//
// labelActors
//
this.labelActors.AutoSize = true;
this.labelActors.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.labelActors.Location = new System.Drawing.Point(412, 20);
this.labelActors.Name = "labelActors";
this.labelActors.Size = new System.Drawing.Size(170, 13);
this.labelActors.TabIndex = 19;
this.labelActors.Text = "Right-click to assign actor to voice";
//
// TextToSpeech // TextToSpeech
// //
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
@ -281,6 +305,7 @@
this.ShowInTaskbar = false; this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent; this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "Text to speech"; this.Text = "Text to speech";
this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.TextToSpeech_FormClosing);
this.Load += new System.EventHandler(this.TextToSpeech_Load); this.Load += new System.EventHandler(this.TextToSpeech_Load);
this.ResizeEnd += new System.EventHandler(this.TextToSpeech_ResizeEnd); this.ResizeEnd += new System.EventHandler(this.TextToSpeech_ResizeEnd);
this.SizeChanged += new System.EventHandler(this.TextToSpeech_SizeChanged); this.SizeChanged += new System.EventHandler(this.TextToSpeech_SizeChanged);
@ -309,5 +334,7 @@
private System.Windows.Forms.Button buttonTestVoice; private System.Windows.Forms.Button buttonTestVoice;
private Controls.NikseTextBox TextBoxTest; private Controls.NikseTextBox TextBoxTest;
private System.Windows.Forms.Label labelActors; private System.Windows.Forms.Label labelActors;
private System.Windows.Forms.Label labelApiKey;
private Controls.NikseTextBox nikseTextBoxApiKey;
} }
} }

View File

@ -9,6 +9,8 @@ using System.Drawing;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using System.Net.Http; using System.Net.Http;
using System.Text;
using System.Threading;
using System.Threading.Tasks; using System.Threading.Tasks;
using System.Windows.Forms; using System.Windows.Forms;
using MessageBox = Nikse.SubtitleEdit.Forms.SeMsgBox.MessageBox; using MessageBox = Nikse.SubtitleEdit.Forms.SeMsgBox.MessageBox;
@ -37,21 +39,25 @@ namespace Nikse.SubtitleEdit.Forms.Tts
public class TextToSpeechEngine public class TextToSpeechEngine
{ {
public string Id { get; set; } public TextToSpeechEngineId Id { get; set; }
public string Name { get; set; } public string Name { get; set; }
public int Index { get; set; } public int Index { get; set; }
public TextToSpeechEngine(string id, string name, int index) public TextToSpeechEngine(TextToSpeechEngineId id, string name, int index)
{ {
Id = id; Id = id;
Name = name; Name = name;
Index = index; Index = index;
} }
}
public static string IdPiper = "Piper"; public enum TextToSpeechEngineId
public static string IdTortoise = "Tortoise"; {
public static string IdCoqui = "coqui"; Piper,
public static string IdSpeechSynthesizer = "SpeechSynthesizer"; Tortoise,
Coqui,
MsSpeechSynthesizer,
ElevenLabs,
} }
public TextToSpeech(Subtitle subtitle, SubtitleFormat subtitleFormat, string videoFileName, VideoInfo videoInfo) public TextToSpeech(Subtitle subtitle, SubtitleFormat subtitleFormat, string videoFileName, VideoInfo videoInfo)
@ -73,6 +79,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
Text = LanguageSettings.Current.TextToSpeech.Title; Text = LanguageSettings.Current.TextToSpeech.Title;
labelVoice.Text = LanguageSettings.Current.TextToSpeech.Voice; labelVoice.Text = LanguageSettings.Current.TextToSpeech.Voice;
labelApiKey.Text = LanguageSettings.Current.VobSubOcr.ApiKey;
buttonTestVoice.Text = LanguageSettings.Current.TextToSpeech.TestVoice; buttonTestVoice.Text = LanguageSettings.Current.TextToSpeech.TestVoice;
labelActors.Text = LanguageSettings.Current.TextToSpeech.ActorInfo; labelActors.Text = LanguageSettings.Current.TextToSpeech.ActorInfo;
checkBoxAddToVideoFile.Text = LanguageSettings.Current.TextToSpeech.AddAudioToVideo; checkBoxAddToVideoFile.Text = LanguageSettings.Current.TextToSpeech.AddAudioToVideo;
@ -84,13 +91,14 @@ namespace Nikse.SubtitleEdit.Forms.Tts
labelProgress.Text = string.Empty; labelProgress.Text = string.Empty;
_engines = new List<TextToSpeechEngine>(); _engines = new List<TextToSpeechEngine>();
_engines.Add(new TextToSpeechEngine(TextToSpeechEngine.IdPiper, "Piper (fast/good)", _engines.Count)); _engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.Piper, "Piper (fast/good)", _engines.Count));
_engines.Add(new TextToSpeechEngine(TextToSpeechEngine.IdTortoise, "Tortoise TTS (very slow/very good)", _engines.Count)); _engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.Tortoise, "Tortoise TTS (very slow/very good)", _engines.Count));
_engines.Add(new TextToSpeechEngine(TextToSpeechEngine.IdCoqui, "coqui TTS", _engines.Count)); _engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.Coqui, "coqui TTS", _engines.Count));
if (Configuration.IsRunningOnWindows) if (Configuration.IsRunningOnWindows)
{ {
_engines.Add(new TextToSpeechEngine(TextToSpeechEngine.IdSpeechSynthesizer, "Microsoft SpeechSynthesizer (very fast/robotic)", _engines.Count)); _engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.MsSpeechSynthesizer, "Microsoft SpeechSynthesizer (very fast/robotic)", _engines.Count));
} }
_engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.ElevenLabs, "ElevenLabs TTS", _engines.Count));
_actorAndVoices = new List<ActorAndVoice>(); _actorAndVoices = new List<ActorAndVoice>();
nikseComboBoxEngine.DropDownStyle = ComboBoxStyle.DropDownList; nikseComboBoxEngine.DropDownStyle = ComboBoxStyle.DropDownList;
@ -98,9 +106,16 @@ namespace Nikse.SubtitleEdit.Forms.Tts
foreach (var engine in _engines) foreach (var engine in _engines)
{ {
nikseComboBoxEngine.Items.Add(engine.Name); nikseComboBoxEngine.Items.Add(engine.Name);
if (Configuration.Settings.Tools.TextToSpeechEngine == engine.Id.ToString())
{
nikseComboBoxEngine.SelectedIndex = nikseComboBoxEngine.Items.Count - 1;
}
} }
nikseComboBoxEngine.SelectedIndex = 0; if (nikseComboBoxEngine.SelectedIndex < 0)
{
nikseComboBoxEngine.SelectedIndex = 0;
}
labelActors.Visible = false; labelActors.Visible = false;
listViewActors.Visible = false; listViewActors.Visible = false;
@ -224,25 +239,34 @@ namespace Nikse.SubtitleEdit.Forms.Tts
private async Task<bool> GenerateParagraphAudio(Subtitle subtitle, bool showProgressBar, string overrideFileName) private async Task<bool> GenerateParagraphAudio(Subtitle subtitle, bool showProgressBar, string overrideFileName)
{ {
var engine = _engines.First(p => p.Index == nikseComboBoxEngine.SelectedIndex); var engine = _engines.First(p => p.Index == nikseComboBoxEngine.SelectedIndex);
if (engine.Id == TextToSpeechEngine.IdSpeechSynthesizer) if (engine.Id == TextToSpeechEngineId.MsSpeechSynthesizer)
{ {
GenerateParagraphAudioMs(subtitle, showProgressBar, overrideFileName); GenerateParagraphAudioMs(subtitle, showProgressBar, overrideFileName);
return true; return true;
} }
else if (engine.Id == TextToSpeechEngine.IdPiper)
if (engine.Id == TextToSpeechEngineId.Piper)
{ {
return GenerateParagraphAudioPiperTts(subtitle, showProgressBar, overrideFileName); return GenerateParagraphAudioPiperTts(subtitle, showProgressBar, overrideFileName);
} }
else if (engine.Id == TextToSpeechEngine.IdTortoise)
if (engine.Id == TextToSpeechEngineId.Tortoise)
{ {
return GenerateParagraphAudioTortoiseTts(subtitle, showProgressBar, overrideFileName); return GenerateParagraphAudioTortoiseTts(subtitle, showProgressBar, overrideFileName);
} }
else if (engine.Id == TextToSpeechEngine.IdCoqui)
if (engine.Id == TextToSpeechEngineId.Coqui)
{ {
var result = await GenerateParagraphAudioCoqui(subtitle, showProgressBar, overrideFileName); var result = await GenerateParagraphAudioCoqui(subtitle, showProgressBar, overrideFileName);
return result; return result;
} }
if (engine.Id == TextToSpeechEngineId.ElevenLabs)
{
var result = await GenerateParagraphAudioElevenLabs(subtitle, showProgressBar, overrideFileName);
return result;
}
return false; return false;
} }
@ -299,6 +323,10 @@ namespace Nikse.SubtitleEdit.Forms.Tts
var p = _subtitle.Paragraphs[index]; var p = _subtitle.Paragraphs[index];
var next = _subtitle.GetParagraphOrDefault(index + 1); var next = _subtitle.GetParagraphOrDefault(index + 1);
var pFileName = Path.Combine(_waveFolder, index + ".wav"); var pFileName = Path.Combine(_waveFolder, index + ".wav");
if (!File.Exists(pFileName))
{
pFileName = Path.Combine(_waveFolder, index + ".mp3");
}
var outputFileName1 = Path.Combine(_waveFolder, index + "_u.wav"); var outputFileName1 = Path.Combine(_waveFolder, index + "_u.wav");
var trimProcess = VideoPreviewGenerator.TrimSilenceStartAndEnd(pFileName, outputFileName1); var trimProcess = VideoPreviewGenerator.TrimSilenceStartAndEnd(pFileName, outputFileName1);
@ -733,6 +761,77 @@ namespace Nikse.SubtitleEdit.Forms.Tts
return true; return true;
} }
/// <summary>
/// Generates one MP3 file per paragraph by posting the paragraph text to the ElevenLabs text-to-speech web API.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are converted to speech.</param>
/// <param name="showProgressBar">True to show and update the progress bar and progress label while generating.</param>
/// <param name="overrideFileName">Optional output file name (a ".wav" part is replaced with ".mp3"); when null or empty, files are named after the paragraph index.</param>
/// <returns>True when audio was written for every paragraph; false when the API key is missing, the voice cannot be resolved or an API call fails.</returns>
private async Task<bool> GenerateParagraphAudioElevenLabs(Subtitle subtitle, bool showProgressBar, string overrideFileName)
{
    if (string.IsNullOrWhiteSpace(nikseTextBoxApiKey.Text))
    {
        MessageBox.Show("Please add API key");
        nikseTextBoxApiKey.Focus();
        return false;
    }

    var voices = ElevelLabModels.GetVoices();

    // NOTE(review): once an actor voice is picked it stays selected for following paragraphs without an actor - confirm this is intended.
    var v = nikseComboBoxVoice.Text;

    using (var httpClient = new HttpClient())
    {
        // "Content-Type" is a content header and cannot be set on the request headers; it is set on the StringContent below.
        httpClient.DefaultRequestHeaders.TryAddWithoutValidation("accept", "audio/mpeg");
        httpClient.DefaultRequestHeaders.TryAddWithoutValidation("xi-api-key", nikseTextBoxApiKey.Text.Trim());

        progressBar1.Value = 0;
        progressBar1.Maximum = subtitle.Paragraphs.Count;
        progressBar1.Visible = showProgressBar;

        try
        {
            for (var index = 0; index < subtitle.Paragraphs.Count; index++)
            {
                if (showProgressBar)
                {
                    progressBar1.Value = index + 1;
                    labelProgress.Text = string.Format(LanguageSettings.Current.TextToSpeech.GeneratingSpeechFromTextXOfY, index + 1, subtitle.Paragraphs.Count);
                }

                var p = subtitle.Paragraphs[index];
                var outputFileName = Path.Combine(_waveFolder, string.IsNullOrEmpty(overrideFileName) ? index + ".mp3" : overrideFileName.Replace(".wav", ".mp3"));

                if (_actorAndVoices.Count > 0 && !string.IsNullOrEmpty(p.Actor))
                {
                    var f = _actorAndVoices.FirstOrDefault(x => x.Actor == p.Actor);
                    if (f != null && !string.IsNullOrEmpty(f.Voice))
                    {
                        v = f.Voice;
                    }
                }

                // The voice combo box holds the full display text, while actor assignments may hold
                // only the plain voice name - accept both instead of throwing on a missing match.
                var voice = voices.FirstOrDefault(x => x.ToString() == v) ?? voices.FirstOrDefault(x => x.Voice == v);
                if (voice == null)
                {
                    SeLogger.Error($"ElevenLabs TTS: unknown voice '{v}'");
                    MessageBox.Show("Unknown voice: " + v);
                    return false;
                }

                var url = "https://api.elevenlabs.io/v1/text-to-speech/" + voice.Model;
                var data = "{ \"text\": \"" + Json.EncodeJsonText(p.Text) + "\", \"model_id\": \"eleven_monolingual_v1\", \"voice_settings\": { \"stability\": 0.5, \"similarity_boost\": 0.5 } }";

                using (var content = new StringContent(data, Encoding.UTF8, "application/json"))
                using (var result = await httpClient.PostAsync(url, content, CancellationToken.None))
                {
                    var bytes = await result.Content.ReadAsByteArrayAsync();

                    if (!result.IsSuccessStatusCode)
                    {
                        var error = Encoding.UTF8.GetString(bytes).Trim();
                        SeLogger.Error($"ElevenLabs TTS failed calling API at {url} : Status code={result.StatusCode} {error}" + Environment.NewLine + "Data=" + data);
                        MessageBox.Show("Calling url: " + url + Environment.NewLine + "With: " + data + Environment.NewLine + Environment.NewLine + "Error: " + error);
                        return false;
                    }

                    File.WriteAllBytes(outputFileName, bytes);
                }

                progressBar1.Refresh();
                labelProgress.Refresh();
                Application.DoEvents();
            }
        }
        finally
        {
            // Reset the progress UI on success, failure and exceptions alike.
            progressBar1.Visible = false;
            labelProgress.Text = string.Empty;
        }
    }

    return true;
}
private void buttonOK_Click(object sender, EventArgs e) private void buttonOK_Click(object sender, EventArgs e)
{ {
DialogResult = DialogResult.OK; DialogResult = DialogResult.OK;
@ -742,6 +841,9 @@ namespace Nikse.SubtitleEdit.Forms.Tts
{ {
nikseComboBoxVoice.Items.Clear(); nikseComboBoxVoice.Items.Clear();
labelApiKey.Visible = false;
nikseTextBoxApiKey.Visible = false;
labelVoice.Text = LanguageSettings.Current.TextToSpeech.Voice; labelVoice.Text = LanguageSettings.Current.TextToSpeech.Voice;
if (SubtitleFormatHasActors() && _actors.Any()) if (SubtitleFormatHasActors() && _actors.Any())
{ {
@ -749,7 +851,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
} }
var engine = _engines.First(p => p.Index == nikseComboBoxEngine.SelectedIndex); var engine = _engines.First(p => p.Index == nikseComboBoxEngine.SelectedIndex);
if (engine.Id == TextToSpeechEngine.IdSpeechSynthesizer) if (engine.Id == TextToSpeechEngineId.MsSpeechSynthesizer)
{ {
using (var synthesizer = new System.Speech.Synthesis.SpeechSynthesizer()) using (var synthesizer = new System.Speech.Synthesis.SpeechSynthesizer())
{ {
@ -763,7 +865,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
} }
} }
if (engine.Id == TextToSpeechEngine.IdPiper) if (engine.Id == TextToSpeechEngineId.Piper)
{ {
foreach (var voice in PiperModels.GetVoices()) foreach (var voice in PiperModels.GetVoices())
{ {
@ -771,7 +873,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
} }
} }
if (engine.Id == TextToSpeechEngine.IdTortoise) if (engine.Id == TextToSpeechEngineId.Tortoise)
{ {
nikseComboBoxVoice.Items.Add("angie"); nikseComboBoxVoice.Items.Add("angie");
nikseComboBoxVoice.Items.Add("applejack"); nikseComboBoxVoice.Items.Add("applejack");
@ -796,12 +898,25 @@ namespace Nikse.SubtitleEdit.Forms.Tts
nikseComboBoxVoice.Items.Add("william"); nikseComboBoxVoice.Items.Add("william");
} }
if (engine.Id == TextToSpeechEngine.IdCoqui) if (engine.Id == TextToSpeechEngineId.Coqui)
{ {
labelVoice.Text = LanguageSettings.Current.General.WebServiceUrl; labelVoice.Text = LanguageSettings.Current.General.WebServiceUrl;
nikseComboBoxVoice.Items.Add("http://localhost:5002/api/tts"); nikseComboBoxVoice.Items.Add("http://localhost:5002/api/tts");
} }
if (engine.Id == TextToSpeechEngineId.ElevenLabs)
{
nikseTextBoxApiKey.Text = Configuration.Settings.Tools.TextToSpeechElevenLabsApiKey;
labelApiKey.Visible = true;
nikseTextBoxApiKey.Visible = true;
foreach (var voice in ElevelLabModels.GetVoices())
{
nikseComboBoxVoice.Items.Add(voice.ToString());
}
}
if (nikseComboBoxVoice.Items.Count > 0) if (nikseComboBoxVoice.Items.Count > 0)
{ {
nikseComboBoxVoice.SelectedIndex = 0; nikseComboBoxVoice.SelectedIndex = 0;
@ -828,7 +943,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
contextMenuStripActors.Items.Clear(); contextMenuStripActors.Items.Clear();
if (engine.Id == TextToSpeechEngine.IdPiper) if (engine.Id == TextToSpeechEngineId.Piper)
{ {
var voices = PiperModels.GetVoices(); var voices = PiperModels.GetVoices();
foreach (var voiceLanguage in voices foreach (var voiceLanguage in voices
@ -871,6 +986,49 @@ namespace Nikse.SubtitleEdit.Forms.Tts
} }
} }
} }
else if (engine.Id == TextToSpeechEngineId.ElevenLabs)
{
var voices = ElevelLabModels.GetVoices();
foreach (var voiceLanguage in voices
.GroupBy(p => p.Language)
.OrderBy(p => p.Key))
{
if (voiceLanguage.Count() == 1)
{
var voice = voiceLanguage.First();
var tsi = new ToolStripMenuItem();
tsi.Tag = new ActorAndVoice { Voice = voice.Voice, VoiceIndex = voices.IndexOf(voice) };
tsi.Text = voice.ToString();
tsi.Click += (x, args) =>
{
var a = (ActorAndVoice)(x as ToolStripItem).Tag;
SetActor(a);
};
contextMenuStripActors.Items.Add(tsi);
}
else
{
var parent = new ToolStripMenuItem();
parent.Text = voiceLanguage.Key;
contextMenuStripActors.Items.Add(parent);
foreach (var voice in voiceLanguage.OrderBy(p => p.Voice).ToList())
{
var tsi = new ToolStripMenuItem();
tsi.Tag = new ActorAndVoice { Voice = voice.Voice, VoiceIndex = voices.IndexOf(voice) };
tsi.Text = voice.Voice + " (" + voice.Gender + ")";
tsi.Click += (x, args) =>
{
var a = (ActorAndVoice)(x as ToolStripItem).Tag;
SetActor(a);
};
parent.DropDownItems.Add(tsi);
}
DarkTheme.SetDarkTheme(parent);
}
}
}
else else
{ {
for (var index = 0; index < nikseComboBoxVoice.Items.Count; index++) for (var index = 0; index < nikseComboBoxVoice.Items.Count; index++)
@ -934,7 +1092,13 @@ namespace Nikse.SubtitleEdit.Forms.Tts
var sub = new Subtitle(); var sub = new Subtitle();
sub.Paragraphs.Add(new Paragraph(text, 0, 2500)); sub.Paragraphs.Add(new Paragraph(text, 0, 2500));
var waveFileNameOnly = Guid.NewGuid() + ".wav"; var waveFileNameOnly = Guid.NewGuid() + ".wav";
await GenerateParagraphAudio(sub, false, waveFileNameOnly); var ok = await GenerateParagraphAudio(sub, false, waveFileNameOnly);
if (!ok)
{
MessageBox.Show("Ups, voice generation failed!");
return;
}
var waveFileName = Path.Combine(_waveFolder, waveFileNameOnly); var waveFileName = Path.Combine(_waveFolder, waveFileNameOnly);
using (var soundPlayer = new System.Media.SoundPlayer(waveFileName)) using (var soundPlayer = new System.Media.SoundPlayer(waveFileName))
{ {
@ -962,5 +1126,17 @@ namespace Nikse.SubtitleEdit.Forms.Tts
TaskDelayHelper.RunDelayed(TimeSpan.FromSeconds(1), () => buttonTestVoice.Enabled = true); TaskDelayHelper.RunDelayed(TimeSpan.FromSeconds(1), () => buttonTestVoice.Enabled = true);
} }
} }
/// <summary>
/// Stores the selected engine in the settings when the form closes; the API key text box
/// is stored as well when the selected engine is ElevenLabs.
/// </summary>
private void TextToSpeech_FormClosing(object sender, FormClosingEventArgs e)
{
    var selectedIndex = nikseComboBoxEngine.SelectedIndex;
    var selectedEngineId = _engines.First(candidate => candidate.Index == selectedIndex).Id;
    var tools = Configuration.Settings.Tools;

    // Only overwrite the stored key while the ElevenLabs engine is the active one.
    if (selectedEngineId == TextToSpeechEngineId.ElevenLabs)
    {
        tools.TextToSpeechElevenLabsApiKey = nikseTextBoxApiKey.Text;
    }

    tools.TextToSpeechEngine = selectedEngineId.ToString();
}
} }
} }