Work on tts

This commit is contained in:
Nikolaj Olsson 2024-04-15 19:50:29 +02:00
parent 122aa406f5
commit d42a974457
4 changed files with 374 additions and 79 deletions

View File

@ -190,6 +190,8 @@ namespace Nikse.SubtitleEdit.Core.Common
public string AnthropicApiModel { get; set; }
public int AutoTranslateDelaySeconds { get; set; }
public string GeminiProApiKey { get; set; }
public string TextToSpeechEngine { get; set; }
public string TextToSpeechElevenLabsApiKey { get; set; }
public bool DisableVidoInfoViaLabel { get; set; }
public bool ListViewSyntaxColorDurationSmall { get; set; }
public bool ListViewSyntaxColorDurationBig { get; set; }
@ -5449,6 +5451,18 @@ $HorzAlign = Center
settings.Tools.GeminiProApiKey = subNode.InnerText;
}
subNode = node.SelectSingleNode("TextToSpeechEngine");
if (subNode != null)
{
settings.Tools.TextToSpeechEngine = subNode.InnerText;
}
subNode = node.SelectSingleNode("TextToSpeechElevenLabsApiKey");
if (subNode != null)
{
settings.Tools.TextToSpeechElevenLabsApiKey = subNode.InnerText;
}
subNode = node.SelectSingleNode("TranslateViaCopyPasteAutoCopyToClipboard");
if (subNode != null)
{
@ -11969,6 +11983,8 @@ $HorzAlign = Center
textWriter.WriteElementString("AnthropicApiModel", settings.Tools.AnthropicApiModel);
textWriter.WriteElementString("AutoTranslateDelaySeconds", settings.Tools.AutoTranslateDelaySeconds.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("GeminiProApiKey", settings.Tools.GeminiProApiKey);
textWriter.WriteElementString("TextToSpeechEngine", settings.Tools.TextToSpeechEngine);
textWriter.WriteElementString("TextToSpeechElevenLabsApiKey", settings.Tools.TextToSpeechElevenLabsApiKey);
textWriter.WriteElementString("DisableVidoInfoViaLabel", settings.Tools.DisableVidoInfoViaLabel.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("ListViewSyntaxColorDurationSmall", settings.Tools.ListViewSyntaxColorDurationSmall.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("ListViewSyntaxColorDurationBig", settings.Tools.ListViewSyntaxColorDurationBig.ToString(CultureInfo.InvariantCulture));

View File

@ -0,0 +1,76 @@
using System.Collections.Generic;
namespace Nikse.SubtitleEdit.Core.TextToSpeech
{
/// <summary>
/// Describes one selectable ElevenLabs voice and provides the built-in voice table.
/// </summary>
public class ElevelLabModels
{
    /// <summary>Voice name, e.g. "Adam".</summary>
    public string Voice { get; set; }

    /// <summary>
    /// Display language. Note that this is filled from the constructor's <c>accent</c> argument
    /// (e.g. "American English"), not from its <c>language</c> argument.
    /// </summary>
    public string Language { get; set; }

    /// <summary>Voice gender as given in the voice table, e.g. "Male" or "Female".</summary>
    public string Gender { get; set; }

    /// <summary>The ElevenLabs voice id (the constructor's <c>voiceId</c> argument).</summary>
    public string Model { get; set; }

    /// <summary>
    /// Returns the display text for the voice in the form "Language - Voice (Gender)".
    /// </summary>
    public override string ToString()
    {
        return $"{Language} - {Voice} ({Gender})";
    }

    /// <summary>
    /// Creates a voice entry.
    /// </summary>
    /// <param name="language">Broad language name; accepted for readability of the table but not stored.</param>
    /// <param name="voice">Voice name, stored in <see cref="Voice"/>.</param>
    /// <param name="gender">Voice gender, stored in <see cref="Gender"/>.</param>
    /// <param name="description">Short characterisation of the voice; not stored.</param>
    /// <param name="useCase">Typical use case of the voice; not stored.</param>
    /// <param name="accent">Accent/locale text, stored in <see cref="Language"/>.</param>
    /// <param name="voiceId">ElevenLabs voice id, stored in <see cref="Model"/>.</param>
    public ElevelLabModels(string language, string voice, string gender, string description, string useCase, string accent, string voiceId)
    {
        Voice = voice;
        Language = accent;
        // Assign the parameter; the previous "Gender = Gender" was a self-assignment that left Gender null.
        Gender = gender;
        Model = voiceId;
    }

    /// <summary>
    /// Builds the list of built-in voices.
    /// </summary>
    /// <returns>A new list instance on every call.</returns>
    public static List<ElevelLabModels> GetVoices()
    {
        var models = new List<ElevelLabModels>
        {
            new ElevelLabModels("English", "Adam", "Male", "Deep", "Narration", "American English", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("English", "Charlie", "Male", "Casual", "Conversational", "Australian English", "IKne3meq5aSn9XLyUdCD"),
            new ElevelLabModels("English", "Clyde", "Male", "War veteran", "Video games", "American English", "2EiwWnXFnvU5JabPnv8n"),
            new ElevelLabModels("English", "Dorothy", "Female", "Pleasant", "Childrens stories", "British English", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("English", "Freya", "Female", "Overhyped", "Video games", "American English", "jsCqWAovK2LkecY7zXl4"),
            new ElevelLabModels("English", "Gigi", "Female", "Childlish", "Animation", "American English", "jBpfuIE2acCO8z3wKNLl"),
            new ElevelLabModels("English", "Harry", "Male", "Anxious", "Video games", "American English", "SOYHLrjzK2X1ezoPC6cr"),
            new ElevelLabModels("English", "James", "Male", "Calm", "News", "Australian English", "ZQe5CZNOzWyzPSCn5a3c"),
            new ElevelLabModels("English", "Lily", "Female", "Raspy", "Narration", "British English", "pFZP5JQG7iQjIQuC4Bku"),
            new ElevelLabModels("English", "Rachel", "Female", "Calm", "Narration", "American English", "21m00Tcm4TlvDq8ikWAM"),
            new ElevelLabModels("Spanish", "Dorothy", "Female", "Pleasant", "News", "Chilean Spanish", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("Spanish", "Glinda", "Female", "Witch", "Video games", "Mexican Spanish", "z9fAnlkpzviPz146aGWa"),
            new ElevelLabModels("Spanish", "Grace", "Female", "gentle", "Audiobook", "Mexican Spanish", "oWAxZDx7w5VEj9dCyTzz"),
            new ElevelLabModels("Spanish", "Matilda", "Female", "Warm", "Audiobook", "Chilean Spanish", "XrExE9yKIg1WjnnlVkGX"),
            new ElevelLabModels("German", "Sarah", "Female", "Soft", "News", "Germany German", "EXAVITQu4vr4xnSDxMaL"),
            new ElevelLabModels("German", "Serena", "Female", "Pleasant", "Interactive", "Germany German", "pMsXgVXv3BLzUgSXRplE"),
            new ElevelLabModels("German", "Matilda", "Female", "Warm", "Audiobook", "Germany German", "XrExE9yKIg1WjnnlVkGX"),
            new ElevelLabModels("German", "Freya", "Female", "Overhyped", "Video games", "Germany German", "jsCqWAovK2LkecY7zXl4"),
            new ElevelLabModels("German", "Adam", "Male", "Deep", "Narration", "Germany German", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("German", "Antoni", "Male", "Well-rounded", "Narration", "Germany German", "ErXwobaYiN019PkySvjV"),
            new ElevelLabModels("French", "Adam", "Male", "Deep", "Narration", "Canadian French", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("French", "Antoni", "Male", "Well-rounded", "Narration", "Canadian French", "ErXwobaYiN019PkySvjV"),
            new ElevelLabModels("French", "Arnold", "Male", "Crisp", "Narration", "Canadian French", "VR6AewLTigWG4xSOukaG"),
            new ElevelLabModels("French", "Bill", "Male", "Strong", "documentary", "Canadian French", "pqHfZKP75CvOlQylNhV4"),
            new ElevelLabModels("French", "George", "Male", "Raspy", "Narration", "Canadian French", "JBFqnCBsd6RMkjVDRZzb"),
            new ElevelLabModels("French", "Charlotte", "Female", "Seductive", "Video games", "Canadian French", "XB0fDUnXU5powFXDhCwa"),
            new ElevelLabModels("French", "Domi", "Female", "Strong", "Narration", "Canadian French", "AZnzlk1XvdvUeBnXmlld"),
            new ElevelLabModels("French", "Dorothy", "Female", "Pleasant", "Childrens stories", "Canadian French", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("French", "Serena", "Female", "Pleasant", "Interactive", "Canadian French", "pMsXgVXv3BLzUgSXRplE"),
            new ElevelLabModels("French", "Sarah", "Female", "Soft", "News", "Canadian French", "EXAVITQu4vr4xnSDxMaL"),
            new ElevelLabModels("Polish", "Adam", "Male", "Deep", "Narration", "Poland Polish", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("Polish", "Charlie", "Male", "Casual", "Conversational", "Poland Polish", "IKne3meq5aSn9XLyUdCD"),
            new ElevelLabModels("Polish", "Clyde", "Male", "War veteran", "video games", "Poland Polish", "2EiwWnXFnvU5JabPnv8n"),
            new ElevelLabModels("Polish", "Dorothy", "Female", "Pleasant", "Childrens stories", "Poland Polish", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("Polish", "Gigi", "Female", "Childlish", "Animation", "Poland Polish", "jBpfuIE2acCO8z3wKNLl"),
            new ElevelLabModels("Polish", "Harry", "Male", "Anxious", "Video games", "Poland Polish", "SOYHLrjzK2X1ezoPC6cr"),
            new ElevelLabModels("Italian", "Adam", "Male", "Deep", "Narration", "Italy Italian", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("Italian", "Charlie", "Male", "Casual", "Conversational", "Italy Italian", "IKne3meq5aSn9XLyUdCD"),
            new ElevelLabModels("Italian", "Clyde", "Male", "War veteran", "Video games", "Italy Italian", "2EiwWnXFnvU5JabPnv8n"),
            new ElevelLabModels("Italian", "Dorothy", "Female", "Pleasant", "Childrens stories", "Italy Italian", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("Italian", "Gigi", "Female", "Childlish", "Animation", "Italy Italian", "jBpfuIE2acCO8z3wKNLl"),
            new ElevelLabModels("Italian", "Harry", "Male", "Anxious", "Video games", "Italy Italian", "SOYHLrjzK2X1ezoPC6cr"),
        };

        return models;
    }
}
}

View File

@ -35,17 +35,19 @@
this.progressBar1 = new System.Windows.Forms.ProgressBar();
this.labelEngine = new System.Windows.Forms.Label();
this.groupBoxMsSettings = new System.Windows.Forms.GroupBox();
this.TextBoxTest = new Nikse.SubtitleEdit.Controls.NikseTextBox();
this.buttonTestVoice = new System.Windows.Forms.Button();
this.checkBoxAddToVideoFile = new System.Windows.Forms.CheckBox();
this.labelVoice = new System.Windows.Forms.Label();
this.nikseComboBoxVoice = new Nikse.SubtitleEdit.Controls.NikseComboBox();
this.nikseComboBoxEngine = new Nikse.SubtitleEdit.Controls.NikseComboBox();
this.listViewActors = new System.Windows.Forms.ListView();
this.columnHeaderActor = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader()));
this.columnHeaderVoice = ((System.Windows.Forms.ColumnHeader)(new System.Windows.Forms.ColumnHeader()));
this.contextMenuStripActors = new System.Windows.Forms.ContextMenuStrip(this.components);
this.labelActors = new System.Windows.Forms.Label();
this.labelApiKey = new System.Windows.Forms.Label();
this.nikseTextBoxApiKey = new Nikse.SubtitleEdit.Controls.NikseTextBox();
this.TextBoxTest = new Nikse.SubtitleEdit.Controls.NikseTextBox();
this.nikseComboBoxVoice = new Nikse.SubtitleEdit.Controls.NikseComboBox();
this.nikseComboBoxEngine = new Nikse.SubtitleEdit.Controls.NikseComboBox();
this.groupBoxMsSettings.SuspendLayout();
this.SuspendLayout();
//
@ -107,6 +109,8 @@
//
this.groupBoxMsSettings.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)));
this.groupBoxMsSettings.Controls.Add(this.labelApiKey);
this.groupBoxMsSettings.Controls.Add(this.nikseTextBoxApiKey);
this.groupBoxMsSettings.Controls.Add(this.TextBoxTest);
this.groupBoxMsSettings.Controls.Add(this.buttonTestVoice);
this.groupBoxMsSettings.Controls.Add(this.checkBoxAddToVideoFile);
@ -121,17 +125,6 @@
this.groupBoxMsSettings.TabStop = false;
this.groupBoxMsSettings.Text = "Settings";
//
// TextBoxTest
//
this.TextBoxTest.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.TextBoxTest.FocusedColor = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.TextBoxTest.Location = new System.Drawing.Point(17, 187);
this.TextBoxTest.Name = "TextBoxTest";
this.TextBoxTest.Size = new System.Drawing.Size(351, 20);
this.TextBoxTest.TabIndex = 20;
this.TextBoxTest.Text = "Hello, how are you?";
//
// buttonTestVoice
//
this.buttonTestVoice.Location = new System.Drawing.Point(17, 158);
@ -165,6 +158,81 @@
this.labelVoice.TabIndex = 16;
this.labelVoice.Text = "Voice";
//
// listViewActors
//
this.listViewActors.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.listViewActors.Columns.AddRange(new System.Windows.Forms.ColumnHeader[] {
this.columnHeaderActor,
this.columnHeaderVoice});
this.listViewActors.ContextMenuStrip = this.contextMenuStripActors;
this.listViewActors.FullRowSelect = true;
this.listViewActors.GridLines = true;
this.listViewActors.HideSelection = false;
this.listViewActors.Location = new System.Drawing.Point(412, 42);
this.listViewActors.Name = "listViewActors";
this.listViewActors.Size = new System.Drawing.Size(430, 375);
this.listViewActors.TabIndex = 40;
this.listViewActors.UseCompatibleStateImageBehavior = false;
this.listViewActors.View = System.Windows.Forms.View.Details;
//
// columnHeaderActor
//
this.columnHeaderActor.Text = "Actor";
this.columnHeaderActor.Width = 200;
//
// columnHeaderVoice
//
this.columnHeaderVoice.Text = "Voice";
this.columnHeaderVoice.Width = 200;
//
// contextMenuStripActors
//
this.contextMenuStripActors.Name = "contextMenuStripActors";
this.contextMenuStripActors.Size = new System.Drawing.Size(61, 4);
//
// labelActors
//
this.labelActors.AutoSize = true;
this.labelActors.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.labelActors.Location = new System.Drawing.Point(412, 20);
this.labelActors.Name = "labelActors";
this.labelActors.Size = new System.Drawing.Size(170, 13);
this.labelActors.TabIndex = 19;
this.labelActors.Text = "Right-click to assign actor to voice";
//
// labelApiKey
//
this.labelApiKey.AutoSize = true;
this.labelApiKey.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.labelApiKey.Location = new System.Drawing.Point(20, 242);
this.labelApiKey.Name = "labelApiKey";
this.labelApiKey.Size = new System.Drawing.Size(44, 13);
this.labelApiKey.TabIndex = 28;
this.labelApiKey.Text = "API key";
//
// nikseTextBoxApiKey
//
this.nikseTextBoxApiKey.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.nikseTextBoxApiKey.FocusedColor = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.nikseTextBoxApiKey.Location = new System.Drawing.Point(17, 258);
this.nikseTextBoxApiKey.Name = "nikseTextBoxApiKey";
this.nikseTextBoxApiKey.Size = new System.Drawing.Size(351, 20);
this.nikseTextBoxApiKey.TabIndex = 27;
//
// TextBoxTest
//
this.TextBoxTest.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.TextBoxTest.FocusedColor = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.TextBoxTest.Location = new System.Drawing.Point(17, 187);
this.TextBoxTest.Name = "TextBoxTest";
this.TextBoxTest.Size = new System.Drawing.Size(351, 20);
this.TextBoxTest.TabIndex = 20;
this.TextBoxTest.Text = "Hello, how are you?";
//
// nikseComboBoxVoice
//
this.nikseComboBoxVoice.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
@ -218,50 +286,6 @@
this.nikseComboBoxEngine.UsePopupWindow = false;
this.nikseComboBoxEngine.SelectedIndexChanged += new System.EventHandler(this.nikseComboBoxEngine_SelectedIndexChanged);
//
// listViewActors
//
this.listViewActors.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.listViewActors.Columns.AddRange(new System.Windows.Forms.ColumnHeader[] {
this.columnHeaderActor,
this.columnHeaderVoice});
this.listViewActors.ContextMenuStrip = this.contextMenuStripActors;
this.listViewActors.FullRowSelect = true;
this.listViewActors.GridLines = true;
this.listViewActors.HideSelection = false;
this.listViewActors.Location = new System.Drawing.Point(412, 42);
this.listViewActors.Name = "listViewActors";
this.listViewActors.Size = new System.Drawing.Size(430, 375);
this.listViewActors.TabIndex = 40;
this.listViewActors.UseCompatibleStateImageBehavior = false;
this.listViewActors.View = System.Windows.Forms.View.Details;
//
// columnHeaderActor
//
this.columnHeaderActor.Text = "Actor";
this.columnHeaderActor.Width = 200;
//
// columnHeaderVoice
//
this.columnHeaderVoice.Text = "Voice";
this.columnHeaderVoice.Width = 200;
//
// contextMenuStripActors
//
this.contextMenuStripActors.Name = "contextMenuStripActors";
this.contextMenuStripActors.Size = new System.Drawing.Size(61, 4);
//
// labelActors
//
this.labelActors.AutoSize = true;
this.labelActors.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.labelActors.Location = new System.Drawing.Point(412, 20);
this.labelActors.Name = "labelActors";
this.labelActors.Size = new System.Drawing.Size(170, 13);
this.labelActors.TabIndex = 19;
this.labelActors.Text = "Right-click to assign actor to voice";
//
// TextToSpeech
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
@ -281,6 +305,7 @@
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "Text to speech";
this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.TextToSpeech_FormClosing);
this.Load += new System.EventHandler(this.TextToSpeech_Load);
this.ResizeEnd += new System.EventHandler(this.TextToSpeech_ResizeEnd);
this.SizeChanged += new System.EventHandler(this.TextToSpeech_SizeChanged);
@ -309,5 +334,7 @@
private System.Windows.Forms.Button buttonTestVoice;
private Controls.NikseTextBox TextBoxTest;
private System.Windows.Forms.Label labelActors;
private System.Windows.Forms.Label labelApiKey;
private Controls.NikseTextBox nikseTextBoxApiKey;
}
}

View File

@ -9,6 +9,8 @@ using System.Drawing;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using MessageBox = Nikse.SubtitleEdit.Forms.SeMsgBox.MessageBox;
@ -37,21 +39,25 @@ namespace Nikse.SubtitleEdit.Forms.Tts
public class TextToSpeechEngine
{
public string Id { get; set; }
public TextToSpeechEngineId Id { get; set; }
public string Name { get; set; }
public int Index { get; set; }
public TextToSpeechEngine(string id, string name, int index)
public TextToSpeechEngine(TextToSpeechEngineId id, string name, int index)
{
Id = id;
Name = name;
Index = index;
}
}
public static string IdPiper = "Piper";
public static string IdTortoise = "Tortoise";
public static string IdCoqui = "coqui";
public static string IdSpeechSynthesizer = "SpeechSynthesizer";
/// <summary>
/// Identifies a text-to-speech engine that can be selected in this form.
/// </summary>
/// <remarks>
/// The member name of the selected engine is stored in
/// Configuration.Settings.Tools.TextToSpeechEngine via ToString() when the form closes and is
/// compared against that setting when the engine list is filled, so renaming a member would
/// stop a previously saved selection from matching.
/// </remarks>
public enum TextToSpeechEngineId
{
// Listed in the engine drop-down as "Piper (fast/good)".
Piper,
// Listed in the engine drop-down as "Tortoise TTS (very slow/very good)".
Tortoise,
// Listed in the engine drop-down as "coqui TTS".
Coqui,
// Listed as "Microsoft SpeechSynthesizer (very fast/robotic)"; only added when Configuration.IsRunningOnWindows is true.
MsSpeechSynthesizer,
// Listed in the engine drop-down as "ElevenLabs TTS"; the only engine for which the API key box is shown.
ElevenLabs,
}
public TextToSpeech(Subtitle subtitle, SubtitleFormat subtitleFormat, string videoFileName, VideoInfo videoInfo)
@ -73,6 +79,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
Text = LanguageSettings.Current.TextToSpeech.Title;
labelVoice.Text = LanguageSettings.Current.TextToSpeech.Voice;
labelApiKey.Text = LanguageSettings.Current.VobSubOcr.ApiKey;
buttonTestVoice.Text = LanguageSettings.Current.TextToSpeech.TestVoice;
labelActors.Text = LanguageSettings.Current.TextToSpeech.ActorInfo;
checkBoxAddToVideoFile.Text = LanguageSettings.Current.TextToSpeech.AddAudioToVideo;
@ -84,13 +91,14 @@ namespace Nikse.SubtitleEdit.Forms.Tts
labelProgress.Text = string.Empty;
_engines = new List<TextToSpeechEngine>();
_engines.Add(new TextToSpeechEngine(TextToSpeechEngine.IdPiper, "Piper (fast/good)", _engines.Count));
_engines.Add(new TextToSpeechEngine(TextToSpeechEngine.IdTortoise, "Tortoise TTS (very slow/very good)", _engines.Count));
_engines.Add(new TextToSpeechEngine(TextToSpeechEngine.IdCoqui, "coqui TTS", _engines.Count));
_engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.Piper, "Piper (fast/good)", _engines.Count));
_engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.Tortoise, "Tortoise TTS (very slow/very good)", _engines.Count));
_engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.Coqui, "coqui TTS", _engines.Count));
if (Configuration.IsRunningOnWindows)
{
_engines.Add(new TextToSpeechEngine(TextToSpeechEngine.IdSpeechSynthesizer, "Microsoft SpeechSynthesizer (very fast/robotic)", _engines.Count));
_engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.MsSpeechSynthesizer, "Microsoft SpeechSynthesizer (very fast/robotic)", _engines.Count));
}
_engines.Add(new TextToSpeechEngine(TextToSpeechEngineId.ElevenLabs, "ElevenLabs TTS", _engines.Count));
_actorAndVoices = new List<ActorAndVoice>();
nikseComboBoxEngine.DropDownStyle = ComboBoxStyle.DropDownList;
@ -98,9 +106,16 @@ namespace Nikse.SubtitleEdit.Forms.Tts
foreach (var engine in _engines)
{
nikseComboBoxEngine.Items.Add(engine.Name);
if (Configuration.Settings.Tools.TextToSpeechEngine == engine.Id.ToString())
{
nikseComboBoxEngine.SelectedIndex = nikseComboBoxEngine.Items.Count - 1;
}
}
nikseComboBoxEngine.SelectedIndex = 0;
if (nikseComboBoxEngine.SelectedIndex < 0)
{
nikseComboBoxEngine.SelectedIndex = 0;
}
labelActors.Visible = false;
listViewActors.Visible = false;
@ -224,25 +239,34 @@ namespace Nikse.SubtitleEdit.Forms.Tts
private async Task<bool> GenerateParagraphAudio(Subtitle subtitle, bool showProgressBar, string overrideFileName)
{
var engine = _engines.First(p => p.Index == nikseComboBoxEngine.SelectedIndex);
if (engine.Id == TextToSpeechEngine.IdSpeechSynthesizer)
if (engine.Id == TextToSpeechEngineId.MsSpeechSynthesizer)
{
GenerateParagraphAudioMs(subtitle, showProgressBar, overrideFileName);
return true;
}
else if (engine.Id == TextToSpeechEngine.IdPiper)
if (engine.Id == TextToSpeechEngineId.Piper)
{
return GenerateParagraphAudioPiperTts(subtitle, showProgressBar, overrideFileName);
}
else if (engine.Id == TextToSpeechEngine.IdTortoise)
if (engine.Id == TextToSpeechEngineId.Tortoise)
{
return GenerateParagraphAudioTortoiseTts(subtitle, showProgressBar, overrideFileName);
}
else if (engine.Id == TextToSpeechEngine.IdCoqui)
if (engine.Id == TextToSpeechEngineId.Coqui)
{
var result = await GenerateParagraphAudioCoqui(subtitle, showProgressBar, overrideFileName);
return result;
}
if (engine.Id == TextToSpeechEngineId.ElevenLabs)
{
var result = await GenerateParagraphAudioElevenLabs(subtitle, showProgressBar, overrideFileName);
return result;
}
return false;
}
@ -299,6 +323,10 @@ namespace Nikse.SubtitleEdit.Forms.Tts
var p = _subtitle.Paragraphs[index];
var next = _subtitle.GetParagraphOrDefault(index + 1);
var pFileName = Path.Combine(_waveFolder, index + ".wav");
if (!File.Exists(pFileName))
{
pFileName = Path.Combine(_waveFolder, index + ".mp3");
}
var outputFileName1 = Path.Combine(_waveFolder, index + "_u.wav");
var trimProcess = VideoPreviewGenerator.TrimSilenceStartAndEnd(pFileName, outputFileName1);
@ -733,6 +761,77 @@ namespace Nikse.SubtitleEdit.Forms.Tts
return true;
}
/// <summary>
/// Generates one MP3 file per paragraph by posting the paragraph text to the ElevenLabs
/// text-to-speech web API and writing the returned bytes into the wave folder.
/// </summary>
/// <param name="subtitle">Paragraphs to synthesize; one HTTP request is sent per paragraph.</param>
/// <param name="showProgressBar">When true, the progress bar and progress label are updated for every paragraph.</param>
/// <param name="overrideFileName">
/// Optional output file name (any ".wav" in it is replaced by ".mp3"). When null or empty,
/// files are named after the paragraph index.
/// </param>
/// <returns>
/// True when audio was written for every paragraph; false when the API key is missing,
/// the requested voice is unknown, or a request returns a non-success status code.
/// </returns>
private async Task<bool> GenerateParagraphAudioElevenLabs(Subtitle subtitle, bool showProgressBar, string overrideFileName)
{
    if (string.IsNullOrWhiteSpace(nikseTextBoxApiKey.Text))
    {
        MessageBox.Show("Please add API key");
        nikseTextBoxApiKey.Focus();
        return false;
    }

    // Dispose the client when done instead of leaking it (and its connections) on every call.
    using (var httpClient = new HttpClient())
    {
        // "Content-Type" is a content header and cannot be set through DefaultRequestHeaders;
        // it is set on the StringContent of each request below instead.
        httpClient.DefaultRequestHeaders.TryAddWithoutValidation("accept", "audio/mpeg");
        httpClient.DefaultRequestHeaders.TryAddWithoutValidation("xi-api-key", nikseTextBoxApiKey.Text.Trim());

        progressBar1.Value = 0;
        progressBar1.Maximum = subtitle.Paragraphs.Count;
        progressBar1.Visible = showProgressBar;

        var voices = ElevelLabModels.GetVoices();
        var defaultVoiceName = nikseComboBoxVoice.Text;

        for (var index = 0; index < subtitle.Paragraphs.Count; index++)
        {
            if (showProgressBar)
            {
                progressBar1.Value = index + 1;
                labelProgress.Text = string.Format(LanguageSettings.Current.TextToSpeech.GeneratingSpeechFromTextXOfY, index + 1, subtitle.Paragraphs.Count);
            }

            var p = subtitle.Paragraphs[index];
            var outputFileName = Path.Combine(_waveFolder, string.IsNullOrEmpty(overrideFileName) ? index + ".mp3" : overrideFileName.Replace(".wav", ".mp3"));

            // Start from the voice chosen in the combo box for every paragraph, so an actor
            // specific voice does not carry over to later paragraphs that have no actor mapping.
            var voiceName = defaultVoiceName;
            if (_actorAndVoices.Count > 0 && !string.IsNullOrEmpty(p.Actor))
            {
                var f = _actorAndVoices.FirstOrDefault(x => x.Actor == p.Actor);
                if (f != null && !string.IsNullOrEmpty(f.Voice))
                {
                    voiceName = f.Voice;
                }
            }

            // The combo box holds the full display text, while the actor context menu stores the
            // bare voice name - accept both instead of throwing when the display text does not match.
            var voice = voices.FirstOrDefault(x => x.ToString() == voiceName)
                        ?? voices.FirstOrDefault(x => x.Voice == voiceName);
            if (voice == null)
            {
                SeLogger.Error($"ElevenLabs TTS: voice not found: {voiceName}");
                MessageBox.Show("Voice not found: " + voiceName);
                return false;
            }

            var url = "https://api.elevenlabs.io/v1/text-to-speech/" + voice.Model;
            var data = "{ \"text\": \"" + Json.EncodeJsonText(p.Text) + "\", \"model_id\": \"eleven_monolingual_v1\", \"voice_settings\": { \"stability\": 0.5, \"similarity_boost\": 0.5 } }";

            using (var content = new StringContent(data, Encoding.UTF8, "application/json"))
            using (var result = await httpClient.PostAsync(url, content, CancellationToken.None))
            {
                var bytes = await result.Content.ReadAsByteArrayAsync();

                if (!result.IsSuccessStatusCode)
                {
                    // On failure the body is expected to be an error text rather than audio.
                    var error = Encoding.UTF8.GetString(bytes).Trim();
                    SeLogger.Error($"ElevenLabs TTS failed calling API on address {url} : Status code={result.StatusCode} {error}" + Environment.NewLine + "Data=" + data);
                    MessageBox.Show("Calling url: " + url + Environment.NewLine + "With: " + data + Environment.NewLine + Environment.NewLine + "Error: " + error);
                    return false;
                }

                File.WriteAllBytes(outputFileName, bytes);
            }

            // Keep the UI responsive and the progress display current between requests.
            progressBar1.Refresh();
            labelProgress.Refresh();
            Application.DoEvents();
        }
    }

    progressBar1.Visible = false;
    labelProgress.Text = string.Empty;

    return true;
}
private void buttonOK_Click(object sender, EventArgs e)
{
DialogResult = DialogResult.OK;
@ -742,6 +841,9 @@ namespace Nikse.SubtitleEdit.Forms.Tts
{
nikseComboBoxVoice.Items.Clear();
labelApiKey.Visible = false;
nikseTextBoxApiKey.Visible = false;
labelVoice.Text = LanguageSettings.Current.TextToSpeech.Voice;
if (SubtitleFormatHasActors() && _actors.Any())
{
@ -749,7 +851,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
}
var engine = _engines.First(p => p.Index == nikseComboBoxEngine.SelectedIndex);
if (engine.Id == TextToSpeechEngine.IdSpeechSynthesizer)
if (engine.Id == TextToSpeechEngineId.MsSpeechSynthesizer)
{
using (var synthesizer = new System.Speech.Synthesis.SpeechSynthesizer())
{
@ -763,7 +865,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
}
}
if (engine.Id == TextToSpeechEngine.IdPiper)
if (engine.Id == TextToSpeechEngineId.Piper)
{
foreach (var voice in PiperModels.GetVoices())
{
@ -771,7 +873,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
}
}
if (engine.Id == TextToSpeechEngine.IdTortoise)
if (engine.Id == TextToSpeechEngineId.Tortoise)
{
nikseComboBoxVoice.Items.Add("angie");
nikseComboBoxVoice.Items.Add("applejack");
@ -796,12 +898,25 @@ namespace Nikse.SubtitleEdit.Forms.Tts
nikseComboBoxVoice.Items.Add("william");
}
if (engine.Id == TextToSpeechEngine.IdCoqui)
if (engine.Id == TextToSpeechEngineId.Coqui)
{
labelVoice.Text = LanguageSettings.Current.General.WebServiceUrl;
nikseComboBoxVoice.Items.Add("http://localhost:5002/api/tts");
}
if (engine.Id == TextToSpeechEngineId.ElevenLabs)
{
nikseTextBoxApiKey.Text = Configuration.Settings.Tools.TextToSpeechElevenLabsApiKey;
labelApiKey.Visible = true;
nikseTextBoxApiKey.Visible = true;
foreach (var voice in ElevelLabModels.GetVoices())
{
nikseComboBoxVoice.Items.Add(voice.ToString());
}
}
if (nikseComboBoxVoice.Items.Count > 0)
{
nikseComboBoxVoice.SelectedIndex = 0;
@ -828,7 +943,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
contextMenuStripActors.Items.Clear();
if (engine.Id == TextToSpeechEngine.IdPiper)
if (engine.Id == TextToSpeechEngineId.Piper)
{
var voices = PiperModels.GetVoices();
foreach (var voiceLanguage in voices
@ -871,6 +986,49 @@ namespace Nikse.SubtitleEdit.Forms.Tts
}
}
}
else if (engine.Id == TextToSpeechEngineId.ElevenLabs)
{
var voices = ElevelLabModels.GetVoices();
foreach (var voiceLanguage in voices
.GroupBy(p => p.Language)
.OrderBy(p => p.Key))
{
if (voiceLanguage.Count() == 1)
{
var voice = voiceLanguage.First();
var tsi = new ToolStripMenuItem();
tsi.Tag = new ActorAndVoice { Voice = voice.Voice, VoiceIndex = voices.IndexOf(voice) };
tsi.Text = voice.ToString();
tsi.Click += (x, args) =>
{
var a = (ActorAndVoice)(x as ToolStripItem).Tag;
SetActor(a);
};
contextMenuStripActors.Items.Add(tsi);
}
else
{
var parent = new ToolStripMenuItem();
parent.Text = voiceLanguage.Key;
contextMenuStripActors.Items.Add(parent);
foreach (var voice in voiceLanguage.OrderBy(p => p.Voice).ToList())
{
var tsi = new ToolStripMenuItem();
tsi.Tag = new ActorAndVoice { Voice = voice.Voice, VoiceIndex = voices.IndexOf(voice) };
tsi.Text = voice.Voice + " (" + voice.Gender + ")";
tsi.Click += (x, args) =>
{
var a = (ActorAndVoice)(x as ToolStripItem).Tag;
SetActor(a);
};
parent.DropDownItems.Add(tsi);
}
DarkTheme.SetDarkTheme(parent);
}
}
}
else
{
for (var index = 0; index < nikseComboBoxVoice.Items.Count; index++)
@ -934,7 +1092,13 @@ namespace Nikse.SubtitleEdit.Forms.Tts
var sub = new Subtitle();
sub.Paragraphs.Add(new Paragraph(text, 0, 2500));
var waveFileNameOnly = Guid.NewGuid() + ".wav";
await GenerateParagraphAudio(sub, false, waveFileNameOnly);
var ok = await GenerateParagraphAudio(sub, false, waveFileNameOnly);
if (!ok)
{
MessageBox.Show("Ups, voice generation failed!");
return;
}
var waveFileName = Path.Combine(_waveFolder, waveFileNameOnly);
using (var soundPlayer = new System.Media.SoundPlayer(waveFileName))
{
@ -962,5 +1126,17 @@ namespace Nikse.SubtitleEdit.Forms.Tts
TaskDelayHelper.RunDelayed(TimeSpan.FromSeconds(1), () => buttonTestVoice.Enabled = true);
}
}
/// <summary>
/// Stores the currently selected engine in the tool settings when the form closes.
/// When ElevenLabs is the selected engine, the text of the API key box is stored as well.
/// </summary>
private void TextToSpeech_FormClosing(object sender, FormClosingEventArgs e)
{
    var selectedIndex = nikseComboBoxEngine.SelectedIndex;
    var selectedEngine = _engines.First(candidate => candidate.Index == selectedIndex);

    // The engine is remembered by the name of its enum member.
    Configuration.Settings.Tools.TextToSpeechEngine = selectedEngine.Id.ToString();

    if (selectedEngine.Id != TextToSpeechEngineId.ElevenLabs)
    {
        return;
    }

    Configuration.Settings.Tools.TextToSpeechElevenLabsApiKey = nikseTextBoxApiKey.Text;
}
}
}