diff --git a/src/libse/TextToSpeech/ElevelLabModels.cs b/src/libse/TextToSpeech/ElevelLabModels.cs deleted file mode 100644 index 3736add62..000000000 --- a/src/libse/TextToSpeech/ElevelLabModels.cs +++ /dev/null @@ -1,76 +0,0 @@ -using System.Collections.Generic; - -namespace Nikse.SubtitleEdit.Core.TextToSpeech -{ - public class ElevelLabModels - { - public string Voice { get; set; } - public string Language { get; set; } - public string Gender { get; set; } - public string Model { get; set; } - - public override string ToString() - { - return $"{Language} - {Voice} ({Gender})"; - } - - public ElevelLabModels(string language, string voice, string gender, string description, string useCase, string accent, string voiceId) - { - Voice = voice; - Language = accent; - Gender = Gender; - Model = voiceId; - } - - public static List GetVoices() - { - var models = new List - { - new ElevelLabModels("English", "Adam", "Male", "Deep", "Narration", "American English", "pNInz6obpgDQGcFmaJgB"), - new ElevelLabModels("English", "Charlie", "Male", "Casual", "Conversational", "Australian English", "IKne3meq5aSn9XLyUdCD"), - new ElevelLabModels("English", "Clyde", "Male", "War veteran", "Video games", "American English", "2EiwWnXFnvU5JabPnv8n"), - new ElevelLabModels("English", "Dorothy", "Female", "Pleasant", "Children’s stories", "British English", "ThT5KcBeYPX3keUQqHPh"), - new ElevelLabModels("English", "Freya", "Female", "Overhyped", "Video games", "American English", "jsCqWAovK2LkecY7zXl4"), - new ElevelLabModels("English", "Gigi", "Female", "Childlish", "Animation", "American English", "jBpfuIE2acCO8z3wKNLl"), - new ElevelLabModels("English", "Harry", "Male", "Anxious", "Video games", "American English", "SOYHLrjzK2X1ezoPC6cr"), - new ElevelLabModels("English", "James", "Male", "Calm", "News", "Australian English", "ZQe5CZNOzWyzPSCn5a3c"), - new ElevelLabModels("English", "Lily", "Female", "Raspy", "Narration", "British English", "pFZP5JQG7iQjIQuC4Bku"), - new 
ElevelLabModels("English", "Rachel", "Female", "Calm", "Narration", "American English", "21m00Tcm4TlvDq8ikWAM"), - new ElevelLabModels("Spanish", "Dorothy", "Female", "Pleasant", "News", "Chilean Spanish", "ThT5KcBeYPX3keUQqHPh"), - new ElevelLabModels("Spanish", "Glinda", "Female", "Witch", "Video games", "Mexican Spanish", "z9fAnlkpzviPz146aGWa"), - new ElevelLabModels("Spanish", "Grace", "Female", "gentle", "Audiobook", "Mexican Spanish", "oWAxZDx7w5VEj9dCyTzz"), - new ElevelLabModels("Spanish", "Matilda", "Female", "Warm", "Audiobook", "Chilean Spanish", "XrExE9yKIg1WjnnlVkGX"), - new ElevelLabModels("German", "Sarah", "Female", "Soft", "News", "Germany German", "EXAVITQu4vr4xnSDxMaL"), - new ElevelLabModels("German", "Serena", "Female", "Pleasant", "Interactive", "Germany German", "pMsXgVXv3BLzUgSXRplE"), - new ElevelLabModels("German", "Matilda", "Female", "Warm", "Audiobook", "Germany German", "XrExE9yKIg1WjnnlVkGX"), - new ElevelLabModels("German", "Freya", "Female", "Overhyped", "Video games", "Germany German", "jsCqWAovK2LkecY7zXl4"), - new ElevelLabModels("German", "Adam", "Male", "Deep", "Narration", "Germany German", "pNInz6obpgDQGcFmaJgB"), - new ElevelLabModels("German", "Antoni", "Male", "Well-rounded", "Narration", "Germany German", "ErXwobaYiN019PkySvjV"), - new ElevelLabModels("French", "Adam", "Male", "Deep", "Narration", "Canadian French", "pNInz6obpgDQGcFmaJgB"), - new ElevelLabModels("French", "Antoni", "Male", "Well-rounded", "Narration", "Canadian French", "ErXwobaYiN019PkySvjV"), - new ElevelLabModels("French", "Arnold", "Male", "Crisp", "Narration", "Canadian French", "VR6AewLTigWG4xSOukaG"), - new ElevelLabModels("French", "Bill", "Male", "Strong", "documentary", "Canadian French", "pqHfZKP75CvOlQylNhV4"), - new ElevelLabModels("French", "George", "Male", "Raspy", "Narration", "Canadian French", "JBFqnCBsd6RMkjVDRZzb"), - new ElevelLabModels("French", "Charlotte", "Female", "Seductive", "Video games", "Canadian French", 
"XB0fDUnXU5powFXDhCwa"), - new ElevelLabModels("French", "Domi", "Female", "Strong", "Narration", "Canadian French", "AZnzlk1XvdvUeBnXmlld"), - new ElevelLabModels("French", "Dorothy", "Female", "Pleasant", "Children’s stories", "Canadian French", "ThT5KcBeYPX3keUQqHPh"), - new ElevelLabModels("French", "Serena", "Female", "Pleasant", "Interactive", "Canadian French", "pMsXgVXv3BLzUgSXRplE"), - new ElevelLabModels("French", "Sarah", "Female", "Soft", "News", "Canadian French", "EXAVITQu4vr4xnSDxMaL"), - new ElevelLabModels("Polish", "Adam", "Male", "Deep", "Narration", "Poland Polish", "pNInz6obpgDQGcFmaJgB"), - new ElevelLabModels("Polish", "Charlie", "Male", "Casual", "Conversational", "Poland Polish", "IKne3meq5aSn9XLyUdCD"), - new ElevelLabModels("Polish", "Clyde", "Male", "War veteran", "video games", "Poland Polish", "2EiwWnXFnvU5JabPnv8n"), - new ElevelLabModels("Polish", "Dorothy", "Female", "Pleasant", "Children’s stories", "Poland Polish", "ThT5KcBeYPX3keUQqHPh"), - new ElevelLabModels("Polish", "Gigi", "Female", "Childlish", "Animation", "Poland Polish", "jBpfuIE2acCO8z3wKNLl"), - new ElevelLabModels("Polish", "Harry", "Male", "Anxious", "Video games", "Poland Polish", "SOYHLrjzK2X1ezoPC6cr"), - new ElevelLabModels("Italian", "Adam", "Male", "Deep", "Narration", "Italy Italian", "pNInz6obpgDQGcFmaJgB"), - new ElevelLabModels("Italian", "Charlie", "Male", "Casual", "Conversational", "Italy Italian", "IKne3meq5aSn9XLyUdCD"), - new ElevelLabModels("Italian", "Clyde", "Male", "War veteran", "Video games", "Italy Italian", "2EiwWnXFnvU5JabPnv8n"), - new ElevelLabModels("Italian", "Dorothy", "Female", "Pleasant", "Children’s stories", "Italy Italian", "ThT5KcBeYPX3keUQqHPh"), - new ElevelLabModels("Italian", "Gigi", "Female", "Childlish", "Animation", "Italy Italian", "jBpfuIE2acCO8z3wKNLl"), - new ElevelLabModels("Italian", "Harry", "Male", "Anxious", "Video games", "Italy Italian", "SOYHLrjzK2X1ezoPC6cr"), - }; - - return models; - } - } -} \ No newline at 
end of file
diff --git a/src/libse/TextToSpeech/ElevenLabModel.cs b/src/libse/TextToSpeech/ElevenLabModel.cs
new file mode 100644
index 000000000..086b95ce1
--- /dev/null
+++ b/src/libse/TextToSpeech/ElevenLabModel.cs
@@ -0,0 +1,44 @@
+namespace Nikse.SubtitleEdit.Core.TextToSpeech
+{
+    /// <summary>
+    /// Describes one ElevenLabs voice: its name, language/accent label,
+    /// gender and the voice id.
+    /// </summary>
+    public class ElevenLabModel
+    {
+        /// <summary>Name of the voice.</summary>
+        public string Voice { get; set; }
+
+        /// <summary>Language label; the constructor sets it from its accent argument.</summary>
+        public string Language { get; set; }
+
+        /// <summary>Gender label of the voice.</summary>
+        public string Gender { get; set; }
+
+        /// <summary>Voice id; the constructor sets it from its voiceId argument.</summary>
+        public string Model { get; set; }
+
+        /// <summary>
+        /// Creates a voice entry from the individual voice attributes.
+        /// </summary>
+        /// <param name="language">Currently not stored.</param>
+        /// <param name="voice">Stored in <see cref="Voice"/>.</param>
+        /// <param name="gender">Stored in <see cref="Gender"/>.</param>
+        /// <param name="description">Currently not stored.</param>
+        /// <param name="useCase">Currently not stored.</param>
+        /// <param name="accent">Stored in <see cref="Language"/>.</param>
+        /// <param name="voiceId">Stored in <see cref="Model"/>.</param>
+        public ElevenLabModel(string language, string voice, string gender, string description, string useCase, string accent, string voiceId)
+        {
+            Voice = voice;
+            Language = accent;
+            Gender = gender;
+            Model = voiceId;
+        }
+
+        /// <summary>
+        /// Formats the voice as "Language - Voice (Gender)".
+        /// </summary>
+        public override string ToString() => $"{Language} - {Voice} ({Gender})";
+    }
+}
\ No newline at end of file
diff --git a/src/ui/Forms/Tts/TextToSpeech.cs b/src/ui/Forms/Tts/TextToSpeech.cs
index 48e099db8..70c47eb94 100644
--- a/src/ui/Forms/Tts/TextToSpeech.cs
+++ b/src/ui/Forms/Tts/TextToSpeech.cs
@@ -29,6 +29,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
         private bool _abort;
         private readonly List _actors;
         private readonly List _engines;
+        private readonly List<ElevenLabModel> _elevelLabVoices;
 
         public class ActorAndVoice
         {
@@ -71,7 +72,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
             _subtitleFormat = subtitleFormat;
             _videoFileName = videoFileName;
             _videoInfo = videoInfo;
-
+            _elevelLabVoices = new List<ElevenLabModel>();
             _actors = _subtitle.Paragraphs
                 .Where(p => !string.IsNullOrEmpty(p.Actor))
                 .Select(p => p.Actor)
@@ -782,7 +783,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
             progressBar1.Maximum = subtitle.Paragraphs.Count;
             progressBar1.Visible = showProgressBar;
 
-            var voices = ElevelLabModels.GetVoices();
+            var voices = _elevelLabVoices;
             var v = nikseComboBoxVoice.Text;
 
             for (var index = 0; index < subtitle.Paragraphs.Count; index++)
@@ -914,7 +915,12 @@ namespace Nikse.SubtitleEdit.Forms.Tts
                 labelApiKey.Visible = true;
                 nikseTextBoxApiKey.Visible = true;
 
-                foreach (var voice in ElevelLabModels.GetVoices())
+                if (_elevelLabVoices.Count == 0)
+                {
+                    _elevelLabVoices.AddRange(GetElevelLabVoices());
+                }
+
+                foreach (var voice in _elevelLabVoices)
                 {
                     nikseComboBoxVoice.Items.Add(voice.ToString());
                 }
@@ -991,7 +997,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
                 }
                 else if (engine.Id == TextToSpeechEngineId.ElevenLabs)
                 {
-                    var voices = ElevelLabModels.GetVoices();
+                    var voices = _elevelLabVoices;
                     foreach (var voiceLanguage in voices
                         .GroupBy(p => p.Language)
                         .OrderBy(p => p.Key))
@@ -1056,6 +1062,78 @@ namespace Nikse.SubtitleEdit.Forms.Tts
             }
         }
 
+        /// <summary>
+        /// Loads the ElevenLabs voices from "eleven-labs-voices.json" in the
+        /// "TextToSpeech/ElevenLabs" folder below <see cref="Configuration.DataDirectory"/>.
+        /// When that file is missing, the embedded "eleven-labs-voices.zip" resource is
+        /// unpacked into the folder first.
+        /// </summary>
+        /// <returns>
+        /// One <see cref="ElevenLabModel"/> per element of the JSON "voices" array, or an
+        /// empty list when the JSON file still does not exist after the extraction attempt.
+        /// </returns>
+        private List<ElevenLabModel> GetElevelLabVoices()
+        {
+            // CreateDirectory does nothing for an existing folder and also creates the
+            // intermediate "TextToSpeech" folder, so no Directory.Exists checks are needed.
+            var elevelLabsPath = Path.Combine(Configuration.DataDirectory, "TextToSpeech", "ElevenLabs");
+            Directory.CreateDirectory(elevelLabsPath);
+
+            var jsonFileName = Path.Combine(elevelLabsPath, "eleven-labs-voices.json");
+            if (!File.Exists(jsonFileName))
+            {
+                var asm = System.Reflection.Assembly.GetExecutingAssembly();
+
+                // Dispose the resource stream here so it is released even if opening the
+                // zip throws; disposing a Stream a second time is harmless in case the
+                // extractor closes it as well.
+                using (var stream = asm.GetManifestResourceStream("Nikse.SubtitleEdit.Resources.eleven-labs-voices.zip"))
+                {
+                    if (stream != null)
+                    {
+                        using (var zip = ZipExtractor.Open(stream))
+                        {
+                            foreach (var entry in zip.ReadCentralDir())
+                            {
+                                // An entry with no file-name part (e.g. a folder entry ending
+                                // in '/') has nothing to extract.
+                                if (string.IsNullOrEmpty(Path.GetFileName(entry.FilenameInZip)))
+                                {
+                                    continue;
+                                }
+
+                                var path = Path.Combine(elevelLabsPath, entry.FilenameInZip.Replace('/', Path.DirectorySeparatorChar));
+                                zip.ExtractFile(entry, path);
+                            }
+                        }
+                    }
+                }
+            }
+
+            var result = new List<ElevenLabModel>();
+            if (!File.Exists(jsonFileName))
+            {
+                return result;
+            }
+
+            var parser = new SeJsonParser();
+            var json = File.ReadAllText(jsonFileName);
+            foreach (var voice in parser.GetArrayElementsByName(json, "voices"))
+            {
+                var name = parser.GetFirstObject(voice, "name");
+                var voiceId = parser.GetFirstObject(voice, "voice_id");
+                var gender = parser.GetFirstObject(voice, "gender");
+                var description = parser.GetFirstObject(voice, "description");
+                var accent = parser.GetFirstObject(voice, "accent");
+                var useCase = parser.GetFirstObject(voice, "use case");
+
+                // No separate language value is read from the JSON, so the first argument is left empty.
+                result.Add(new ElevenLabModel(string.Empty, name, gender, description, useCase, accent, voiceId));
+            }
+
+            return result;
+        }
+
         private bool SubtitleFormatHasActors()
         {
             var formatType = _subtitleFormat.GetType();
diff --git a/src/ui/Resources/eleven-labs-voices.zip b/src/ui/Resources/eleven-labs-voices.zip
new file mode 100644
index 000000000..5be87fe35
Binary files /dev/null and b/src/ui/Resources/eleven-labs-voices.zip differ
diff --git a/src/ui/SubtitleEdit.csproj b/src/ui/SubtitleEdit.csproj
index 98cd1f9ea..adb7d4c66 100644
--- a/src/ui/SubtitleEdit.csproj
+++ b/src/ui/SubtitleEdit.csproj
@@ -2426,6 +2426,7 @@ + Designer