Work on tts

This commit is contained in:
Nikolaj Olsson 2024-04-15 22:52:18 +02:00
parent 3a68c56174
commit 90d5d4df0e
5 changed files with 95 additions and 80 deletions

View File

@ -1,76 +0,0 @@
using System.Collections.Generic;
namespace Nikse.SubtitleEdit.Core.TextToSpeech
{
/// <summary>
/// One entry of the hard-coded ElevenLabs voice table returned by <see cref="GetVoices"/>.
/// </summary>
public class ElevelLabModels
{
    /// <summary>Voice name (the constructor's <c>voice</c> argument), e.g. "Adam".</summary>
    public string Voice { get; set; }

    /// <summary>Display language; filled from the constructor's <c>accent</c> argument, e.g. "American English".</summary>
    public string Language { get; set; }

    /// <summary>Gender of the voice (the constructor's <c>gender</c> argument), e.g. "Male".</summary>
    public string Gender { get; set; }

    /// <summary>Filled from the constructor's <c>voiceId</c> argument.</summary>
    public string Model { get; set; }

    /// <summary>
    /// Formats the voice as "Language - Voice (Gender)".
    /// </summary>
    /// <returns>The display text for this voice.</returns>
    public override string ToString()
    {
        return $"{Language} - {Voice} ({Gender})";
    }

    /// <summary>
    /// Creates a voice entry.
    /// </summary>
    /// <param name="language">Accepted but not stored.</param>
    /// <param name="voice">Stored in <see cref="Voice"/>.</param>
    /// <param name="gender">Stored in <see cref="Gender"/>.</param>
    /// <param name="description">Accepted but not stored.</param>
    /// <param name="useCase">Accepted but not stored.</param>
    /// <param name="accent">Stored in <see cref="Language"/>.</param>
    /// <param name="voiceId">Stored in <see cref="Model"/>.</param>
    public ElevelLabModels(string language, string voice, string gender, string description, string useCase, string accent, string voiceId)
    {
        Voice = voice;
        Language = accent;

        // Must use the parameter: "Gender = Gender" assigned the property to itself and left it null.
        Gender = gender;
        Model = voiceId;
    }

    /// <summary>
    /// Returns the built-in voice table.
    /// </summary>
    /// <returns>A new list instance on every call.</returns>
    public static List<ElevelLabModels> GetVoices()
    {
        var models = new List<ElevelLabModels>
        {
            new ElevelLabModels("English", "Adam", "Male", "Deep", "Narration", "American English", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("English", "Charlie", "Male", "Casual", "Conversational", "Australian English", "IKne3meq5aSn9XLyUdCD"),
            new ElevelLabModels("English", "Clyde", "Male", "War veteran", "Video games", "American English", "2EiwWnXFnvU5JabPnv8n"),
            new ElevelLabModels("English", "Dorothy", "Female", "Pleasant", "Childrens stories", "British English", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("English", "Freya", "Female", "Overhyped", "Video games", "American English", "jsCqWAovK2LkecY7zXl4"),
            new ElevelLabModels("English", "Gigi", "Female", "Childlish", "Animation", "American English", "jBpfuIE2acCO8z3wKNLl"),
            new ElevelLabModels("English", "Harry", "Male", "Anxious", "Video games", "American English", "SOYHLrjzK2X1ezoPC6cr"),
            new ElevelLabModels("English", "James", "Male", "Calm", "News", "Australian English", "ZQe5CZNOzWyzPSCn5a3c"),
            new ElevelLabModels("English", "Lily", "Female", "Raspy", "Narration", "British English", "pFZP5JQG7iQjIQuC4Bku"),
            new ElevelLabModels("English", "Rachel", "Female", "Calm", "Narration", "American English", "21m00Tcm4TlvDq8ikWAM"),
            new ElevelLabModels("Spanish", "Dorothy", "Female", "Pleasant", "News", "Chilean Spanish", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("Spanish", "Glinda", "Female", "Witch", "Video games", "Mexican Spanish", "z9fAnlkpzviPz146aGWa"),
            new ElevelLabModels("Spanish", "Grace", "Female", "gentle", "Audiobook", "Mexican Spanish", "oWAxZDx7w5VEj9dCyTzz"),
            new ElevelLabModels("Spanish", "Matilda", "Female", "Warm", "Audiobook", "Chilean Spanish", "XrExE9yKIg1WjnnlVkGX"),
            new ElevelLabModels("German", "Sarah", "Female", "Soft", "News", "Germany German", "EXAVITQu4vr4xnSDxMaL"),
            new ElevelLabModels("German", "Serena", "Female", "Pleasant", "Interactive", "Germany German", "pMsXgVXv3BLzUgSXRplE"),
            new ElevelLabModels("German", "Matilda", "Female", "Warm", "Audiobook", "Germany German", "XrExE9yKIg1WjnnlVkGX"),
            new ElevelLabModels("German", "Freya", "Female", "Overhyped", "Video games", "Germany German", "jsCqWAovK2LkecY7zXl4"),
            new ElevelLabModels("German", "Adam", "Male", "Deep", "Narration", "Germany German", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("German", "Antoni", "Male", "Well-rounded", "Narration", "Germany German", "ErXwobaYiN019PkySvjV"),
            new ElevelLabModels("French", "Adam", "Male", "Deep", "Narration", "Canadian French", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("French", "Antoni", "Male", "Well-rounded", "Narration", "Canadian French", "ErXwobaYiN019PkySvjV"),
            new ElevelLabModels("French", "Arnold", "Male", "Crisp", "Narration", "Canadian French", "VR6AewLTigWG4xSOukaG"),
            new ElevelLabModels("French", "Bill", "Male", "Strong", "documentary", "Canadian French", "pqHfZKP75CvOlQylNhV4"),
            new ElevelLabModels("French", "George", "Male", "Raspy", "Narration", "Canadian French", "JBFqnCBsd6RMkjVDRZzb"),
            new ElevelLabModels("French", "Charlotte", "Female", "Seductive", "Video games", "Canadian French", "XB0fDUnXU5powFXDhCwa"),
            new ElevelLabModels("French", "Domi", "Female", "Strong", "Narration", "Canadian French", "AZnzlk1XvdvUeBnXmlld"),
            new ElevelLabModels("French", "Dorothy", "Female", "Pleasant", "Childrens stories", "Canadian French", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("French", "Serena", "Female", "Pleasant", "Interactive", "Canadian French", "pMsXgVXv3BLzUgSXRplE"),
            new ElevelLabModels("French", "Sarah", "Female", "Soft", "News", "Canadian French", "EXAVITQu4vr4xnSDxMaL"),
            new ElevelLabModels("Polish", "Adam", "Male", "Deep", "Narration", "Poland Polish", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("Polish", "Charlie", "Male", "Casual", "Conversational", "Poland Polish", "IKne3meq5aSn9XLyUdCD"),
            new ElevelLabModels("Polish", "Clyde", "Male", "War veteran", "video games", "Poland Polish", "2EiwWnXFnvU5JabPnv8n"),
            new ElevelLabModels("Polish", "Dorothy", "Female", "Pleasant", "Childrens stories", "Poland Polish", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("Polish", "Gigi", "Female", "Childlish", "Animation", "Poland Polish", "jBpfuIE2acCO8z3wKNLl"),
            new ElevelLabModels("Polish", "Harry", "Male", "Anxious", "Video games", "Poland Polish", "SOYHLrjzK2X1ezoPC6cr"),
            new ElevelLabModels("Italian", "Adam", "Male", "Deep", "Narration", "Italy Italian", "pNInz6obpgDQGcFmaJgB"),
            new ElevelLabModels("Italian", "Charlie", "Male", "Casual", "Conversational", "Italy Italian", "IKne3meq5aSn9XLyUdCD"),
            new ElevelLabModels("Italian", "Clyde", "Male", "War veteran", "Video games", "Italy Italian", "2EiwWnXFnvU5JabPnv8n"),
            new ElevelLabModels("Italian", "Dorothy", "Female", "Pleasant", "Childrens stories", "Italy Italian", "ThT5KcBeYPX3keUQqHPh"),
            new ElevelLabModels("Italian", "Gigi", "Female", "Childlish", "Animation", "Italy Italian", "jBpfuIE2acCO8z3wKNLl"),
            new ElevelLabModels("Italian", "Harry", "Male", "Anxious", "Video games", "Italy Italian", "SOYHLrjzK2X1ezoPC6cr"),
        };

        return models;
    }
}
}

View File

@ -0,0 +1,23 @@
namespace Nikse.SubtitleEdit.Core.TextToSpeech
{
/// <summary>
/// A single ElevenLabs voice: its name, display language, gender and voice id.
/// </summary>
public class ElevenLabModel
{
    /// <summary>
    /// Creates a voice description.
    /// </summary>
    /// <param name="language">Accepted but not stored.</param>
    /// <param name="voice">Stored in <see cref="Voice"/>.</param>
    /// <param name="gender">Stored in <see cref="Gender"/>.</param>
    /// <param name="description">Accepted but not stored.</param>
    /// <param name="useCase">Accepted but not stored.</param>
    /// <param name="accent">Stored in <see cref="Language"/>.</param>
    /// <param name="voiceId">Stored in <see cref="Model"/>.</param>
    public ElevenLabModel(string language, string voice, string gender, string description, string useCase, string accent, string voiceId)
    {
        Model = voiceId;
        Gender = gender;
        Language = accent;
        Voice = voice;
    }

    /// <summary>Name of the voice.</summary>
    public string Voice { get; set; }

    /// <summary>Display language; receives the constructor's <c>accent</c> argument.</summary>
    public string Language { get; set; }

    /// <summary>Gender of the voice.</summary>
    public string Gender { get; set; }

    /// <summary>Receives the constructor's <c>voiceId</c> argument.</summary>
    public string Model { get; set; }

    /// <summary>
    /// Formats the voice as "Language - Voice (Gender)".
    /// </summary>
    public override string ToString() => $"{Language} - {Voice} ({Gender})";
}
}

View File

@ -29,6 +29,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
private bool _abort;
private readonly List<string> _actors;
private readonly List<TextToSpeechEngine> _engines;
private readonly List<ElevenLabModel> _elevelLabVoices;
public class ActorAndVoice
{
@ -71,7 +72,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
_subtitleFormat = subtitleFormat;
_videoFileName = videoFileName;
_videoInfo = videoInfo;
_elevelLabVoices = new List<ElevenLabModel>();
_actors = _subtitle.Paragraphs
.Where(p => !string.IsNullOrEmpty(p.Actor))
.Select(p => p.Actor)
@ -782,7 +783,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
progressBar1.Maximum = subtitle.Paragraphs.Count;
progressBar1.Visible = showProgressBar;
var voices = ElevelLabModels.GetVoices();
var voices = _elevelLabVoices;
var v = nikseComboBoxVoice.Text;
for (var index = 0; index < subtitle.Paragraphs.Count; index++)
@ -914,7 +915,12 @@ namespace Nikse.SubtitleEdit.Forms.Tts
labelApiKey.Visible = true;
nikseTextBoxApiKey.Visible = true;
foreach (var voice in ElevelLabModels.GetVoices())
if (_elevelLabVoices.Count == 0)
{
_elevelLabVoices.AddRange(GetElevelLabVoices());
}
foreach (var voice in _elevelLabVoices)
{
nikseComboBoxVoice.Items.Add(voice.ToString());
}
@ -991,7 +997,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
}
else if (engine.Id == TextToSpeechEngineId.ElevenLabs)
{
var voices = ElevelLabModels.GetVoices();
var voices = _elevelLabVoices;
foreach (var voiceLanguage in voices
.GroupBy(p => p.Language)
.OrderBy(p => p.Key))
@ -1056,6 +1062,67 @@ namespace Nikse.SubtitleEdit.Forms.Tts
}
}
/// <summary>
/// Reads the ElevenLabs voices from "eleven-labs-voices.json" in the "TextToSpeech/ElevenLabs"
/// folder below <c>Configuration.DataDirectory</c>.
/// </summary>
/// <remarks>
/// Both folders are created when they do not exist. When the json file is missing, the embedded
/// resource "Nikse.SubtitleEdit.Resources.eleven-labs-voices.zip" (if it can be found) is
/// extracted into the ElevenLabs folder before the json file is looked up again.
/// </remarks>
/// <returns>
/// One <see cref="ElevenLabModel"/> per element of the json "voices" array, or an empty list
/// when the json file does not exist after the extraction attempt.
/// </returns>
private List<ElevenLabModel> GetElevelLabVoices()
{
    var textToSpeechFolder = Path.Combine(Configuration.DataDirectory, "TextToSpeech");
    var voicesFolder = Path.Combine(textToSpeechFolder, "ElevenLabs");

    // Parent folder first, then the ElevenLabs folder - each only when it is missing.
    foreach (var folder in new[] { textToSpeechFolder, voicesFolder })
    {
        if (!Directory.Exists(folder))
        {
            Directory.CreateDirectory(folder);
        }
    }

    var models = new List<ElevenLabModel>();
    var voicesJsonFile = Path.Combine(voicesFolder, "eleven-labs-voices.json");

    if (!File.Exists(voicesJsonFile))
    {
        // No json file on disk yet: unpack the copy embedded in the executing assembly.
        var resourceStream = System.Reflection.Assembly
            .GetExecutingAssembly()
            .GetManifestResourceStream("Nikse.SubtitleEdit.Resources.eleven-labs-voices.zip");

        if (resourceStream != null)
        {
            using (var archive = ZipExtractor.Open(resourceStream))
            {
                foreach (var zipEntry in archive.ReadCentralDir())
                {
                    // Entries whose name has no file part (e.g. a name ending in a separator) are not extracted.
                    if (string.IsNullOrEmpty(Path.GetFileName(zipEntry.FilenameInZip)))
                    {
                        continue;
                    }

                    var relativePath = zipEntry.FilenameInZip.Replace('/', Path.DirectorySeparatorChar);
                    archive.ExtractFile(zipEntry, Path.Combine(voicesFolder, relativePath));
                }
            }
        }
    }

    if (!File.Exists(voicesJsonFile))
    {
        return models;
    }

    var jsonText = File.ReadAllText(voicesJsonFile);
    var jsonParser = new SeJsonParser();
    foreach (var voiceJson in jsonParser.GetArrayElementsByName(jsonText, "voices"))
    {
        var voiceName = jsonParser.GetFirstObject(voiceJson, "name");
        var id = jsonParser.GetFirstObject(voiceJson, "voice_id");
        var voiceGender = jsonParser.GetFirstObject(voiceJson, "gender");
        var voiceDescription = jsonParser.GetFirstObject(voiceJson, "description");
        var voiceAccent = jsonParser.GetFirstObject(voiceJson, "accent");
        var voiceUseCase = jsonParser.GetFirstObject(voiceJson, "use case");

        // The language argument is passed as empty; the accent is what ends up as the model's language.
        models.Add(new ElevenLabModel(string.Empty, voiceName, voiceGender, voiceDescription, voiceUseCase, voiceAccent, id));
    }

    return models;
}
private bool SubtitleFormatHasActors()
{
var formatType = _subtitleFormat.GetType();

Binary file not shown.

View File

@ -2426,6 +2426,7 @@
<EmbeddedResource Include="Resources\HunspellDictionaries.xml.gz" />
<EmbeddedResource Include="Resources\HunspellBackupDictionaries.xml.gz" />
<EmbeddedResource Include="Resources\TesseractDictionaries.xml.gz" />
<EmbeddedResource Include="Resources\eleven-labs-voices.zip" />
<None Include="Resources\nOCR_TesseractHelper.xml.gz" />
<None Include="Resources\SMPTE-428-7-2007-DCST.xsd">
<SubType>Designer</SubType>