mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-24 20:22:41 +01:00
Work on tts
This commit is contained in:
parent
3a68c56174
commit
90d5d4df0e
@ -1,76 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.TextToSpeech
{
    /// <summary>
    /// One selectable ElevenLabs voice together with the built-in catalogue of voices.
    /// </summary>
    public class ElevelLabModels
    {
        /// <summary>Name of the voice, e.g. "Adam".</summary>
        public string Voice { get; set; }

        /// <summary>Text shown as the language; the constructor fills it from the <c>accent</c> argument.</summary>
        public string Language { get; set; }

        /// <summary>Gender label of the voice, e.g. "Male" or "Female".</summary>
        public string Gender { get; set; }

        /// <summary>The constructor fills this from the <c>voiceId</c> argument.</summary>
        public string Model { get; set; }

        /// <summary>
        /// Formats the voice as "Language - Voice (Gender)".
        /// </summary>
        public override string ToString()
        {
            return $"{Language} - {Voice} ({Gender})";
        }

        /// <summary>
        /// Creates a voice entry.
        /// </summary>
        /// <param name="language">Base language; accepted for signature compatibility but not stored.</param>
        /// <param name="voice">Voice name, stored in <see cref="Voice"/>.</param>
        /// <param name="gender">Gender label, stored in <see cref="Gender"/>.</param>
        /// <param name="description">Short description of the voice; not stored.</param>
        /// <param name="useCase">Typical use case of the voice; not stored.</param>
        /// <param name="accent">Accent text, stored in <see cref="Language"/>.</param>
        /// <param name="voiceId">Voice identifier, stored in <see cref="Model"/>.</param>
        public ElevelLabModels(string language, string voice, string gender, string description, string useCase, string accent, string voiceId)
        {
            Voice = voice;
            Language = accent;

            // Fix: this used to read "Gender = Gender;" (a self-assignment of the property),
            // which ignored the parameter and left Gender null for every voice.
            Gender = gender;
            Model = voiceId;
        }

        /// <summary>
        /// Returns a new list holding the hard-coded voice catalogue.
        /// </summary>
        /// <returns>A freshly allocated list; callers may modify it freely.</returns>
        public static List<ElevelLabModels> GetVoices()
        {
            var models = new List<ElevelLabModels>
            {
                new ElevelLabModels("English", "Adam", "Male", "Deep", "Narration", "American English", "pNInz6obpgDQGcFmaJgB"),
                new ElevelLabModels("English", "Charlie", "Male", "Casual", "Conversational", "Australian English", "IKne3meq5aSn9XLyUdCD"),
                new ElevelLabModels("English", "Clyde", "Male", "War veteran", "Video games", "American English", "2EiwWnXFnvU5JabPnv8n"),
                new ElevelLabModels("English", "Dorothy", "Female", "Pleasant", "Children’s stories", "British English", "ThT5KcBeYPX3keUQqHPh"),
                new ElevelLabModels("English", "Freya", "Female", "Overhyped", "Video games", "American English", "jsCqWAovK2LkecY7zXl4"),
                new ElevelLabModels("English", "Gigi", "Female", "Childlish", "Animation", "American English", "jBpfuIE2acCO8z3wKNLl"),
                new ElevelLabModels("English", "Harry", "Male", "Anxious", "Video games", "American English", "SOYHLrjzK2X1ezoPC6cr"),
                new ElevelLabModels("English", "James", "Male", "Calm", "News", "Australian English", "ZQe5CZNOzWyzPSCn5a3c"),
                new ElevelLabModels("English", "Lily", "Female", "Raspy", "Narration", "British English", "pFZP5JQG7iQjIQuC4Bku"),
                new ElevelLabModels("English", "Rachel", "Female", "Calm", "Narration", "American English", "21m00Tcm4TlvDq8ikWAM"),
                new ElevelLabModels("Spanish", "Dorothy", "Female", "Pleasant", "News", "Chilean Spanish", "ThT5KcBeYPX3keUQqHPh"),
                new ElevelLabModels("Spanish", "Glinda", "Female", "Witch", "Video games", "Mexican Spanish", "z9fAnlkpzviPz146aGWa"),
                new ElevelLabModels("Spanish", "Grace", "Female", "gentle", "Audiobook", "Mexican Spanish", "oWAxZDx7w5VEj9dCyTzz"),
                new ElevelLabModels("Spanish", "Matilda", "Female", "Warm", "Audiobook", "Chilean Spanish", "XrExE9yKIg1WjnnlVkGX"),
                new ElevelLabModels("German", "Sarah", "Female", "Soft", "News", "Germany German", "EXAVITQu4vr4xnSDxMaL"),
                new ElevelLabModels("German", "Serena", "Female", "Pleasant", "Interactive", "Germany German", "pMsXgVXv3BLzUgSXRplE"),
                new ElevelLabModels("German", "Matilda", "Female", "Warm", "Audiobook", "Germany German", "XrExE9yKIg1WjnnlVkGX"),
                new ElevelLabModels("German", "Freya", "Female", "Overhyped", "Video games", "Germany German", "jsCqWAovK2LkecY7zXl4"),
                new ElevelLabModels("German", "Adam", "Male", "Deep", "Narration", "Germany German", "pNInz6obpgDQGcFmaJgB"),
                new ElevelLabModels("German", "Antoni", "Male", "Well-rounded", "Narration", "Germany German", "ErXwobaYiN019PkySvjV"),
                new ElevelLabModels("French", "Adam", "Male", "Deep", "Narration", "Canadian French", "pNInz6obpgDQGcFmaJgB"),
                new ElevelLabModels("French", "Antoni", "Male", "Well-rounded", "Narration", "Canadian French", "ErXwobaYiN019PkySvjV"),
                new ElevelLabModels("French", "Arnold", "Male", "Crisp", "Narration", "Canadian French", "VR6AewLTigWG4xSOukaG"),
                new ElevelLabModels("French", "Bill", "Male", "Strong", "documentary", "Canadian French", "pqHfZKP75CvOlQylNhV4"),
                new ElevelLabModels("French", "George", "Male", "Raspy", "Narration", "Canadian French", "JBFqnCBsd6RMkjVDRZzb"),
                new ElevelLabModels("French", "Charlotte", "Female", "Seductive", "Video games", "Canadian French", "XB0fDUnXU5powFXDhCwa"),
                new ElevelLabModels("French", "Domi", "Female", "Strong", "Narration", "Canadian French", "AZnzlk1XvdvUeBnXmlld"),
                new ElevelLabModels("French", "Dorothy", "Female", "Pleasant", "Children’s stories", "Canadian French", "ThT5KcBeYPX3keUQqHPh"),
                new ElevelLabModels("French", "Serena", "Female", "Pleasant", "Interactive", "Canadian French", "pMsXgVXv3BLzUgSXRplE"),
                new ElevelLabModels("French", "Sarah", "Female", "Soft", "News", "Canadian French", "EXAVITQu4vr4xnSDxMaL"),
                new ElevelLabModels("Polish", "Adam", "Male", "Deep", "Narration", "Poland Polish", "pNInz6obpgDQGcFmaJgB"),
                new ElevelLabModels("Polish", "Charlie", "Male", "Casual", "Conversational", "Poland Polish", "IKne3meq5aSn9XLyUdCD"),
                new ElevelLabModels("Polish", "Clyde", "Male", "War veteran", "video games", "Poland Polish", "2EiwWnXFnvU5JabPnv8n"),
                new ElevelLabModels("Polish", "Dorothy", "Female", "Pleasant", "Children’s stories", "Poland Polish", "ThT5KcBeYPX3keUQqHPh"),
                new ElevelLabModels("Polish", "Gigi", "Female", "Childlish", "Animation", "Poland Polish", "jBpfuIE2acCO8z3wKNLl"),
                new ElevelLabModels("Polish", "Harry", "Male", "Anxious", "Video games", "Poland Polish", "SOYHLrjzK2X1ezoPC6cr"),
                new ElevelLabModels("Italian", "Adam", "Male", "Deep", "Narration", "Italy Italian", "pNInz6obpgDQGcFmaJgB"),
                new ElevelLabModels("Italian", "Charlie", "Male", "Casual", "Conversational", "Italy Italian", "IKne3meq5aSn9XLyUdCD"),
                new ElevelLabModels("Italian", "Clyde", "Male", "War veteran", "Video games", "Italy Italian", "2EiwWnXFnvU5JabPnv8n"),
                new ElevelLabModels("Italian", "Dorothy", "Female", "Pleasant", "Children’s stories", "Italy Italian", "ThT5KcBeYPX3keUQqHPh"),
                new ElevelLabModels("Italian", "Gigi", "Female", "Childlish", "Animation", "Italy Italian", "jBpfuIE2acCO8z3wKNLl"),
                new ElevelLabModels("Italian", "Harry", "Male", "Anxious", "Video games", "Italy Italian", "SOYHLrjzK2X1ezoPC6cr"),
            };

            return models;
        }
    }
}
|
23
src/libse/TextToSpeech/ElevenLabModel.cs
Normal file
23
src/libse/TextToSpeech/ElevenLabModel.cs
Normal file
@ -0,0 +1,23 @@
|
||||
namespace Nikse.SubtitleEdit.Core.TextToSpeech
{
    /// <summary>
    /// Describes a single ElevenLabs voice.
    /// </summary>
    public class ElevenLabModel
    {
        /// <summary>
        /// Builds a voice description from its individual attributes.
        /// </summary>
        /// <param name="language">Accepted but not stored.</param>
        /// <param name="voice">Copied to <see cref="Voice"/>.</param>
        /// <param name="gender">Copied to <see cref="Gender"/>.</param>
        /// <param name="description">Accepted but not stored.</param>
        /// <param name="useCase">Accepted but not stored.</param>
        /// <param name="accent">Copied to <see cref="Language"/>.</param>
        /// <param name="voiceId">Copied to <see cref="Model"/>.</param>
        public ElevenLabModel(string language, string voice, string gender, string description, string useCase, string accent, string voiceId)
        {
            Model = voiceId;
            Gender = gender;
            Language = accent; // the accent text is what gets stored as the language
            Voice = voice;
        }

        /// <summary>Voice name.</summary>
        public string Voice { get; set; }

        /// <summary>Language text (populated from the accent by the constructor).</summary>
        public string Language { get; set; }

        /// <summary>Gender label.</summary>
        public string Gender { get; set; }

        /// <summary>Voice identifier (populated from the voice id by the constructor).</summary>
        public string Model { get; set; }

        /// <summary>
        /// Renders the voice as "Language - Voice (Gender)".
        /// </summary>
        public override string ToString() => $"{Language} - {Voice} ({Gender})";
    }
}
|
@ -29,6 +29,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
|
||||
private bool _abort;
|
||||
private readonly List<string> _actors;
|
||||
private readonly List<TextToSpeechEngine> _engines;
|
||||
private readonly List<ElevenLabModel> _elevelLabVoices;
|
||||
|
||||
public class ActorAndVoice
|
||||
{
|
||||
@ -71,7 +72,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
|
||||
_subtitleFormat = subtitleFormat;
|
||||
_videoFileName = videoFileName;
|
||||
_videoInfo = videoInfo;
|
||||
|
||||
_elevelLabVoices = new List<ElevenLabModel>();
|
||||
_actors = _subtitle.Paragraphs
|
||||
.Where(p => !string.IsNullOrEmpty(p.Actor))
|
||||
.Select(p => p.Actor)
|
||||
@ -782,7 +783,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
|
||||
progressBar1.Maximum = subtitle.Paragraphs.Count;
|
||||
progressBar1.Visible = showProgressBar;
|
||||
|
||||
var voices = ElevelLabModels.GetVoices();
|
||||
var voices = _elevelLabVoices;
|
||||
var v = nikseComboBoxVoice.Text;
|
||||
|
||||
for (var index = 0; index < subtitle.Paragraphs.Count; index++)
|
||||
@ -914,7 +915,12 @@ namespace Nikse.SubtitleEdit.Forms.Tts
|
||||
labelApiKey.Visible = true;
|
||||
nikseTextBoxApiKey.Visible = true;
|
||||
|
||||
foreach (var voice in ElevelLabModels.GetVoices())
|
||||
if (_elevelLabVoices.Count == 0)
|
||||
{
|
||||
_elevelLabVoices.AddRange(GetElevelLabVoices());
|
||||
}
|
||||
|
||||
foreach (var voice in _elevelLabVoices)
|
||||
{
|
||||
nikseComboBoxVoice.Items.Add(voice.ToString());
|
||||
}
|
||||
@ -991,7 +997,7 @@ namespace Nikse.SubtitleEdit.Forms.Tts
|
||||
}
|
||||
else if (engine.Id == TextToSpeechEngineId.ElevenLabs)
|
||||
{
|
||||
var voices = ElevelLabModels.GetVoices();
|
||||
var voices = _elevelLabVoices;
|
||||
foreach (var voiceLanguage in voices
|
||||
.GroupBy(p => p.Language)
|
||||
.OrderBy(p => p.Key))
|
||||
@ -1056,6 +1062,67 @@ namespace Nikse.SubtitleEdit.Forms.Tts
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the ElevenLabs voice list from "eleven-labs-voices.json" located in the
/// "TextToSpeech/ElevenLabs" folder below <c>Configuration.DataDirectory</c>.
/// When that file is missing, the embedded resource
/// "Nikse.SubtitleEdit.Resources.eleven-labs-voices.zip" (if present) is unpacked
/// into the folder first.
/// </summary>
/// <returns>
/// One <see cref="ElevenLabModel"/> per element of the JSON "voices" array, or an
/// empty list when the JSON file does not exist even after the unpack attempt.
/// </returns>
private List<ElevenLabModel> GetElevelLabVoices()
{
    // CreateDirectory creates every missing directory in the path and does nothing
    // for those that already exist, so both folders are covered by this one call.
    var voiceFolder = Path.Combine(Path.Combine(Configuration.DataDirectory, "TextToSpeech"), "ElevenLabs");
    Directory.CreateDirectory(voiceFolder);

    var voiceFile = Path.Combine(voiceFolder, "eleven-labs-voices.json");
    if (!File.Exists(voiceFile))
    {
        var resource = System.Reflection.Assembly
            .GetExecutingAssembly()
            .GetManifestResourceStream("Nikse.SubtitleEdit.Resources.eleven-labs-voices.zip");

        if (resource != null)
        {
            using (var archive = ZipExtractor.Open(resource))
            {
                foreach (var item in archive.ReadCentralDir())
                {
                    // Skip entries that have no file-name part.
                    if (string.IsNullOrEmpty(Path.GetFileName(item.FilenameInZip)))
                    {
                        continue;
                    }

                    var relativePath = item.FilenameInZip.Replace('/', Path.DirectorySeparatorChar);
                    archive.ExtractFile(item, Path.Combine(voiceFolder, relativePath));
                }
            }
        }
    }

    var loaded = new List<ElevenLabModel>();
    if (!File.Exists(voiceFile))
    {
        return loaded;
    }

    var content = File.ReadAllText(voiceFile);
    var jsonParser = new SeJsonParser();
    foreach (var element in jsonParser.GetArrayElementsByName(content, "voices"))
    {
        var voiceName = jsonParser.GetFirstObject(element, "name");
        var id = jsonParser.GetFirstObject(element, "voice_id");
        var voiceGender = jsonParser.GetFirstObject(element, "gender");
        var voiceDescription = jsonParser.GetFirstObject(element, "description");
        var voiceAccent = jsonParser.GetFirstObject(element, "accent");
        var voiceUseCase = jsonParser.GetFirstObject(element, "use case");

        // No separate language value is read from the JSON, so an empty string is passed.
        loaded.Add(new ElevenLabModel(string.Empty, voiceName, voiceGender, voiceDescription, voiceUseCase, voiceAccent, id));
    }

    return loaded;
}
|
||||
|
||||
private bool SubtitleFormatHasActors()
|
||||
{
|
||||
var formatType = _subtitleFormat.GetType();
|
||||
|
BIN
src/ui/Resources/eleven-labs-voices.zip
Normal file
BIN
src/ui/Resources/eleven-labs-voices.zip
Normal file
Binary file not shown.
@ -2426,6 +2426,7 @@
|
||||
<EmbeddedResource Include="Resources\HunspellDictionaries.xml.gz" />
|
||||
<EmbeddedResource Include="Resources\HunspellBackupDictionaries.xml.gz" />
|
||||
<EmbeddedResource Include="Resources\TesseractDictionaries.xml.gz" />
|
||||
<EmbeddedResource Include="Resources\eleven-labs-voices.zip" />
|
||||
<None Include="Resources\nOCR_TesseractHelper.xml.gz" />
|
||||
<None Include="Resources\SMPTE-428-7-2007-DCST.xsd">
|
||||
<SubType>Designer</SubType>
|
||||
|
Loading…
Reference in New Issue
Block a user