Work on translate

This commit is contained in:
niksedk 2023-10-17 12:40:55 +02:00
parent 0ab757aae0
commit 7843294673
4 changed files with 194 additions and 0 deletions

View File

@ -0,0 +1,164 @@
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.SubtitleFormats;
using Nikse.SubtitleEdit.Core.Translate;
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace Nikse.SubtitleEdit.Core.AutoTranslate
{
public class ChatGptTranslate : IAutoTranslator
{
// HTTP client used to post translation requests; disposed and re-created by Initialize().
private HttpClient _httpClient;
// Engine display name; static and settable, defaults to "ChatGPT".
public static string StaticName { get; set; } = "ChatGPT";
// Instance-level name, always mirrors StaticName.
public string Name => StaticName;
// Web address reported for this engine. This is not the API endpoint:
// Initialize() takes the request base address from Configuration.Settings.Tools.ChatGptUrl.
public string Url => "https://chat.openai.com/";
/// <summary>
/// (Re)creates the HTTP client from the current ChatGPT settings
/// (<c>Configuration.Settings.Tools.ChatGptUrl</c> and <c>ChatGptApiKey</c>).
/// Must be called before <see cref="Translate"/>, which sends its requests through this client.
/// </summary>
/// <exception cref="ArgumentNullException">The configured URL is null.</exception>
/// <exception cref="UriFormatException">The configured URL is empty or malformed.</exception>
public void Initialize()
{
    // Build the replacement completely before touching the current client, so a
    // bad URL in the settings does not leave a disposed client behind.
    var baseAddress = new Uri(Configuration.Settings.Tools.ChatGptUrl);
    var client = new HttpClient { BaseAddress = baseAddress };

    // "Content-Type" is deliberately not added here: it is a content header, which the
    // request-level header collection does not accept. Translate sets it on the request body.
    client.DefaultRequestHeaders.TryAddWithoutValidation("accept", "application/json");

    // Only send credentials when a key is configured; an empty "Bearer " header is useless.
    var apiKey = Configuration.Settings.Tools.ChatGptApiKey;
    if (!string.IsNullOrWhiteSpace(apiKey))
    {
        client.DefaultRequestHeaders.TryAddWithoutValidation("Authorization", "Bearer " + apiKey.Trim());
    }

    _httpClient?.Dispose();
    _httpClient = client;
}
/// <summary>
/// Returns the languages that can be chosen as translation source.
/// This is the same list that is offered for the target side.
/// </summary>
public List<TranslationPair> GetSupportedSourceLanguages() => ListLanguages();
/// <summary>
/// Returns the languages that can be chosen as translation target.
/// This is the same list that is offered for the source side.
/// </summary>
public List<TranslationPair> GetSupportedTargetLanguages() => ListLanguages();
/// <summary>
/// Translates <paramref name="text"/> by posting a single user message to the chat
/// completion endpoint configured in <see cref="Initialize"/>.
/// </summary>
/// <param name="text">Text to translate. Surrounding whitespace is trimmed before sending.</param>
/// <param name="sourceLanguageCode">Source language as written into the prompt.</param>
/// <param name="targetLanguageCode">Target language as written into the prompt.</param>
/// <returns>
/// The translated text, or an empty string when <paramref name="text"/> is null/blank
/// or the response contains no "content" value.
/// </returns>
/// <exception cref="HttpRequestException">The service answered with a non-success status code.</exception>
public async Task<string> Translate(string text, string sourceLanguageCode, string targetLanguageCode)
{
    // Nothing to translate - avoid a null dereference and a pointless (billable) request.
    if (string.IsNullOrWhiteSpace(text))
    {
        return string.Empty;
    }

    // Every value spliced into the hand-built JSON is escaped, not only the subtitle text,
    // so a quote or backslash in a language string cannot break the request body.
    var input = "{\"model\": \"gpt-3.5-turbo\",\"messages\": [{ \"role\": \"user\", \"content\": \"Please translate the following text from "
                + Json.EncodeJsonText(sourceLanguageCode ?? string.Empty)
                + " to "
                + Json.EncodeJsonText(targetLanguageCode ?? string.Empty)
                + ", only write the result: \\n\\n"
                + Json.EncodeJsonText(text.Trim())
                + "\" }]}";

    string json;
    using (var content = new StringContent(input, Encoding.UTF8))
    {
        content.Headers.ContentType = MediaTypeHeaderValue.Parse("application/json");

        // Await instead of blocking on .Result so the calling thread is not stalled
        // for the duration of the HTTP round trip.
        using (var result = await _httpClient.PostAsync(string.Empty, content).ConfigureAwait(false))
        {
            result.EnsureSuccessStatusCode();
            var bytes = await result.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
            json = Encoding.UTF8.GetString(bytes).Trim();
        }
    }

    var parser = new SeJsonParser();
    var resultText = parser.GetFirstObject(json, "content");
    if (resultText == null)
    {
        return string.Empty;
    }

    var outputText = Json.DecodeJsonText(resultText).Trim();

    // The model sometimes wraps its answer in quotes the input did not have.
    // Remove exactly one wrapping pair so quotes that belong to the text itself survive.
    if (outputText.Length >= 2 && outputText.StartsWith('"') && outputText.EndsWith('"') && !text.StartsWith('"'))
    {
        outputText = outputText.Substring(1, outputText.Length - 2).Trim();
    }

    return outputText;
}
/// <summary>
/// Builds the list of languages offered for this engine, sorted by English name.
/// The second argument is the ISO 639-1 two-letter code; it is left empty for
/// languages and regional variants that have no two-letter code of their own.
/// </summary>
/// <returns>A new list instance on every call.</returns>
private static List<TranslationPair> ListLanguages()
{
    // NOTE(review): several Chinese varieties share "zh"; a first-match lookup by
    // two-letter code will pick the earliest entry (Cantonese) - confirm this is intended.
    return new List<TranslationPair>
    {
        MakePair("Albanian", "sq"),
        MakePair("Arabic", "ar"),
        MakePair("Armenian", "hy"),
        MakePair("Awadhi", ""), // no ISO 639-1 code ("ay" is Aymara)
        MakePair("Azerbaijani", "az"),
        MakePair("Bashkir", "ba"),
        MakePair("Basque", "eu"),
        MakePair("Belarusian", "be"),
        MakePair("Bengali", "bn"),
        MakePair("Bhojpuri", ""),
        MakePair("Bosnian", "bs"),
        MakePair("Brazilian Portuguese", ""), // regional variant, no own code ("br" is Breton)
        MakePair("Bulgarian", "bg"),
        MakePair("Cantonese", "zh"),
        MakePair("Catalan", "ca"),
        MakePair("Chhattisgarhi", ""),
        MakePair("Chinese", "zh"),
        MakePair("Croatian", "hr"),
        MakePair("Czech", "cs"),
        MakePair("Danish", "da"),
        MakePair("Dogri", ""),
        MakePair("Dutch", "nl"),
        MakePair("English", "en"),
        MakePair("Estonian", "et"),
        MakePair("Faroese", "fo"),
        MakePair("Finnish", "fi"),
        MakePair("French", "fr"),
        MakePair("Galician", "gl"),
        MakePair("Georgian", "ka"),
        MakePair("German", "de"),
        MakePair("Greek", "el"),
        MakePair("Gujarati", "gu"),
        MakePair("Haryanvi", ""),
        MakePair("Hindi", "hi"),
        MakePair("Hungarian", "hu"),
        MakePair("Indonesian", "id"),
        MakePair("Irish", "ga"),
        MakePair("Italian", "it"),
        MakePair("Japanese", "ja"),
        MakePair("Javanese", "jv"),
        MakePair("Kannada", "kn"),
        MakePair("Kashmiri", "ks"),
        MakePair("Kazakh", "kk"),
        MakePair("Konkani", ""),
        MakePair("Korean", "ko"),
        MakePair("Kyrgyz", "ky"),
        MakePair("Latvian", "lv"),
        MakePair("Lithuanian", "lt"),
        MakePair("Macedonian", "mk"),
        MakePair("Maithili", ""),
        MakePair("Malay", "ms"),
        MakePair("Maltese", "mt"),
        MakePair("Mandarin", "zh"),
        MakePair("Mandarin Chinese", "zh"),
        MakePair("Marathi", "mr"),
        MakePair("Marwari", ""),
        MakePair("Min Nan", ""),
        MakePair("Moldovan", "ro"),
        MakePair("Mongolian", "mn"),
        MakePair("Montenegrin", ""),
        MakePair("Nepali", "ne"),
        MakePair("Norwegian", "no"),
        MakePair("Oriya", "or"),
        MakePair("Pashto", "ps"),
        MakePair("Persian", "fa"),
        MakePair("Polish", "pl"),
        MakePair("Portuguese", "pt"),
        MakePair("Punjabi", "pa"),
        MakePair("Rajasthani", ""),
        MakePair("Romanian", "ro"),
        MakePair("Russian", "ru"),
        MakePair("Sanskrit", "sa"),
        MakePair("Santali", ""),
        MakePair("Serbian", "sr"),
        MakePair("Sindhi", "sd"),
        MakePair("Sinhala", "si"),
        MakePair("Slovak", "sk"),
        MakePair("Slovene", "sl"),
        MakePair("Slovenian", "sl"),
        // The following six were missing from the alphabetical run between "Slovenian" and "Ukrainian".
        MakePair("Spanish", "es"),
        MakePair("Swedish", "sv"),
        MakePair("Tamil", "ta"),
        MakePair("Telugu", "te"),
        MakePair("Thai", "th"),
        MakePair("Turkish", "tr"),
        MakePair("Ukrainian", "uk"),
        MakePair("Urdu", "ur"),
        MakePair("Uzbek", "uz"),
        MakePair("Vietnamese", "vi"),
        MakePair("Welsh", "cy"),
        MakePair("Wu", ""),
    };
}
/// <summary>
/// Creates a <see cref="TranslationPair"/> that passes <paramref name="nameCode"/> as both of the
/// first two constructor arguments (presumably display name and code - confirm against
/// TranslationPair) and <paramref name="twoLetter"/> as the third.
/// </summary>
/// <param name="nameCode">English language name, used for both leading constructor arguments.</param>
/// <param name="twoLetter">Two-letter language code, or an empty string when there is none.</param>
private static TranslationPair MakePair(string nameCode, string twoLetter) =>
    new TranslationPair(nameCode, nameCode, twoLetter);
}
}

View File

@ -513,6 +513,7 @@ namespace Nikse.SubtitleEdit.Core.Common
AutoTranslateLibreUrl = "http://localhost:5000/";
AutoTranslateSeamlessM4TUrl = "http://localhost:5000/";
AutoTranslateDeepLUrl = "https://api-free.deepl.com/";
ChatGptUrl = "https://api.openai.com/v1/chat/completions";
TranslateAllowSplit = true;
TranslateViaCopyPasteAutoCopyToClipboard = true;
TranslateViaCopyPasteMaxSize = 5000;

View File

@ -113,6 +113,7 @@ namespace Nikse.SubtitleEdit.Forms.Translate
new NoLanguageLeftBehindServe(),
new NoLanguageLeftBehindApi(),
new MyMemoryApi(),
new ChatGptTranslate(),
};
nikseComboBoxEngine.Items.Clear();
@ -235,6 +236,17 @@ namespace Nikse.SubtitleEdit.Forms.Translate
return;
}
if (engineType == typeof(ChatGptTranslate))
{
labelApiKey.Left = labelUrl.Left;
nikseTextBoxApiKey.Text = Configuration.Settings.Tools.ChatGptApiKey;
nikseTextBoxApiKey.Left = labelApiKey.Right + 3;
labelApiKey.Visible = true;
nikseTextBoxApiKey.Visible = true;
return;
}
throw new Exception($"Engine {_autoTranslator.Name} not handled!");
}
@ -310,6 +322,12 @@ namespace Nikse.SubtitleEdit.Forms.Translate
var threeLetterLanguageCode = Iso639Dash2LanguageCode.GetThreeLetterCodeFromTwoLetterCode(languageIsoCode);
foreach (TranslationPair item in comboBox.Items)
{
if (!string.IsNullOrEmpty(item.TwoLetterIsoLanguageName) && item.TwoLetterIsoLanguageName == languageIsoCode)
{
comboBox.SelectedIndex = i;
return;
}
if (item.Code.Contains('-'))
{
var arr = item.Code.ToLowerInvariant().Split('-');
@ -654,6 +672,11 @@ namespace Nikse.SubtitleEdit.Forms.Translate
{
Configuration.Settings.Tools.AutoTranslateMyMemoryApiKey = nikseTextBoxApiKey.Text.Trim();
}
if (engineType == typeof(ChatGptTranslate) && !string.IsNullOrWhiteSpace(nikseTextBoxApiKey.Text))
{
Configuration.Settings.Tools.ChatGptApiKey = nikseTextBoxApiKey.Text.Trim();
}
}
private static void StartNoLanguageLeftBehindServe()

View File

@ -188,6 +188,12 @@ namespace Nikse.SubtitleEdit.Forms.Translate
return false;
}
var next = subtitle.GetParagraphOrDefault(index + 1);
if (next == null || !next.Text.HasSentenceEnding("en"))
{
return false;
}
if (subtitle.Paragraphs[index].Text.EndsWith(".") && Utilities.CountTagInText(subtitle.Paragraphs[index].Text, '.') == 1)
{
c = '.';