2018-11-30 15:43:46 +01:00
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
2018-12-04 18:50:36 +01:00
|
|
|
|
using System.Linq;
|
2018-11-30 15:43:46 +01:00
|
|
|
|
using System.Net;
|
|
|
|
|
using System.Text;
|
|
|
|
|
using System.Text.RegularExpressions;
|
2018-12-01 21:56:07 +01:00
|
|
|
|
using Nikse.SubtitleEdit.Core.SubtitleFormats;
|
2018-11-30 15:43:46 +01:00
|
|
|
|
|
|
|
|
|
namespace Nikse.SubtitleEdit.Core.Translate
|
|
|
|
|
{
|
|
|
|
|
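/// <summary>
/// Google translate via the keyless "translate_a/single" web endpoint (client=gtx) on translate.googleapis.com.
/// This is not the Google Cloud Translation API - for that, see https://cloud.google.com/translate/
/// </summary>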
|
|
|
|
|
public class GoogleTranslator1 : ITranslator
|
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// Builds the list of languages offered by this translator.
/// </summary>
/// <returns>
/// A new list on every call, one <see cref="TranslationPair"/> per table row, in table order.
/// </returns>
public List<TranslationPair> GetTranslationPairs()
{
    // Each row holds the two constructor arguments of a TranslationPair.
    // NOTE(review): the first value looks like a language name and the second like the
    // language code later sent as sl/tl - confirm against TranslationPair.
    // "BURMESE" and "MYANMAR" intentionally keep the same code ("my"), as in the original list.
    var table = new[]
    {
        new[] { "AFRIKAANS", "af" },
        new[] { "ALBANIAN", "sq" },
        new[] { "AMHARIC", "am" },
        new[] { "ARABIC", "ar" },
        new[] { "ARMENIAN", "hy" },
        new[] { "AZERBAIJANI", "az" },
        new[] { "BASQUE", "eu" },
        new[] { "BELARUSIAN", "be" },
        new[] { "BENGALI", "bn" },
        new[] { "BOSNIAN", "bs" },
        new[] { "BULGARIAN", "bg" },
        new[] { "BURMESE", "my" },
        new[] { "CATALAN", "ca" },
        new[] { "CEBUANO", "ceb" },
        new[] { "CHICHEWA", "ny" },
        new[] { "CHINESE", "zh" },
        new[] { "CHINESE_SIMPLIFIED", "zh-CN" },
        new[] { "CHINESE_TRADITIONAL", "zh-TW" },
        new[] { "CORSICAN", "co" },
        new[] { "CROATIAN", "hr" },
        new[] { "CZECH", "cs" },
        new[] { "DANISH", "da" },
        new[] { "DUTCH", "nl" },
        new[] { "ENGLISH", "en" },
        new[] { "ESPERANTO", "eo" },
        new[] { "ESTONIAN", "et" },
        new[] { "FILIPINO", "tl" },
        new[] { "FINNISH", "fi" },
        new[] { "FRENCH", "fr" },
        new[] { "FRISIAN", "fy" },
        new[] { "GALICIAN", "gl" },
        new[] { "GEORGIAN", "ka" },
        new[] { "GERMAN", "de" },
        new[] { "GREEK", "el" },
        new[] { "GUJARATI", "gu" },
        new[] { "HAITIAN CREOLE", "ht" },
        new[] { "HAUSA", "ha" },
        new[] { "HAWAIIAN", "haw" },
        new[] { "HEBREW", "iw" },
        new[] { "HINDI", "hi" },
        new[] { "HMOUNG", "hmn" },
        new[] { "HUNGARIAN", "hu" },
        new[] { "ICELANDIC", "is" },
        new[] { "IGBO", "ig" },
        new[] { "INDONESIAN", "id" },
        new[] { "IRISH", "ga" },
        new[] { "ITALIAN", "it" },
        new[] { "JAPANESE", "ja" },
        new[] { "JAVANESE", "jw" },
        new[] { "KANNADA", "kn" },
        new[] { "KAZAKH", "kk" },
        new[] { "KHMER", "km" },
        new[] { "KOREAN", "ko" },
        new[] { "KURDISH", "ku" },
        new[] { "KYRGYZ", "ky" },
        new[] { "LAO", "lo" },
        new[] { "LATIN", "la" },
        new[] { "LATVIAN", "lv" },
        new[] { "LITHUANIAN", "lt" },
        new[] { "LUXEMBOURGISH", "lb" },
        new[] { "MACEDONIAN", "mk" },
        new[] { "MALAY", "ms" },
        new[] { "MALAGASY", "mg" },
        new[] { "MALAYALAM", "ml" },
        new[] { "MALTESE", "mt" },
        new[] { "MAORI", "mi" },
        new[] { "MARATHI", "mr" },
        new[] { "MONGOLIAN", "mn" },
        new[] { "MYANMAR", "my" },
        new[] { "NEPALI", "ne" },
        new[] { "NORWEGIAN", "no" },
        new[] { "PASHTO", "ps" },
        new[] { "PERSIAN", "fa" },
        new[] { "POLISH", "pl" },
        new[] { "PORTUGUESE", "pt" },
        new[] { "PUNJABI", "pa" },
        new[] { "ROMANIAN", "ro" },
        new[] { "ROMANJI", "romanji" },
        new[] { "RUSSIAN", "ru" },
        new[] { "SAMOAN", "sm" },
        new[] { "SCOTS GAELIC", "gd" },
        new[] { "SERBIAN", "sr" },
        new[] { "SESOTHO", "st" },
        new[] { "SHONA", "sn" },
        new[] { "SINDHI", "sd" },
        new[] { "SINHALA", "si" },
        new[] { "SLOVAK", "sk" },
        new[] { "SLOVENIAN", "sl" },
        new[] { "SOMALI", "so" },
        new[] { "SPANISH", "es" },
        new[] { "SUNDANESE", "su" },
        new[] { "SWAHILI", "sw" },
        new[] { "SWEDISH", "sv" },
        new[] { "TAJIK", "tg" },
        new[] { "TAMIL", "ta" },
        new[] { "TELUGU", "te" },
        new[] { "THAI", "th" },
        new[] { "TURKISH", "tr" },
        new[] { "UKRAINIAN", "uk" },
        new[] { "URDU", "ur" },
        new[] { "UZBEK", "uz" },
        new[] { "VIETNAMESE", "vi" },
        new[] { "WELSH", "cy" },
        new[] { "XHOSA", "xh" },
        new[] { "YIDDISH", "yi" },
        new[] { "YORUBA", "yo" },
        new[] { "ZULU", "zu" },
    };

    var pairs = new List<TranslationPair>();
    foreach (var row in table)
    {
        pairs.Add(new TranslationPair(row[0], row[1]));
    }

    return pairs;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Returns the fixed name of this translator.
/// </summary>
/// <returns>The constant string "Google translate (old)".</returns>
public string GetName() => "Google translate (old)";
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Returns the fixed web address associated with this translator.
/// </summary>
/// <returns>The constant string "https://translate.google.com/".</returns>
public string GetUrl() => "https://translate.google.com/";
|
|
|
|
|
|
2018-12-01 21:56:07 +01:00
|
|
|
|
/// <summary>
/// Translates all paragraphs with a single HTTP GET request to translate.googleapis.com.
/// The paragraph texts are joined with " &lt;br/&gt; " markers before sending, and the
/// translated text is split on those markers again afterwards.
/// </summary>
/// <param name="sourceLanguage">Language code sent as the "sl" query parameter.</param>
/// <param name="targetLanguage">Language code sent as the "tl" query parameter.</param>
/// <param name="paragraphs">Paragraphs whose <c>Text</c> is translated.</param>
/// <param name="log">Not used by this implementation.</param>
/// <returns>
/// One post-processed string per chunk found between line-break markers in the translated text.
/// The count only matches <paramref name="paragraphs"/> when the service preserved every marker.
/// An empty paragraph list yields an empty list without contacting the service.
/// </returns>
public List<string> Translate(string sourceLanguage, string targetLanguage, List<Paragraph> paragraphs, StringBuilder log)
{
    // Nothing to translate: skip the pointless network round trip.
    if (paragraphs.Count == 0)
    {
        return new List<string>();
    }

    var input = new StringBuilder();
    var formattings = new Formatting[paragraphs.Count];
    for (var index = 0; index < paragraphs.Count; index++)
    {
        var p = paragraphs[index];
        var f = new Formatting();
        formattings[index] = f;
        if (input.Length > 0)
        {
            // Marker used to find the paragraph boundaries again in the translated text.
            input.Append(" <br/> ");
        }

        // NOTE(review): presumably strips formatting tags and remembers them in "f" so that
        // ReAddFormatting can restore them below - confirm against Formatting.
        var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage);
        input.Append(text);
    }

    string result;
    using (var wc = new WebClient())
    {
        string url = $"https://translate.googleapis.com/translate_a/single?client=gtx&sl={sourceLanguage}&tl={targetLanguage}&dt=t&q={Utilities.UrlEncode(input.ToString())}";
        wc.Proxy = Utilities.GetProxy();
        wc.Encoding = Encoding.UTF8;
        wc.Headers.Add("user-agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36");
        result = wc.DownloadString(url).Trim();
    }

    var res = ExtractTranslatedText(result).Trim();
    res = Regex.Unescape(res);
    List<string> lines = Split(res);
    var resultList = new List<string>();
    for (var index = 0; index < lines.Count; index++)
    {
        var s = Json.DecodeJsonText(lines[index]);
        s = string.Join(Environment.NewLine, s.SplitToLines());
        s = TranslationHelper.PostTranslate(s, targetLanguage);

        // Collapse empty lines and remove spaces next to line breaks. The space replacements
        // are run twice on purpose so that up to two spaces are removed on each side.
        s = s.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
        s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
        s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
        s = s.Replace(" " + Environment.NewLine, Environment.NewLine);
        s = s.Replace(" " + Environment.NewLine, Environment.NewLine).Trim();

        // The service may return more chunks than were sent; only the first
        // paragraphs.Count chunks have stored formatting.
        if (formattings.Length > index)
        {
            s = formattings[index].ReAddFormatting(s);
        }

        resultList.Add(s);
    }

    return resultList;
}

/// <summary>
/// Scans the raw response text and concatenates the translated strings, each prefixed by a space.
/// Quoted strings are counted in order of appearance: odd-numbered strings found at bracket
/// depth 3 or deeper are taken as translations, even-numbered ones are the original text.
/// Escape sequences are copied through unchanged; the caller unescapes them.
/// </summary>
private static string ExtractTranslatedText(string result)
{
    var all = new StringBuilder();
    var current = new StringBuilder();
    int stringCount = 0;
    int level = result.Length > 0 && result[0] == '[' ? 1 : 0;
    bool inString = false;
    bool escaped = false;

    // The first and the last character (the outermost brackets) are skipped.
    for (int i = 1; i < result.Length - 1; i++)
    {
        char c = result[i];
        if (inString)
        {
            if (escaped)
            {
                // Character following a backslash: never terminates the string.
                current.Append(c);
                escaped = false;
            }
            else if (c == '\\')
            {
                // Tracking the escape state (instead of peeking at the previous character)
                // keeps a string ending in an escaped backslash (\\") correctly terminated.
                current.Append(c);
                escaped = true;
            }
            else if (c == '"')
            {
                stringCount++;
                if (stringCount % 2 == 1 && level > 2)
                {
                    all.Append(' ').Append(current.ToString());
                }

                current.Clear();
                inString = false;
            }
            else
            {
                current.Append(c);
            }
        }
        else if (c == '"')
        {
            inString = true;
        }
        else if (c == '[')
        {
            level++;
        }
        else if (c == ']')
        {
            level--;
        }
    }

    return all.ToString();
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Splits text at every self-closing line-break tag, tolerating whitespace anywhere inside
/// the tag ("&lt;br/&gt;", "&lt; br /&gt;", "&lt;br / &gt;", ...), and also at any NUL
/// character already present in the text.
/// </summary>
/// <param name="res">Text containing line-break tags as separators.</param>
/// <returns>The chunks between separators, in order; a text without separators yields one chunk.</returns>
private List<string> Split(string res)
{
    // A single pattern replaces the hand-written variant list, which contained "<br/ >" twice
    // and therefore missed "<br / >" and "< br/ >". The slash stays mandatory, so a plain
    // "<br>" is still not treated as a separator.
    res = Regex.Replace(res, @"<\s*br\s*/\s*>", "\0");
    return res.Split('\0').ToList();
}
|
|
|
|
|
}
|
|
|
|
|
}
|