SubtitleEdit/libse/Translate/GoogleTranslator1.cs

241 lines
10 KiB
C#
Raw Normal View History

2018-11-30 15:43:46 +01:00
using System;
using System.Collections.Generic;
using System.Linq;
2018-11-30 15:43:46 +01:00
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
2018-12-01 21:56:07 +01:00
using Nikse.SubtitleEdit.Core.SubtitleFormats;
2018-11-30 15:43:46 +01:00
namespace Nikse.SubtitleEdit.Core.Translate
{
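/// <summary>
/// Google translate via the keyless translate.googleapis.com "translate_a/single" web endpoint
/// (this is not the Google Cloud API - for that one see https://cloud.google.com/translate/)
/// </summary>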
public class GoogleTranslator1 : ITranslator
{
/// <summary>
/// Builds the list of language entries offered by this translator.
/// </summary>
/// <returns>
/// A new list with one <see cref="TranslationPair"/> per table row, in table order.
/// </returns>
public List<TranslationPair> GetTranslationPairs()
{
    // Each row holds the two TranslationPair constructor arguments, in order.
    var languageTable = new[]
    {
        new[] { "AFRIKAANS", "af" },
        new[] { "ALBANIAN", "sq" },
        new[] { "AMHARIC", "am" },
        new[] { "ARABIC", "ar" },
        new[] { "ARMENIAN", "hy" },
        new[] { "AZERBAIJANI", "az" },
        new[] { "BASQUE", "eu" },
        new[] { "BELARUSIAN", "be" },
        new[] { "BENGALI", "bn" },
        new[] { "BOSNIAN", "bs" },
        new[] { "BULGARIAN", "bg" },
        new[] { "BURMESE", "my" },
        new[] { "CATALAN", "ca" },
        new[] { "CEBUANO", "ceb" },
        new[] { "CHICHEWA", "ny" },
        new[] { "CHINESE", "zh" },
        new[] { "CHINESE_SIMPLIFIED", "zh-CN" },
        new[] { "CHINESE_TRADITIONAL", "zh-TW" },
        new[] { "CORSICAN", "co" },
        new[] { "CROATIAN", "hr" },
        new[] { "CZECH", "cs" },
        new[] { "DANISH", "da" },
        new[] { "DUTCH", "nl" },
        new[] { "ENGLISH", "en" },
        new[] { "ESPERANTO", "eo" },
        new[] { "ESTONIAN", "et" },
        new[] { "FILIPINO", "tl" },
        new[] { "FINNISH", "fi" },
        new[] { "FRENCH", "fr" },
        new[] { "FRISIAN", "fy" },
        new[] { "GALICIAN", "gl" },
        new[] { "GEORGIAN", "ka" },
        new[] { "GERMAN", "de" },
        new[] { "GREEK", "el" },
        new[] { "GUJARATI", "gu" },
        new[] { "HAITIAN CREOLE", "ht" },
        new[] { "HAUSA", "ha" },
        new[] { "HAWAIIAN", "haw" },
        new[] { "HEBREW", "iw" },
        new[] { "HINDI", "hi" },
        new[] { "HMOUNG", "hmn" },
        new[] { "HUNGARIAN", "hu" },
        new[] { "ICELANDIC", "is" },
        new[] { "IGBO", "ig" },
        new[] { "INDONESIAN", "id" },
        new[] { "IRISH", "ga" },
        new[] { "ITALIAN", "it" },
        new[] { "JAPANESE", "ja" },
        new[] { "JAVANESE", "jw" },
        new[] { "KANNADA", "kn" },
        new[] { "KAZAKH", "kk" },
        new[] { "KHMER", "km" },
        new[] { "KOREAN", "ko" },
        new[] { "KURDISH", "ku" },
        new[] { "KYRGYZ", "ky" },
        new[] { "LAO", "lo" },
        new[] { "LATIN", "la" },
        new[] { "LATVIAN", "lv" },
        new[] { "LITHUANIAN", "lt" },
        new[] { "LUXEMBOURGISH", "lb" },
        new[] { "MACEDONIAN", "mk" },
        new[] { "MALAY", "ms" },
        new[] { "MALAGASY", "mg" },
        new[] { "MALAYALAM", "ml" },
        new[] { "MALTESE", "mt" },
        new[] { "MAORI", "mi" },
        new[] { "MARATHI", "mr" },
        new[] { "MONGOLIAN", "mn" },
        new[] { "MYANMAR", "my" },
        new[] { "NEPALI", "ne" },
        new[] { "NORWEGIAN", "no" },
        new[] { "PASHTO", "ps" },
        new[] { "PERSIAN", "fa" },
        new[] { "POLISH", "pl" },
        new[] { "PORTUGUESE", "pt" },
        new[] { "PUNJABI", "pa" },
        new[] { "ROMANIAN", "ro" },
        new[] { "ROMANJI", "romanji" },
        new[] { "RUSSIAN", "ru" },
        new[] { "SAMOAN", "sm" },
        new[] { "SCOTS GAELIC", "gd" },
        new[] { "SERBIAN", "sr" },
        new[] { "SESOTHO", "st" },
        new[] { "SHONA", "sn" },
        new[] { "SINDHI", "sd" },
        new[] { "SINHALA", "si" },
        new[] { "SLOVAK", "sk" },
        new[] { "SLOVENIAN", "sl" },
        new[] { "SOMALI", "so" },
        new[] { "SPANISH", "es" },
        new[] { "SUNDANESE", "su" },
        new[] { "SWAHILI", "sw" },
        new[] { "SWEDISH", "sv" },
        new[] { "TAJIK", "tg" },
        new[] { "TAMIL", "ta" },
        new[] { "TELUGU", "te" },
        new[] { "THAI", "th" },
        new[] { "TURKISH", "tr" },
        new[] { "UKRAINIAN", "uk" },
        new[] { "URDU", "ur" },
        new[] { "UZBEK", "uz" },
        new[] { "VIETNAMESE", "vi" },
        new[] { "WELSH", "cy" },
        new[] { "XHOSA", "xh" },
        new[] { "YIDDISH", "yi" },
        new[] { "YORUBA", "yo" },
        new[] { "ZULU", "zu" },
    };

    var pairs = new List<TranslationPair>();
    foreach (var row in languageTable)
    {
        pairs.Add(new TranslationPair(row[0], row[1]));
    }

    return pairs;
}
/// <summary>
/// Gets the name of this translator.
/// </summary>
/// <returns>The fixed string "Google translate (old)".</returns>
public string GetName() => "Google translate (old)";
/// <summary>
/// Gets the web address associated with this translator.
/// </summary>
/// <returns>The fixed string "https://translate.google.com/".</returns>
public string GetUrl() => "https://translate.google.com/";
2018-12-01 21:56:07 +01:00
/// <summary>
/// Translates the given paragraphs with a single request to the
/// translate.googleapis.com "translate_a/single" endpoint.
/// </summary>
/// <remarks>
/// The pre-processed paragraph texts are joined with " &lt;br/&gt; " markers and sent as one
/// query. The translated text extracted from the response is split on those markers again,
/// cleaned up, and the formatting captured per paragraph is re-applied by index.
/// </remarks>
/// <param name="sourceLanguage">Language code placed in the "sl" query parameter.</param>
/// <param name="targetLanguage">Language code placed in the "tl" query parameter.</param>
/// <param name="paragraphs">Paragraphs whose <c>Text</c> is translated.</param>
/// <param name="log">Not used by this implementation.</param>
/// <returns>One string per segment obtained by splitting the translated text on the line-break markers.</returns>
public List<string> Translate(string sourceLanguage, string targetLanguage, List<Paragraph> paragraphs, StringBuilder log)
{
    var input = new StringBuilder();
    var formattings = new Formatting[paragraphs.Count];
    for (var index = 0; index < paragraphs.Count; index++)
    {
        var p = paragraphs[index];
        var f = new Formatting();
        formattings[index] = f;
        if (input.Length > 0)
        {
            input.Append(" <br/> ");
        }

        var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage);
        input.Append(text);
    }

    string result;
    using (var wc = new WebClient())
    {
        string url = $"https://translate.googleapis.com/translate_a/single?client=gtx&sl={sourceLanguage}&tl={targetLanguage}&dt=t&q={Utilities.UrlEncode(input.ToString())}";
        wc.Proxy = Utilities.GetProxy();
        wc.Encoding = Encoding.UTF8;
        wc.Headers.Add("user-agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36");
        result = wc.DownloadString(url).Trim();
    }

    var res = Regex.Unescape(ExtractTranslatedText(result));
    List<string> lines = Split(res);
    var resultList = new List<string>();
    for (var index = 0; index < lines.Count; index++)
    {
        var s = Json.DecodeJsonText(lines[index]);
        s = string.Join(Environment.NewLine, s.SplitToLines());
        s = TranslationHelper.PostTranslate(s, targetLanguage);
        s = s.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);

        // Each replacement runs twice so that up to two spaces next to a line break are removed.
        s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
        s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
        s = s.Replace(" " + Environment.NewLine, Environment.NewLine);
        s = s.Replace(" " + Environment.NewLine, Environment.NewLine).Trim();

        // The response may yield more segments than paragraphs were sent; only re-apply
        // formatting where a matching entry exists.
        if (formattings.Length > index)
        {
            s = formattings[index].ReAddFormatting(s);
        }

        resultList.Add(s);
    }

    return resultList;
}

/// <summary>
/// Scans the raw response text and concatenates the quoted strings that carry translated text.
/// </summary>
/// <param name="result">The trimmed response body.</param>
/// <returns>The selected strings, each preceded by a space, with the overall result trimmed.</returns>
private static string ExtractTranslatedText(string result)
{
    var sbAll = new StringBuilder();
    int count = 0; // number of quoted strings seen so far
    int i = 1;     // the first character is accounted for by the initial level below
    int level = result.StartsWith('[') ? 1 : 0; // current '[' nesting depth
    while (i < result.Length - 1)
    {
        var sb = new StringBuilder();
        var insideString = false;
        for (; i < result.Length - 1; i++)
        {
            var c = result[i];
            if (insideString)
            {
                if (c == '"' && !IsEscaped(result, i))
                {
                    count++;

                    // Odd-numbered strings nested deeper than two brackets are kept as translation;
                    // per the original author's note the even-numbered ones are the source text.
                    if (count % 2 == 1 && level > 2)
                    {
                        sbAll.Append(' ').Append(sb.ToString());
                    }

                    i++;
                    break;
                }

                sb.Append(c);
            }
            else if (c == '"')
            {
                insideString = true;
            }
            else if (c == '[')
            {
                level++;
            }
            else if (c == ']')
            {
                level--;
            }
        }
    }

    return sbAll.ToString().Trim();
}

/// <summary>
/// Tells whether the character at <paramref name="index"/> is escaped, i.e. preceded by an odd
/// number of consecutive backslashes. Looking at only one preceding character would wrongly
/// treat the quote in <c>\\"</c> (escaped backslash, then closing quote) as escaped.
/// </summary>
private static bool IsEscaped(string text, int index)
{
    var backslashes = 0;
    for (var k = index - 1; k >= 0 && text[k] == '\\'; k--)
    {
        backslashes++;
    }

    return backslashes % 2 == 1;
}
/// <summary>
/// Matches a self-closing "br" tag with optional whitespace after "&lt;", before "/" and
/// before "&gt;" (presumably spacing introduced by the translation service - confirm).
/// </summary>
private static readonly Regex LineBreakTag = new Regex(@"<\s*br\s*/\s*>", RegexOptions.Compiled);

/// <summary>
/// Splits the translated text into segments at every line-break tag.
/// </summary>
/// <remarks>
/// A single pattern replaces the former chain of literal replacements, which listed
/// "&lt;br/ &gt;" twice and so never handled "&lt;br / &gt;" or "&lt; br/ &gt;".
/// </remarks>
/// <param name="res">Translated text containing "&lt;br/&gt;"-style separators.</param>
/// <returns>The segments between separators; whitespace around them is left untouched.</returns>
private List<string> Split(string res)
{
    // Tags are first turned into NUL characters, so a NUL already present in the text
    // still acts as a separator as well.
    return LineBreakTag.Replace(res, "\0").Split('\0').ToList();
}
}
}