using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using Nikse.SubtitleEdit.Core.SubtitleFormats;

// NOTE(review): as received, this file has "List" without type arguments and several string
// literals that contain a raw line break (one in Translate, four in Split). Both look like
// damage from a markup-stripping step - the surviving "< br/>" patterns in Split suggest the
// lost text was a br-style tag. Those tokens are left exactly as found; restore them from
// version control rather than guessing.
namespace Nikse.SubtitleEdit.Core.Translate
{
    /// <summary>
    /// Google translate (old): sends the text to the translate_a/single endpoint on
    /// translate.googleapis.com (client=gtx) and extracts the translated strings from the
    /// bracketed response. The original header comment pointed at https://cloud.google.com/translate/
    /// </summary>
    public class GoogleTranslator1 : ITranslator
    {
        /// <summary>
        /// Returns the fixed list of languages offered by this translator, each pairing an
        /// upper-case language name with a short code such as "af".
        /// </summary>
        /// <returns>A new list on every call.</returns>
        public List GetTranslationPairs()
        {
            // "BURMESE" and "MYANMAR" both carry the code "my".
            // NOTE(review): "HMOUNG" is probably meant to be "HMONG" and "ROMANJI"/"romanji" looks
            // unusual next to the other codes; left unchanged because these are runtime strings.
            return new List
            {
                new TranslationPair("AFRIKAANS", "af"),
                new TranslationPair("ALBANIAN", "sq"),
                new TranslationPair("AMHARIC", "am"),
                new TranslationPair("ARABIC", "ar"),
                new TranslationPair("ARMENIAN", "hy"),
                new TranslationPair("AZERBAIJANI", "az"),
                new TranslationPair("BASQUE", "eu"),
                new TranslationPair("BELARUSIAN", "be"),
                new TranslationPair("BENGALI", "bn"),
                new TranslationPair("BOSNIAN", "bs"),
                new TranslationPair("BULGARIAN", "bg"),
                new TranslationPair("BURMESE", "my"),
                new TranslationPair("CATALAN", "ca"),
                new TranslationPair("CEBUANO", "ceb"),
                new TranslationPair("CHICHEWA", "ny"),
                new TranslationPair("CHINESE", "zh"),
                new TranslationPair("CHINESE_SIMPLIFIED", "zh-CN"),
                new TranslationPair("CHINESE_TRADITIONAL", "zh-TW"),
                new TranslationPair("CORSICAN", "co"),
                new TranslationPair("CROATIAN", "hr"),
                new TranslationPair("CZECH", "cs"),
                new TranslationPair("DANISH", "da"),
                new TranslationPair("DUTCH", "nl"),
                new TranslationPair("ENGLISH", "en"),
                new TranslationPair("ESPERANTO", "eo"),
                new TranslationPair("ESTONIAN", "et"),
                new TranslationPair("FILIPINO", "tl"),
                new TranslationPair("FINNISH", "fi"),
                new TranslationPair("FRENCH", "fr"),
                new TranslationPair("FRISIAN", "fy"),
                new TranslationPair("GALICIAN", "gl"),
                new TranslationPair("GEORGIAN", "ka"),
                new TranslationPair("GERMAN", "de"),
                new TranslationPair("GREEK", "el"),
                new TranslationPair("GUJARATI", "gu"),
                new TranslationPair("HAITIAN CREOLE", "ht"),
                new TranslationPair("HAUSA", "ha"),
                new TranslationPair("HAWAIIAN", "haw"),
                new TranslationPair("HEBREW", "iw"),
                new TranslationPair("HINDI", "hi"),
                new TranslationPair("HMOUNG", "hmn"),
                new TranslationPair("HUNGARIAN", "hu"),
                new TranslationPair("ICELANDIC", "is"),
                new TranslationPair("IGBO", "ig"),
                new TranslationPair("INDONESIAN", "id"),
                new TranslationPair("IRISH", "ga"),
                new TranslationPair("ITALIAN", "it"),
                new TranslationPair("JAPANESE", "ja"),
                new TranslationPair("JAVANESE", "jw"),
                new TranslationPair("KANNADA", "kn"),
                new TranslationPair("KAZAKH", "kk"),
                new TranslationPair("KHMER", "km"),
                new TranslationPair("KOREAN", "ko"),
                new TranslationPair("KURDISH", "ku"),
                new TranslationPair("KYRGYZ", "ky"),
                new TranslationPair("LAO", "lo"),
                new TranslationPair("LATIN", "la"),
                new TranslationPair("LATVIAN", "lv"),
                new TranslationPair("LITHUANIAN", "lt"),
                new TranslationPair("LUXEMBOURGISH", "lb"),
                new TranslationPair("MACEDONIAN", "mk"),
                new TranslationPair("MALAY", "ms"),
                new TranslationPair("MALAGASY", "mg"),
                new TranslationPair("MALAYALAM", "ml"),
                new TranslationPair("MALTESE", "mt"),
                new TranslationPair("MAORI", "mi"),
                new TranslationPair("MARATHI", "mr"),
                new TranslationPair("MONGOLIAN", "mn"),
                new TranslationPair("MYANMAR", "my"),
                new TranslationPair("NEPALI", "ne"),
                new TranslationPair("NORWEGIAN", "no"),
                new TranslationPair("PASHTO", "ps"),
                new TranslationPair("PERSIAN", "fa"),
                new TranslationPair("POLISH", "pl"),
                new TranslationPair("PORTUGUESE", "pt"),
                new TranslationPair("PUNJABI", "pa"),
                new TranslationPair("ROMANIAN", "ro"),
                new TranslationPair("ROMANJI", "romanji"),
                new TranslationPair("RUSSIAN", "ru"),
                new TranslationPair("SAMOAN", "sm"),
                new TranslationPair("SCOTS GAELIC", "gd"),
                new TranslationPair("SERBIAN", "sr"),
                new TranslationPair("SESOTHO", "st"),
                new TranslationPair("SHONA", "sn"),
                new TranslationPair("SINDHI", "sd"),
                new TranslationPair("SINHALA", "si"),
                new TranslationPair("SLOVAK", "sk"),
                new TranslationPair("SLOVENIAN", "sl"),
                new TranslationPair("SOMALI", "so"),
                new TranslationPair("SPANISH", "es"),
                new TranslationPair("SUNDANESE", "su"),
                new TranslationPair("SWAHILI", "sw"),
                new TranslationPair("SWEDISH", "sv"),
                new TranslationPair("TAJIK", "tg"),
                new TranslationPair("TAMIL", "ta"),
                new TranslationPair("TELUGU", "te"),
                new TranslationPair("THAI", "th"),
                new TranslationPair("TURKISH", "tr"),
                new TranslationPair("UKRAINIAN", "uk"),
                new TranslationPair("URDU", "ur"),
                new TranslationPair("UZBEK", "uz"),
                new TranslationPair("VIETNAMESE", "vi"),
                new TranslationPair("WELSH", "cy"),
                new TranslationPair("XHOSA", "xh"),
                new TranslationPair("YIDDISH", "yi"),
                new TranslationPair("YORUBA", "yo"),
                new TranslationPair("ZULU", "zu"),
            };
        }

        /// <summary>
        /// Returns the fixed name of this translator.
        /// </summary>
        public string GetName()
        {
            return "Google translate (old)";
        }

        /// <summary>
        /// Returns the fixed URL associated with this translator.
        /// </summary>
        public string GetUrl()
        {
            return "https://translate.google.com/";
        }

        /// <summary>
        /// Translates all paragraphs with a single HTTP request: the paragraph texts are
        /// pre-processed, joined with a separator, sent as the "q" query value, and the
        /// translated strings found in the response are split back into separate entries.
        /// </summary>
        /// <param name="sourceLanguage">Value placed in the "sl" query parameter and passed to the pre-translate helpers.</param>
        /// <param name="targetLanguage">Value placed in the "tl" query parameter and passed to the post-translate helper.</param>
        /// <param name="paragraphs">Paragraphs whose Text is translated; must not be null.</param>
        /// <param name="log">Not used by this implementation.</param>
        /// <returns>
        /// One string per segment produced by <c>Split</c>. The count follows the separators found
        /// in the response, so it is not guaranteed to equal the number of paragraphs.
        /// </returns>
        public List Translate(string sourceLanguage, string targetLanguage, List paragraphs, StringBuilder log)
        {
            string result;
            var input = new StringBuilder();

            // One Formatting instance per paragraph; the same instance re-adds the tags after translation.
            var formattings = new Formatting[paragraphs.Count];
            for (var index = 0; index < paragraphs.Count; index++)
            {
                var p = paragraphs[index];
                var f = new Formatting();
                formattings[index] = f;
                if (input.Length > 0)
                {
                    // Separator between paragraphs.
                    // NOTE(review): the literal below contains a raw line break exactly as found in
                    // the file; its original content is probably lost - confirm against history.
                    input.Append("
");
                }

                var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage);
                input.Append(text);
            }

            using (var wc = new WebClient())
            {
                string url = $"https://translate.googleapis.com/translate_a/single?client=gtx&sl={sourceLanguage}&tl={targetLanguage}&dt=t&q={Utilities.UrlEncode(input.ToString())}";
                wc.Proxy = Utilities.GetProxy();
                wc.Encoding = Encoding.UTF8;
                wc.Headers.Add("user-agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36");
                result = wc.DownloadString(url).Trim();
            }

            // Hand-rolled scan of the bracketed response. The first and last characters are
            // skipped; "level" tracks bracket depth outside of strings and "count" numbers the
            // quoted strings in the order they are closed.
            var sbAll = new StringBuilder();
            int count = 0;
            int i = 1;
            int level = result.StartsWith('[') ? 1 : 0;
            while (i < result.Length - 1)
            {
                var sb = new StringBuilder();
                var start = false;
                for (; i < result.Length - 1; i++)
                {
                    var c = result[i];
                    if (start)
                    {
                        // A quote ends the string only when it is not escaped. Counting the whole run
                        // of preceding backslashes (instead of looking at one character) keeps a string
                        // that ends in an escaped backslash, e.g. "abc\\", from swallowing its terminator.
                        if (c == '"' && !IsEscaped(result, i))
                        {
                            count++;

                            // Even-numbered strings are the original text; odd-numbered strings at
                            // bracket level > 2 are collected as the translation.
                            if (count % 2 == 1 && level > 2)
                            {
                                sbAll.Append(" " + sb);
                            }

                            i++;
                            break;
                        }

                        sb.Append(c);
                    }
                    else if (c == '"')
                    {
                        start = true;
                    }
                    else if (c == '[')
                    {
                        level++;
                    }
                    else if (c == ']')
                    {
                        level--;
                    }
                }
            }

            var res = sbAll.ToString().Trim();
            res = Regex.Unescape(res);
            List lines = Split(res);
            var resultList = new List();
            for (var index = 0; index < lines.Count; index++)
            {
                var line = lines[index];
                var s = Json.DecodeJsonText(line);
                s = string.Join(Environment.NewLine, s.SplitToLines());
                s = TranslationHelper.PostTranslate(s, targetLanguage);

                // Tidy whitespace around line breaks. Two of the replacements appear twice, as found.
                s = s.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
                s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
                s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
                s = s.Replace(" " + Environment.NewLine, Environment.NewLine);
                s = s.Replace(" " + Environment.NewLine, Environment.NewLine).Trim();

                // The response can yield more segments than there were paragraphs; only
                // segments with a matching paragraph get their formatting re-added.
                if (formattings.Length > index)
                {
                    s = formattings[index].ReAddFormatting(s);
                }

                resultList.Add(s);
            }

            return resultList;
        }

        /// <summary>
        /// Tells whether the character at <paramref name="index"/> is escaped, i.e. directly
        /// preceded by an odd number of backslashes.
        /// </summary>
        /// <param name="text">Text being scanned.</param>
        /// <param name="index">Position of the character to test.</param>
        /// <returns><c>true</c> when the run of backslashes ending at index - 1 has odd length.</returns>
        private static bool IsEscaped(string text, int index)
        {
            var backslashes = 0;
            for (var k = index - 1; k >= 0 && text[k] == '\\'; k--)
            {
                backslashes++;
            }

            return backslashes % 2 == 1;
        }

        /// <summary>
        /// Splits the combined translation back into segments by replacing every recognised
        /// separator variant with '\0' and splitting on that character.
        /// </summary>
        /// <param name="res">Combined translated text.</param>
        /// <returns>The segments in order; a text without any separator yields a single entry.</returns>
        private List Split(string res)
        {
            // NOTE(review): four of the patterns below contain only a raw line break, exactly as
            // found in the file. The three intact "< br ...>" patterns suggest the others were
            // br-tag variants whose text was stripped - confirm against history.
            res = res.Replace("
", "\0");
            res = res.Replace("< br/>", "\0");
            res = res.Replace("
", "\0");
            res = res.Replace("
", "\0");
            res = res.Replace("< br />", "\0");
            res = res.Replace("< br / >", "\0");
            res = res.Replace("
", "\0");
            return res.Split('\0').ToList();
        }
    }
}