mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-25 20:52:44 +01:00
Extracted language auto detection to "LanguageAutoDetect" + added two simple unit tests
This commit is contained in:
parent
66672b7c03
commit
075de1b239
@ -41,7 +41,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
var splittedIndexes = new List<int>();
|
||||
var autoBreakedIndexes = new List<int>();
|
||||
var splittedSubtitle = new Subtitle();
|
||||
string language = Utilities.AutoDetectGoogleLanguage(subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle);
|
||||
for (int i = 0; i < subtitle.Paragraphs.Count; i++)
|
||||
{
|
||||
bool added = false;
|
||||
|
716
libse/LanguageAutoDetect.cs
Normal file
716
libse/LanguageAutoDetect.cs
Normal file
@ -0,0 +1,716 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core
|
||||
{
|
||||
public static class LanguageAutoDetect
|
||||
{
|
||||
|
||||
/// <summary>
/// Counts whole-word matches of the given patterns in <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="words">
/// Regular-expression fragments; each one is wrapped in <c>\b</c> word boundaries before matching,
/// so entries like "[Hh]et" are treated as patterns, not as literal text.
/// </param>
/// <returns>The total number of matches over all patterns (a word listed twice is counted twice).</returns>
private static int GetCount(string text, params string[] words)
{
    const RegexOptions options = RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture;

    int total = 0;
    foreach (string word in words)
    {
        string pattern = "\\b" + word + "\\b";
        total += Regex.Matches(text, pattern, options).Count;
    }

    return total;
}
|
||||
|
||||
/// <summary>
/// Counts matches of the given patterns anywhere in <paramref name="text"/> (no word boundaries).
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="words">Regular-expression patterns, matched with default options.</param>
/// <returns>The total number of matches over all patterns.</returns>
private static int GetCountContains(string text, params string[] words)
{
    int total = 0;
    foreach (string pattern in words)
    {
        total += Regex.Matches(text, pattern).Count;
    }

    return total;
}
|
||||
|
||||
// Code page -> two-letter language code, for code pages that imply a single language.
private static readonly Dictionary<int, string> LanguageByCodePage = new Dictionary<int, string>
{
    { 860, "pt" }, // Portuguese

    { 28599, "tr" }, // Turkish
    { 1254, "tr" },

    { 28598, "he" }, // Hebrew
    { 1255, "he" },

    { 28596, "ar" }, // Arabic
    { 1256, "ar" },

    { 1258, "vi" }, // Vietnamese

    { 949, "ko" }, // Korean
    { 1361, "ko" },
    { 20949, "ko" },
    { 51949, "ko" },
    { 50225, "ko" },

    { 1253, "el" }, // Greek
    { 28597, "el" },

    { 50220, "ja" }, // Japanese
    { 50221, "ja" },
    { 50222, "ja" },
    { 51932, "ja" },
    { 20932, "ja" },
    { 10001, "ja" },

    { 20000, "zh" }, // Chinese
    { 20002, "zh" },
    { 20936, "zh" },
    { 950, "zh" },
    { 52936, "zh" },
    { 54936, "zh" },
    { 51936, "zh" },
};

/// <summary>
/// Maps an encoding to a language code when the encoding's code page implies one language.
/// </summary>
/// <param name="encoding">Encoding whose <see cref="Encoding.CodePage"/> is looked up.</param>
/// <returns>A two-letter language code, or <c>null</c> when the code page is not in the table.</returns>
public static string AutoDetectGoogleLanguage(Encoding encoding)
{
    string languageId;
    return LanguageByCodePage.TryGetValue(encoding.CodePage, out languageId) ? languageId : null;
}
|
||||
|
||||
// Marker words per language. Entries are passed to GetCount, which wraps each one in \b word
// boundaries and treats it as a regular-expression fragment (hence entries such as "[Hh]et").
public static readonly string[] AutoDetectWordsEnglish = { "we", "are", "and", "you", "your", "what" };
public static readonly string[] AutoDetectWordsDanish = { "vi", "han", "og", "jeg", "var", "men", "gider", "bliver", "virkelig", "kommer", "tilbage", "Hej" };
public static readonly string[] AutoDetectWordsNorwegian = { "vi", "er", "og", "jeg", "var", "men" };
public static readonly string[] AutoDetectWordsSwedish = { "vi", "är", "och", "Jag", "inte", "för" };
public static readonly string[] AutoDetectWordsSpanish = { "el", "bien", "Vamos", "Hola", "casa", "con" };
public static readonly string[] AutoDetectWordsFrench = { "un", "vous", "avec", "pas", "ce", "une" };
public static readonly string[] AutoDetectWordsGerman = { "und", "auch", "sich", "bin", "hast", "möchte" };
public static readonly string[] AutoDetectWordsDutch = { "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n" };
// NOTE(review): "siê" looks like a mis-decoded "się" - confirm before changing, it may be intentional.
public static readonly string[] AutoDetectWordsPolish = { "Czy", "ale", "ty", "siê", "jest", "mnie" };
public static readonly string[] AutoDetectWordsItalian = { "Cosa", "sono", "Grazie", "Buongiorno", "bene", "questo", "ragazzi", "propriamente", "numero", "hanno", "giorno", "faccio", "davvero", "negativo", "essere", "vuole", "sensitivo", "venire" };
public static readonly string[] AutoDetectWordsPortuguese = { "[Nn]ão", "Então", "Estás", "isso", "com" };
public static readonly string[] AutoDetectWordsGreek = { "μου", "είναι", "Είναι", "αυτό", "Τόμπυ", "καλά", "Ενταξει", "Ενταξει", "πρεπει", "Λοιπον", "τιποτα", "ξερεις" };
public static readonly string[] AutoDetectWordsRussian = { "Это", "не", "ты", "что", "это", "Мы", "Да", "Нет", "Ты", "нет", "Он", "его", "тебя", "как", "Не", "вы", "меня", "Но", "то", "всё", "бы", "мы", "мне", "вас", "знаю", "ещё", "за", "нас", "чтобы", "был" };
public static readonly string[] AutoDetectWordsBulgarian = { "Какво", "тук", "може", "Как", "Ваше", "какво" };
// Fixed: this list used to be a copy of the Bulgarian words. It now holds the Romanian marker
// words that the text-based detection in this class already uses.
public static readonly string[] AutoDetectWordsRomanian = { "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã", "vorbesti", "oamenii", "Asteaptã", "fãcut", "Fãrã", "spune", "decât", "vreau" };
// Fixed: this list used to be a copy of the Bulgarian words, so Arabic text could never match
// (the Bulgarian check runs first on the same words). It now holds the Arabic marker words that
// the encoding detection in this class already uses.
public static readonly string[] AutoDetectWordsArabic = { "من", "هل", "لا", "فى", "لقد", "ما" };
public static readonly string[] AutoDetectWordsHebrew = { "אתה", "אולי", "הוא", "בסדר", "יודע", "טוב" };
public static readonly string[] AutoDetectWordsVietnamese = { "không", "tôi", "anh", "đó", "Tôi", "ông" };
public static readonly string[] AutoDetectWordsHungarian = { "hogy", "lesz", "tudom", "vagy", "mondtam", "még" };
// NOTE(review): "deðil", "lazým" and "çalýþýyor" look mis-decoded - confirm before changing.
public static readonly string[] AutoDetectWordsTurkish = { "için", "Tamam", "Hayır", "benim", "daha", "deðil", "önce", "lazým", "benim", "çalýþýyor", "burada", "efendim" };
// Words shared by Croatian and Serbian; the two lists after it tell the languages apart.
public static readonly string[] AutoDetectWordsCroatianAndSerbian = { "sam", "ali", "nije", "samo", "ovo", "kako", "dobro", "sve", "tako", "će", "mogu", "ću", "zašto", "nešto", "za" };
public static readonly string[] AutoDetectWordsCroatian = { "što", "ovdje", "gdje", "kamo", "tko", "prije", "uvijek", "vrijeme", "vidjeti", "netko",
                                                            "vidio", "nitko", "bok", "lijepo", "oprosti", "htio", "mjesto", "oprostite", "čovjek", "dolje",
                                                            "čovječe", "dvije", "dijete", "dio", "poslije", "događa", "vjerovati", "vjerojatno", "vjerujem", "točno",
                                                            "razumijem", "vidjela", "cijeli", "svijet", "obitelj", "volio", "sretan", "dovraga", "svijetu", "htjela",
                                                            "vidjeli", "negdje", "želio", "ponovno", "djevojka", "umrijeti", "čovjeka", "mjesta", "djeca", "osjećam",
                                                            "uopće", "djecu", "naprijed", "obitelji", "doista", "mjestu", "lijepa", "također", "riječ", "tijelo" };
public static readonly string[] AutoDetectWordsSerbian = { "šta", "ovde", "gde", "ko", "pre", "uvek", "vreme", "videti", "neko",
                                                           "video", "niko", "ćao", "lepo", "izvini", "hteo", "mesto", "izvinite", "čovek", "dole",
                                                           "čoveče", "dve", "dete", "deo", "posle", "dešava", "verovati", "verovatno", "verujem", "tačno",
                                                           "razumem", "videla", "ceo", "svet", "porodica", "voleo", "srećan", "dođavola", "svetu", "htela",
                                                           "videli", "negde", "želeo", "ponovo", "devojka", "umreti", "čoveka", "mesta", "deca", "osećam",
                                                           "uopšte", "decu", "napred", "porodicu", "zaista", "mestu", "lepa", "takođe", "reč", "telo" };
|
||||
|
||||
/// <summary>
/// Guesses the language of <paramref name="text"/> by counting marker words language by language.
/// The checks run in a fixed order and the first language whose count passes its threshold wins,
/// so the order below is part of the behavior.
/// </summary>
/// <param name="text">Text to classify.</param>
/// <param name="bestCount">Threshold a marker-word count must exceed for a language to be accepted.</param>
/// <returns>A two-letter language code, or an empty string when no language passes.</returns>
public static string AutoDetectGoogleLanguage(string text, int bestCount)
{
    // Word lists that several checks below share.
    string[] dutchMarkers = { "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n" };
    string[] frenchMarkers = { "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque" };
    string[] romanianMarkersA = { "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã",
                                  "vorbesti", "oamenii", "Asteaptã", "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau" };
    string[] romanianMarkersB = { "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul",
                                  "vorbesti", "oamenii", "zeului", "vrea", "atunci", "Poate", "Acum", "memoria", "soarele" };

    if (GetCount(text, AutoDetectWordsEnglish) > bestCount)
    {
        return "en";
    }

    int danishHits = GetCount(text, AutoDetectWordsDanish);
    if (danishHits > bestCount)
    {
        int norwegianHits = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre");
        int dutchHits = GetCount(text, dutchMarkers);
        if (norwegianHits < 2 && dutchHits < danishHits)
        {
            return "da";
        }
    }

    int norwegianWordHits = GetCount(text, AutoDetectWordsNorwegian);
    if (norwegianWordHits > bestCount)
    {
        int danishMarkerHits = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge");
        int dutchHits = GetCount(text, dutchMarkers);
        if (danishMarkerHits < 2 && dutchHits < norwegianWordHits)
        {
            return "no";
        }
    }

    if (GetCount(text, AutoDetectWordsSwedish) > bestCount)
    {
        return "sv";
    }

    if (GetCount(text, AutoDetectWordsSpanish) > bestCount)
    {
        // Words that are not Spanish: too many of them means the text is French or Portuguese.
        int frenchHits = GetCount(text, frenchMarkers);
        int portugueseHits = GetCount(text, "[NnCc]ão", "Então", "h?ouve", "pessoal", "rapariga", "tivesse", "fizeste",
                                      "jantar", "conheço", "atenção", "foste", "milhões", "devias", "ganhar", "raios");
        if (frenchHits < 2 && portugueseHits < 2)
        {
            return "es";
        }
    }

    if (GetCount(text, AutoDetectWordsItalian) > bestCount)
    {
        // Words that are not Italian.
        if (GetCount(text, frenchMarkers) < 2)
        {
            return "it";
        }
    }

    if (GetCount(text, AutoDetectWordsFrench) > bestCount)
    {
        if (GetCount(text, "[Ss]înt", "aici", "domnule", "pentru", "Vreau") < 5)
        {
            return "fr";
        }
    }

    if (GetCount(text, AutoDetectWordsPortuguese) > bestCount)
    {
        return "pt"; // Portuguese
    }

    if (GetCount(text, AutoDetectWordsGerman) > bestCount)
    {
        return "de";
    }

    if (GetCount(text, AutoDetectWordsDutch) > bestCount)
    {
        return "nl";
    }

    if (GetCount(text, AutoDetectWordsPolish) > bestCount)
    {
        return "pl";
    }

    if (GetCount(text, AutoDetectWordsGreek) > bestCount)
    {
        return "el"; // Greek
    }

    if (GetCount(text, AutoDetectWordsRussian) > bestCount)
    {
        return "ru"; // Russian
    }

    if (GetCount(text, AutoDetectWordsBulgarian) > bestCount)
    {
        return "bg"; // Bulgarian
    }

    int arabicHits = GetCount(text, AutoDetectWordsArabic);
    if (arabicHits > bestCount)
    {
        if (GetCount(text, "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10)
        {
            return "he";
        }

        if (GetCount(text, romanianMarkersA) > arabicHits)
        {
            return "ro"; // Romanian
        }

        if (GetCount(text, romanianMarkersB) > arabicHits)
        {
            return "ro"; // Romanian
        }

        return "ar"; // Arabic
    }

    if (GetCount(text, AutoDetectWordsHebrew) > bestCount)
    {
        return "he"; // Hebrew
    }

    if (GetCount(text, AutoDetectWordsCroatianAndSerbian) > bestCount)
    {
        int croatianHits = GetCount(text, AutoDetectWordsCroatian);
        int serbianHits = GetCount(text, AutoDetectWordsSerbian);
        return croatianHits > serbianHits
            ? "hr"  // Croatian
            : "sr"; // Serbian
    }

    if (GetCount(text, AutoDetectWordsVietnamese) > bestCount)
    {
        return "vi"; // Vietnamese
    }

    if (GetCount(text, AutoDetectWordsHungarian) > bestCount)
    {
        return "hu"; // Hungarian
    }

    if (GetCount(text, AutoDetectWordsTurkish) > bestCount)
    {
        return "tr"; // Turkish
    }

    if (GetCount(text, "yang", "tahu", "bisa", "akan", "tahun", "tapi", "dengan", "untuk", "rumah", "dalam", "sudah", "bertemu") > bestCount)
    {
        return "id"; // Indonesian
    }

    int thaiHits = GetCount(text, "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล", "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์");
    if (thaiHits > 10 || thaiHits > bestCount)
    {
        return "th"; // Thai
    }

    int koreanHits = GetCount(text, "그리고", "아니야", "하지만", "말이야", "그들은", "우리가");
    if (koreanHits > 10 || koreanHits > bestCount)
    {
        return "ko"; // Korean
    }

    if (GetCount(text, "että", "kuin", "minä", "mitään", "Mutta", "siitä", "täällä", "poika", "Kiitos", "enää", "vielä", "tässä") > bestCount)
    {
        return "fi"; // Finnish
    }

    if (GetCount(text, romanianMarkersA) > bestCount)
    {
        return "ro"; // Romanian
    }

    if (GetCount(text, romanianMarkersB) > bestCount)
    {
        return "ro"; // Romanian
    }

    // The last two checks count characters anywhere in the text and use twice the threshold.
    int japaneseHits = GetCountContains(text, "シ", "ュ", "シン", "シ", "ン", "ユ")
                     + GetCountContains(text, "イ", "ン", "チ", "ェ", "ク", "ハ")
                     + GetCountContains(text, "シ", "ュ", "う", "シ", "ン", "サ")
                     + GetCountContains(text, "シ", "ュ", "シ", "ン", "だ", "う");
    if (japaneseHits > bestCount * 2)
    {
        return "ja"; // Japanese - not tested...
    }

    int chineseHits = GetCountContains(text, "是", "是早", "吧", "的", "爱", "上好")
                    + GetCountContains(text, "的", "啊", "好", "好", "亲", "的")
                    + GetCountContains(text, "谢", "走", "吧", "晚", "上", "好")
                    + GetCountContains(text, "来", "卡", "拉", "吐", "滚", "他");
    if (chineseHits > bestCount * 2)
    {
        return "zh"; // Chinese (simplified) - not tested...
    }

    return string.Empty;
}
|
||||
|
||||
/// <summary>
/// Detects the language of a subtitle, falling back to English when detection finds nothing.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are analyzed.</param>
/// <returns>The detected language code, or "en" when <see cref="AutoDetectGoogleLanguageOrNull"/> returns null.</returns>
public static string AutoDetectGoogleLanguage(Subtitle subtitle)
{
    return AutoDetectGoogleLanguageOrNull(subtitle) ?? "en";
}
|
||||
|
||||
/// <summary>
/// Detects the language of a subtitle from the text of all its paragraphs.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are analyzed.</param>
/// <returns>The detected language code, or <c>null</c> when no language was recognized.</returns>
public static string AutoDetectGoogleLanguageOrNull(Subtitle subtitle)
{
    var allText = new StringBuilder();
    foreach (Paragraph paragraph in subtitle.Paragraphs)
    {
        allText.AppendLine(paragraph.Text);
    }

    // The acceptance threshold scales with the number of paragraphs.
    int threshold = subtitle.Paragraphs.Count / 14;
    string detected = AutoDetectGoogleLanguage(allText.ToString(), threshold);

    return string.IsNullOrEmpty(detected) ? null : detected;
}
|
||||
|
||||
/// <summary>
/// Picks a dictionary language name (for example "en_US") for a subtitle by counting marker words.
/// Only languages returned by <c>Utilities.GetDictionaryLanguages()</c> are considered; the
/// bracketed short name in each entry selects the check to run.
/// </summary>
/// <param name="languageName">Fallback name; "en_US" is used when it is null or empty.</param>
/// <param name="subtitle">Subtitle whose paragraphs are analyzed.</param>
/// <returns>
/// The short name of the last dictionary whose check passed, or the fallback when none passed.
/// </returns>
public static string AutoDetectLanguageName(string languageName, Subtitle subtitle)
{
    if (string.IsNullOrEmpty(languageName))
    {
        languageName = "en_US";
    }

    int bestCount = subtitle.Paragraphs.Count / 14;

    var allText = new StringBuilder();
    foreach (Paragraph paragraph in subtitle.Paragraphs)
    {
        allText.AppendLine(paragraph.Text);
    }
    string text = allText.ToString();

    List<string> dictionaryNames = Utilities.GetDictionaryLanguages();

    // Which sibling dictionaries exist decides whether en_US/en_GB and hr_HR/sr-Latn get told apart.
    bool containsEnGb = false;
    bool containsEnUs = false;
    bool containsHrHr = false;
    bool containsSrLatn = false;
    foreach (string name in dictionaryNames)
    {
        containsEnGb |= name.Contains("[en_GB]");
        containsEnUs |= name.Contains("[en_US]");
        containsHrHr |= name.Contains("[hr_HR]");
        containsSrLatn |= name.Contains("[sr-Latn]");
    }

    // Word lists that several cases below share.
    string[] frenchMarkers = { "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque" };
    string[] spanishMarkers = { "Hola", "nada", "Vamos", "pasa", "los", "como" };
    string[] usSpellings = { "color", "flavor", "honor", "humor", "neighbor", "honor" };
    string[] gbSpellings = { "colour", "flavour", "honour", "humour", "neighbour", "honour" };

    foreach (string name in dictionaryNames)
    {
        // Extract the text between '[' and ']'; a '[' at index 0 yields no short name.
        int open = name.IndexOf('[');
        int close = name.IndexOf(']');
        string shortName = (open > 0 && close > open)
            ? name.Substring(open + 1, close - open - 1)
            : string.Empty;

        int count;
        switch (shortName)
        {
            case "da_DK":
                if (GetCount(text, "vi", "hun", "og", "jeg", "var", "men", "bliver", "meget", "spørger", "Hej", "utrolig", "dejligt") > bestCount)
                {
                    if (GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre") < 2)
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "nb_NO":
                count = GetCount(text, AutoDetectWordsNorwegian);
                if (count > bestCount)
                {
                    int danishCount = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge");
                    int dutchCount = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n");
                    if (danishCount < 2 && dutchCount < count)
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "en_US":
                if (GetCount(text, AutoDetectWordsEnglish) > bestCount)
                {
                    languageName = shortName;
                    if (containsEnGb)
                    {
                        int usCount = GetCount(text, usSpellings);
                        int gbCount = GetCount(text, gbSpellings);
                        if (gbCount > usCount)
                        {
                            languageName = "en_GB";
                        }
                    }
                }
                break;

            case "en_GB":
                if (GetCount(text, "we", "are", "and", "you", "your", "what") > bestCount)
                {
                    languageName = shortName;
                    if (containsEnUs)
                    {
                        int usCount = GetCount(text, usSpellings);
                        int gbCount = GetCount(text, gbSpellings);
                        if (gbCount < usCount)
                        {
                            languageName = "en_US";
                        }
                    }
                }
                break;

            case "sv_SE":
                if (GetCount(text, "vi", "är", "och", "Jag", "inte", "för") > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "es_ES":
                if (GetCount(text, AutoDetectWordsSpanish) > bestCount)
                {
                    if (GetCount(text, frenchMarkers) < 2) // not Spanish words
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "fr_FR":
                if (GetCount(text, AutoDetectWordsFrench) > bestCount)
                {
                    int spanishWords = GetCount(text, spanishMarkers); // not French words
                    int italianWords = GetCount(text, AutoDetectWordsItalian); // not French words
                    if (spanishWords < 2 && italianWords < 2)
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "it_IT":
                if (GetCount(text, AutoDetectWordsItalian) > bestCount)
                {
                    int frenchWords = GetCount(text, frenchMarkers); // not Italian words
                    int spanishWords = GetCount(text, spanishMarkers); // not Italian words
                    if (frenchWords < 2 && spanishWords < 2)
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "de_DE":
                if (GetCount(text, "und", "auch", "sich", "bin", "hast", "möchte") > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "nl_NL":
                if (GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n") > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "pl_PL":
                if (GetCount(text, "Czy", "ale", "ty", "siê", "jest", "mnie") > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "el_GR":
                if (GetCount(text, AutoDetectWordsGreek) > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "ru_RU":
                if (GetCount(text, AutoDetectWordsRussian) > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "ro_RO":
                // The second word list is only consulted when the first one does not pass.
                if (GetCount(text, "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã", "vorbesti", "oamenii", "Asteaptã",
                             "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau") > bestCount
                    || GetCount(text, "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul", "vorbesti", "oamenii", "zeului",
                                "vrea", "atunci", "Poate", "Acum", "memoria", "soarele") > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "hr_HR": // Croatian
                if (GetCount(text, AutoDetectWordsCroatianAndSerbian) > bestCount)
                {
                    languageName = shortName;
                    if (containsSrLatn)
                    {
                        int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                        int serbianCount = GetCount(text, AutoDetectWordsSerbian);
                        if (serbianCount > croatianCount)
                        {
                            languageName = "sr-Latn";
                        }
                    }
                }
                break;

            case "sr-Latn": // Serbian (Latin)
                if (GetCount(text, AutoDetectWordsCroatianAndSerbian) > bestCount)
                {
                    languageName = shortName;
                    if (containsHrHr)
                    {
                        int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                        int serbianCount = GetCount(text, AutoDetectWordsSerbian);
                        if (serbianCount < croatianCount)
                        {
                            languageName = "hr_HR";
                        }
                    }
                }
                break;

            case "pt_PT": // Portuguese
            case "pt_BR": // Portuguese (Brasil)
                if (GetCount(text, AutoDetectWordsPortuguese) > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "hu_HU": // Hungarian
                if (GetCount(text, AutoDetectWordsHungarian) > bestCount)
                {
                    languageName = shortName;
                }
                break;
        }
    }

    return languageName;
}
|
||||
|
||||
/// <summary>
/// Guesses the ANSI encoding of <paramref name="buffer"/>. The buffer is decoded with a series of
/// candidate code pages and the first one whose decoded text contains enough marker words wins;
/// otherwise the result of <c>DetectEncoding.EncodingTools.DetectInputCodepage</c> is returned.
/// </summary>
/// <param name="buffer">Raw bytes to analyze.</param>
/// <returns>
/// The guessed encoding. <see cref="Encoding.Default"/> is returned when running on Mono or when
/// any step throws.
/// </returns>
public static Encoding DetectAnsiEncoding(byte[] buffer)
{
    if (Utilities.IsRunningOnMono())
    {
        return Encoding.Default;
    }

    try
    {
        Encoding detected = DetectEncoding.EncodingTools.DetectInputCodepage(buffer);

        Encoding greek = Encoding.GetEncoding(1253); // Greek
        if (GetCount(greek.GetString(buffer), AutoDetectWordsGreek) > 5)
        {
            return greek;
        }

        Encoding cyrillic = Encoding.GetEncoding(1251); // Cyrillic
        string cyrillicText = cyrillic.GetString(buffer);
        if (GetCount(cyrillicText, "что", "быть", "весь", "этот", "один", "такой") > 5 || // Russian
            GetCount(cyrillicText, "Какво", "тук", "може", "Как", "Ваше", "какво") > 5)   // Bulgarian
        {
            return cyrillic;
        }

        Encoding russian = Encoding.GetEncoding(28595); // Russian
        if (GetCount(russian.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5)
        {
            return russian;
        }

        Encoding thai = Encoding.GetEncoding(874); // Thai
        string thaiText = thai.GetString(buffer);
        int thaiHits = GetCount(thaiText, "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล")
                     + GetCount(thaiText, "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์");
        if (thaiHits > 5)
        {
            return thai;
        }

        Encoding arabic = Encoding.GetEncoding(28596); // Arabic
        Encoding hebrew = Encoding.GetEncoding(28598); // Hebrew
        if (GetCount(arabic.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5)
        {
            // The same bytes can pass the Arabic check; a strong Hebrew signal overrides it.
            bool looksHebrew = GetCount(hebrew.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10;
            return looksHebrew ? hebrew : arabic;
        }

        if (GetCount(hebrew.GetString(buffer), AutoDetectWordsHebrew) > 5)
        {
            return hebrew;
        }

        return detected;
    }
    catch
    {
        // Best effort: any failure falls back to the system default encoding.
        return Encoding.Default;
    }
}
|
||||
|
||||
/// <summary>
/// Determines the text encoding of a file.
/// A byte-order mark decides first. Without one, up to 500000 bytes are inspected: valid UTF-8
/// content, a UTF-8 default setting, or an XML header declaring UTF-8 select UTF-8; otherwise,
/// when <c>Configuration.Settings.General.AutoGuessAnsiEncoding</c> is set, the ANSI code page is
/// guessed from marker words.
/// </summary>
/// <param name="fileName">Path of the file to inspect.</param>
/// <returns>
/// The detected encoding. The starting value is <see cref="Encoding.Default"/>, or the encoding
/// named by <c>Configuration.Settings.General.DefaultEncoding</c> when that names a non-UTF-8,
/// non-UTF-16 encoding. Failures are swallowed and return whatever was determined so far.
/// </returns>
public static Encoding GetEncodingFromFile(string fileName)
{
    var encoding = Encoding.Default;

    try
    {
        // The setting is compared against "<code page>: <display name>".
        foreach (EncodingInfo ei in Encoding.GetEncodings())
        {
            if (ei.CodePage + ": " + ei.DisplayName == Configuration.Settings.General.DefaultEncoding &&
                ei.Name != Encoding.UTF8.BodyName &&
                ei.Name != Encoding.Unicode.BodyName)
            {
                encoding = ei.GetEncoding();
                break;
            }
        }

        using (var file = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        {
            var bom = new byte[12]; // Get the byte-order mark, if there is one
            file.Position = 0;
            file.Read(bom, 0, 12);
            if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
                encoding = Encoding.UTF8;
            else if (bom[0] == 0xff && bom[1] == 0xfe)
                encoding = Encoding.Unicode;
            else if (bom[0] == 0xfe && bom[1] == 0xff) // utf-16 and ucs-2
                encoding = Encoding.BigEndianUnicode;
            else if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff) // ucs-4
                // 00 00 FE FF is the big-endian UTF-32 mark; Encoding.UTF32 is little-endian and would mis-decode the file.
                encoding = new UTF32Encoding(true, true);
            else if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76 && (bom[3] == 0x38 || bom[3] == 0x39 || bom[3] == 0x2b || bom[3] == 0x2f)) // utf-7
                encoding = Encoding.UTF7;
            else if (file.Length > 12)
            {
                // No byte-order mark: inspect the content, capped at 500000 bytes.
                long length = file.Length;
                if (length > 500000)
                    length = 500000;

                file.Position = 0;
                var buffer = new byte[length];
                file.Read(buffer, 0, (int)length);

                bool couldBeUtf8;
                if (IsUtf8(buffer, out couldBeUtf8))
                {
                    encoding = Encoding.UTF8;
                }
                else if (couldBeUtf8 && Configuration.Settings.General.DefaultEncoding == Encoding.UTF8.BodyName)
                { // keep utf-8 encoding if it's default
                    encoding = Encoding.UTF8;
                }
                else if (couldBeUtf8 && fileName.EndsWith(".xml", StringComparison.OrdinalIgnoreCase) && Encoding.Default.GetString(buffer).ToLower().Replace('\'', '"').Contains("encoding=\"utf-8\""))
                { // keep utf-8 encoding for xml files with utf-8 in header (without any utf-8 encoded characters, but with only allowed utf-8 characters)
                    encoding = Encoding.UTF8;
                }
                else if (Configuration.Settings.General.AutoGuessAnsiEncoding)
                {
                    encoding = DetectAnsiEncoding(buffer);

                    // These checks repeat those inside DetectAnsiEncoding, which returns early on Mono,
                    // and add Romanian and Korean.
                    Encoding greekEncoding = Encoding.GetEncoding(1253); // Greek
                    if (GetCount(greekEncoding.GetString(buffer), AutoDetectWordsGreek) > 5)
                        return greekEncoding;

                    Encoding russianEncoding = Encoding.GetEncoding(1251); // Cyrillic
                    if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian
                        return russianEncoding;
                    if (GetCount(russianEncoding.GetString(buffer), "Какво", "тук", "може", "Как", "Ваше", "какво") > 5) // Bulgarian
                        return russianEncoding;
                    russianEncoding = Encoding.GetEncoding(28595); // Russian
                    if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5)
                        return russianEncoding;

                    Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai
                    if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
                        return thaiEncoding;

                    Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic
                    Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew
                    if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5)
                    {
                        if (GetCount(hewbrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10)
                            return hewbrewEncoding;
                        return arabicEncoding;
                    }
                    if (GetCount(hewbrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 5)
                        return hewbrewEncoding;

                    Encoding romanianEncoding = Encoding.GetEncoding(1250); // Romanian
                    if (GetCount(romanianEncoding.GetString(buffer), "să", "şi", "văzut", "regulă", "găsit", "viaţă") > 99)
                        return romanianEncoding;

                    Encoding koreanEncoding = Encoding.GetEncoding(949); // Korean
                    if (GetCount(koreanEncoding.GetString(buffer), "그리고", "아니야", "하지만", "말이야", "그들은", "우리가") > 5)
                        return koreanEncoding;
                }
            }
        }
    }
    catch
    {
        // Deliberate best effort: on any error, return the encoding determined so far.
    }
    return encoding;
}
|
||||
|
||||
/// <summary>
/// Will try to determine if buffer is utf-8 encoded or not.
/// If any non-utf8 sequences are found then false is returned, if no utf8 multibytes sequences are found then false is returned.
/// The whole buffer is inspected. A multi-byte sequence that is cut off by the end of the buffer
/// (the caller may pass a truncated prefix of a file) is tolerated but not counted.
/// </summary>
/// <param name="buffer">Bytes to inspect.</param>
/// <param name="couldBeUtf8">
/// Set to true when no invalid sequence was found, even if the buffer contains no multi-byte
/// sequence at all (i.e. it is plain 7-bit text); false when an invalid sequence was found.
/// </param>
/// <returns>True when the buffer is valid and contains at least one complete multi-byte sequence.</returns>
private static bool IsUtf8(byte[] buffer, out bool couldBeUtf8)
{
    couldBeUtf8 = false;
    int multiByteSequences = 0;
    int i = 0;
    while (i < buffer.Length)
    {
        byte lead = buffer[i];
        if (lead <= 127)
        {
            i++;
            continue;
        }

        int sequenceLength;
        if (lead >= 194 && lead <= 223)
            sequenceLength = 2;
        else if (lead >= 224 && lead <= 239)
            sequenceLength = 3;
        else if (lead >= 240 && lead <= 244)
            sequenceLength = 4;
        else
            return false; // stray continuation byte or a byte that never starts a sequence

        // When the sequence runs past the end of the buffer, only the bytes that are present are checked.
        bool truncated = i + sequenceLength > buffer.Length;
        int end = truncated ? buffer.Length : i + sequenceLength;
        for (int k = i + 1; k < end; k++)
        {
            if (buffer[k] < 128 || buffer[k] > 191)
                return false; // not a continuation byte
        }

        if (truncated)
            break;

        multiByteSequences++;
        i += sequenceLength;
    }

    couldBeUtf8 = true;
    return multiByteSequences > 0; // no multi-byte sequences: not utf-8 (no characters utf-8 encoded...)
}
|
||||
|
||||
}
|
||||
}
|
@ -173,6 +173,7 @@
|
||||
<Compile Include="ImageSplitter.cs" />
|
||||
<Compile Include="ImageSplitterItem.cs" />
|
||||
<Compile Include="Language.cs" />
|
||||
<Compile Include="LanguageAutoDetect.cs" />
|
||||
<Compile Include="LanguageDeserializer.cs" />
|
||||
<Compile Include="LanguageStructure.cs" />
|
||||
<Compile Include="ManagedBitmap.cs" />
|
||||
|
@ -139,7 +139,7 @@ namespace Nikse.SubtitleEdit.Core
|
||||
{
|
||||
try
|
||||
{
|
||||
sr = new StreamReader(fileName, Utilities.GetEncodingFromFile(fileName), true);
|
||||
sr = new StreamReader(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName), true);
|
||||
}
|
||||
catch
|
||||
{
|
||||
|
@ -215,7 +215,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
|
||||
}
|
||||
}
|
||||
|
||||
var language = Utilities.AutoDetectGoogleLanguage(subtitle);
|
||||
var language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle);
|
||||
if (language == "he") // Hebrew
|
||||
{
|
||||
_languageIdLine1 = LanguageIdHebrew;
|
||||
|
@ -82,7 +82,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
|
||||
string languageEnglishName;
|
||||
try
|
||||
{
|
||||
string languageShortName = Utilities.AutoDetectGoogleLanguage(subtitle);
|
||||
string languageShortName = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle);
|
||||
var ci = CultureInfo.CreateSpecificCulture(languageShortName);
|
||||
languageEnglishName = ci.EnglishName;
|
||||
int indexOfStartP = languageEnglishName.IndexOf('(');
|
||||
|
@ -39,7 +39,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
|
||||
|
||||
public override string ToText(Subtitle subtitle, string title)
|
||||
{
|
||||
string language = Utilities.AutoDetectLanguageName("en_US", subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectLanguageName("en_US", subtitle);
|
||||
var ci = CultureInfo.GetCultureInfo(language.Replace("_", "-"));
|
||||
string languageTag = string.Format("{0}CC", language.Replace("_", string.Empty).ToUpper());
|
||||
string languageName = ci.Parent.EnglishName;
|
||||
|
@ -699,7 +699,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
|
||||
var sb = new StringBuilder();
|
||||
sb.AppendLine("Scenarist_SCC V1.0");
|
||||
sb.AppendLine();
|
||||
string language = Utilities.AutoDetectGoogleLanguage(subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle);
|
||||
for (int i = 0; i < subtitle.Paragraphs.Count; i++)
|
||||
{
|
||||
Paragraph p = subtitle.Paragraphs[i];
|
||||
|
@ -40,7 +40,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
|
||||
" <body />" + Environment.NewLine +
|
||||
"</tmx>";
|
||||
|
||||
string lang = Utilities.AutoDetectLanguageName("en_US", subtitle);
|
||||
string lang = LanguageAutoDetect.AutoDetectLanguageName("en_US", subtitle);
|
||||
if (lang.StartsWith("en_"))
|
||||
lang = "EN";
|
||||
else if (lang.Length == 5)
|
||||
|
@ -744,202 +744,6 @@ namespace Nikse.SubtitleEdit.Core
|
||||
return s;
|
||||
}
|
||||
|
||||
/// <summary>
/// Chooses the text encoding to use when reading <paramref name="fileName"/>.
/// Starts with the configured default (non-Unicode) encoding and lets, in this order,
/// a byte-order mark, a UTF-8 content scan and - if enabled in the settings - ANSI
/// code page guessing override it.
/// </summary>
/// <param name="fileName">Path of the file to inspect.</param>
/// <returns>
/// The detected encoding. If the file cannot be opened or anything fails during
/// detection, the encoding chosen so far (at least <see cref="Encoding.Default"/>) is returned.
/// </returns>
public static Encoding GetEncodingFromFile(string fileName)
{
    Encoding encoding = Encoding.Default;

    try
    {
        // Use the encoding configured as default, unless that is UTF-8/UTF-16
        // (those are detected from the file content below).
        foreach (EncodingInfo ei in Encoding.GetEncodings())
        {
            if (ei.CodePage + ": " + ei.DisplayName == Configuration.Settings.General.DefaultEncoding &&
                ei.Name != Encoding.UTF8.BodyName &&
                ei.Name != Encoding.Unicode.BodyName)
            {
                encoding = ei.GetEncoding();
                break;
            }
        }

        using (var file = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        {
            var bom = new byte[12]; // Get the byte-order mark, if there is one
            file.Position = 0;
            int bomLength = file.Read(bom, 0, bom.Length);
            if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
            {
                encoding = Encoding.UTF8;
            }
            else if (bomLength >= 4 && bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0)
            {
                // utf-32 little endian - must be tested before utf-16 LE, which shares the first two bytes
                encoding = Encoding.UTF32;
            }
            else if (bom[0] == 0xff && bom[1] == 0xfe)
            {
                encoding = Encoding.Unicode;
            }
            else if (bom[0] == 0xfe && bom[1] == 0xff) // utf-16 and ucs-2
            {
                encoding = Encoding.BigEndianUnicode;
            }
            else if (bomLength >= 4 && bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff)
            {
                // ucs-4 / utf-32 big endian - Encoding.UTF32 is little endian and would decode this as garbage
                encoding = new UTF32Encoding(true, true);
            }
            else if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76 && (bom[3] == 0x38 || bom[3] == 0x39 || bom[3] == 0x2b || bom[3] == 0x2f)) // utf-7
            {
                encoding = Encoding.UTF7;
            }
            else if (file.Length > 12)
            {
                // No BOM: inspect (at most) the first 500000 bytes of the content.
                long length = file.Length;
                if (length > 500000)
                    length = 500000;

                file.Position = 0;
                var buffer = new byte[length];
                file.Read(buffer, 0, (int)length);

                bool couldBeUtf8;
                if (IsUtf8(buffer, out couldBeUtf8))
                {
                    encoding = Encoding.UTF8;
                }
                else if (couldBeUtf8 && Configuration.Settings.General.DefaultEncoding == Encoding.UTF8.BodyName)
                { // keep utf-8 encoding if it's default
                    encoding = Encoding.UTF8;
                }
                else if (couldBeUtf8 && fileName.EndsWith(".xml", StringComparison.OrdinalIgnoreCase) && Encoding.Default.GetString(buffer).ToLower().Replace('\'', '"').Contains("encoding=\"utf-8\""))
                { // keep utf-8 encoding for xml files with utf-8 in header (without any utf-8 encoded characters, but with only allowed utf-8 characters)
                    encoding = Encoding.UTF8;
                }
                else if (Configuration.Settings.General.AutoGuessAnsiEncoding)
                {
                    encoding = DetectAnsiEncoding(buffer);

                    // Word-frequency checks: decode the bytes with a candidate code page and
                    // accept it when enough common words of that language show up.
                    Encoding greekEncoding = Encoding.GetEncoding(1253); // Greek
                    if (GetCount(greekEncoding.GetString(buffer), AutoDetectWordsGreek) > 5)
                        return greekEncoding;

                    Encoding russianEncoding = Encoding.GetEncoding(1251); // Cyrillic
                    if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian
                        return russianEncoding;
                    if (GetCount(russianEncoding.GetString(buffer), "Какво", "тук", "може", "Как", "Ваше", "какво") > 5) // Bulgarian
                        return russianEncoding;
                    russianEncoding = Encoding.GetEncoding(28595); // Russian
                    if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5)
                        return russianEncoding;

                    Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai
                    if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
                        return thaiEncoding;

                    Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic
                    Encoding hebrewEncoding = Encoding.GetEncoding(28598); // Hebrew
                    if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5)
                    {
                        // The same bytes can look like Arabic words; prefer Hebrew when it matches strongly.
                        if (GetCount(hebrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10)
                            return hebrewEncoding;
                        return arabicEncoding;
                    }
                    if (GetCount(hebrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 5)
                        return hebrewEncoding;

                    Encoding romanianEncoding = Encoding.GetEncoding(1250); // Romanian
                    if (GetCount(romanianEncoding.GetString(buffer), "să", "şi", "văzut", "regulă", "găsit", "viaţă") > 99)
                        return romanianEncoding;

                    Encoding koreanEncoding = Encoding.GetEncoding(949); // Korean
                    if (GetCount(koreanEncoding.GetString(buffer), "그리고", "아니야", "하지만", "말이야", "그들은", "우리가") > 5)
                        return koreanEncoding;
                }
            }
        }
    }
    catch
    {
        // Deliberately best-effort: any I/O or encoding failure falls back to
        // whatever encoding has been selected so far.
    }
    return encoding;
}
|
||||
|
||||
/// <summary>
/// Scans <paramref name="buffer"/> (all but its last three bytes) for UTF-8 multi-byte sequences.
/// Returns true only when at least one well-formed multi-byte sequence is found and no
/// malformed one is encountered.
/// </summary>
/// <param name="buffer">Raw bytes to inspect.</param>
/// <param name="couldBeUtf8">
/// Set to true when the scan finished without meeting an invalid sequence (this includes
/// pure 7-bit content); false when an invalid sequence was hit.
/// </param>
private static bool IsUtf8(byte[] buffer, out bool couldBeUtf8)
{
    couldBeUtf8 = false;
    int multiByteSequences = 0;
    int limit = buffer.Length - 3; // leaves room for the longest (4-byte) sequence

    for (int pos = 0; pos < limit; pos++)
    {
        byte lead = buffer[pos];
        if (lead <= 127)
        {
            continue; // plain 7-bit character
        }

        // Number of continuation bytes announced by the lead byte.
        int trailing;
        if (lead >= 194 && lead <= 223)
        {
            trailing = 1;
        }
        else if (lead >= 224 && lead <= 239)
        {
            trailing = 2;
        }
        else if (lead >= 240 && lead <= 244)
        {
            trailing = 3;
        }
        else
        {
            return false;
        }

        // Every continuation byte must be in the range 128..191.
        for (int k = 1; k <= trailing; k++)
        {
            byte next = buffer[pos + k];
            if (next < 128 || next > 191)
            {
                return false;
            }
        }

        multiByteSequences++;
        pos += trailing;
    }

    couldBeUtf8 = true;
    return multiByteSequences > 0; // no multi-byte characters at all => not reported as utf-8
}
|
||||
|
||||
/// <summary>
/// Guesses the ANSI code page of <paramref name="buffer"/>.
/// The result of the code page detector is used unless a word-frequency check for
/// Greek, Cyrillic, Thai, Arabic or Hebrew matches first.
/// </summary>
/// <param name="buffer">Raw file bytes.</param>
/// <returns>
/// The guessed encoding; <see cref="Encoding.Default"/> when running on Mono or when
/// any step of the detection throws.
/// </returns>
public static Encoding DetectAnsiEncoding(byte[] buffer)
{
    if (IsRunningOnMono())
    {
        return Encoding.Default;
    }

    try
    {
        Encoding detected = DetectEncoding.EncodingTools.DetectInputCodepage(buffer);

        Encoding greek = Encoding.GetEncoding(1253); // Greek
        if (GetCount(greek.GetString(buffer), AutoDetectWordsGreek) > 5)
        {
            return greek;
        }

        Encoding cyrillic = Encoding.GetEncoding(1251); // Cyrillic
        if (GetCount(cyrillic.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian
        {
            return cyrillic;
        }
        if (GetCount(cyrillic.GetString(buffer), "Какво", "тук", "може", "Как", "Ваше", "какво") > 5) // Bulgarian
        {
            return cyrillic;
        }

        Encoding isoCyrillic = Encoding.GetEncoding(28595); // Russian
        if (GetCount(isoCyrillic.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian
        {
            return isoCyrillic;
        }

        Encoding thai = Encoding.GetEncoding(874); // Thai
        string thaiText = thai.GetString(buffer);
        int thaiHits = GetCount(thaiText, "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") +
                       GetCount(thaiText, "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์");
        if (thaiHits > 5)
        {
            return thai;
        }

        Encoding arabic = Encoding.GetEncoding(28596); // Arabic
        Encoding hebrew = Encoding.GetEncoding(28598); // Hebrew
        if (GetCount(arabic.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5)
        {
            // Arabic matched; a strong Hebrew match still takes precedence.
            bool strongHebrew = GetCount(hebrew.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10;
            return strongHebrew ? hebrew : arabic;
        }
        if (GetCount(hebrew.GetString(buffer), AutoDetectWordsHebrew) > 5)
        {
            return hebrew;
        }

        return detected;
    }
    catch
    {
        return Encoding.Default;
    }
}
|
||||
|
||||
public static string DictionaryFolder
|
||||
{
|
||||
get
|
||||
@ -995,513 +799,6 @@ namespace Nikse.SubtitleEdit.Core
|
||||
return duration;
|
||||
}
|
||||
|
||||
/// <summary>
/// Counts whole-word occurrences in <paramref name="text"/>. Each entry of
/// <paramref name="words"/> is used as a regular expression pattern wrapped in
/// word boundaries (<c>\b</c>), and the match counts of all entries are added up.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="words">Words (regex patterns) to look for.</param>
/// <returns>Total number of matches over all patterns.</returns>
private static int GetCount(string text, params string[] words)
{
    const RegexOptions options = RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture;

    int total = 0;
    foreach (string word in words)
    {
        string pattern = "\\b" + word + "\\b";
        total += Regex.Matches(text, pattern, options).Count;
    }
    return total;
}
|
||||
|
||||
/// <summary>
/// Counts occurrences in <paramref name="text"/> without requiring word boundaries.
/// Each entry of <paramref name="words"/> is used as a regular expression pattern and
/// the match counts of all entries are added up.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="words">Patterns to look for.</param>
/// <returns>Total number of matches over all patterns.</returns>
private static int GetCountContains(string text, params string[] words)
{
    int total = 0;
    foreach (string pattern in words)
    {
        total += Regex.Matches(text, pattern).Count;
    }
    return total;
}
|
||||
|
||||
/// <summary>
/// Maps the code page of <paramref name="encoding"/> to a two-letter language code
/// for the code pages listed below.
/// </summary>
/// <param name="encoding">Encoding whose <see cref="Encoding.CodePage"/> is examined.</param>
/// <returns>The two-letter language code, or null when the code page is not in the list.</returns>
public static string AutoDetectGoogleLanguage(Encoding encoding)
{
    string language;
    switch (encoding.CodePage)
    {
        case 860:
            language = "pt"; // Portuguese
            break;

        case 1254:
        case 28599:
            language = "tr"; // Turkish
            break;

        case 1255:
        case 28598:
            language = "he"; // Hebrew
            break;

        case 1256:
        case 28596:
            language = "ar"; // Arabic
            break;

        case 1258:
            language = "vi"; // Vietnamese
            break;

        case 949:
        case 1361:
        case 20949:
        case 50225:
        case 51949:
            language = "ko"; // Korean
            break;

        case 1253:
        case 28597:
            language = "el"; // Greek
            break;

        case 10001:
        case 20932:
        case 50220:
        case 50221:
        case 50222:
        case 51932:
            language = "ja"; // Japanese
            break;

        case 950:
        case 20000:
        case 20002:
        case 20936:
        case 51936:
        case 52936:
        case 54936:
            language = "zh"; // Chinese
            break;

        default:
            language = null;
            break;
    }
    return language;
}
|
||||
|
||||
// Common words per language, used for word-frequency based language/encoding detection.
// Entries are used as regular expression patterns (see GetCount), so character classes
// and groups such as "[Hh]et" are allowed.
public static readonly string[] AutoDetectWordsEnglish = { "we", "are", "and", "you", "your", "what" };
public static readonly string[] AutoDetectWordsDanish = { "vi", "han", "og", "jeg", "var", "men", "gider", "bliver", "virkelig", "kommer", "tilbage", "Hej" };
public static readonly string[] AutoDetectWordsNorwegian = { "vi", "er", "og", "jeg", "var", "men" };
public static readonly string[] AutoDetectWordsSwedish = { "vi", "är", "och", "Jag", "inte", "för" };
public static readonly string[] AutoDetectWordsSpanish = { "el", "bien", "Vamos", "Hola", "casa", "con" };
public static readonly string[] AutoDetectWordsFrench = { "un", "vous", "avec", "pas", "ce", "une" };
public static readonly string[] AutoDetectWordsGerman = { "und", "auch", "sich", "bin", "hast", "möchte" };
public static readonly string[] AutoDetectWordsDutch = { "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n" };
// "siê" is "się" as it appears when Windows-1250 text is decoded as Windows-1252; both forms are matched.
public static readonly string[] AutoDetectWordsPolish = { "Czy", "ale", "ty", "siê", "się", "jest", "mnie" };
public static readonly string[] AutoDetectWordsItalian = { "Cosa", "sono", "Grazie", "Buongiorno", "bene", "questo", "ragazzi", "propriamente", "numero", "hanno", "giorno", "faccio", "davvero", "negativo", "essere", "vuole", "sensitivo", "venire" };
public static readonly string[] AutoDetectWordsPortuguese = { "[Nn]ão", "Então", "Estás", "isso", "com" };
public static readonly string[] AutoDetectWordsGreek = { "μου", "είναι", "Είναι", "αυτό", "Τόμπυ", "καλά", "Ενταξει", "Ενταξει", "πρεπει", "Λοιπον", "τιποτα", "ξερεις" };
public static readonly string[] AutoDetectWordsRussian = { "Это", "не", "ты", "что", "это", "Мы", "Да", "Нет", "Ты", "нет", "Он", "его", "тебя", "как", "Не", "вы", "меня", "Но", "то", "всё", "бы", "мы", "мне", "вас", "знаю", "ещё", "за", "нас", "чтобы", "был" };
public static readonly string[] AutoDetectWordsBulgarian = { "Какво", "тук", "може", "Как", "Ваше", "какво" };
// Was a copy of the Bulgarian list; now the Romanian words used by the text based detection.
public static readonly string[] AutoDetectWordsRomanian = { "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã",
                                                            "vorbesti", "oamenii", "Asteaptã", "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau" };
// Was a copy of the Bulgarian list (so Arabic text could never match); now the Arabic words used by the encoding detection.
public static readonly string[] AutoDetectWordsArabic = { "من", "هل", "لا", "فى", "لقد", "ما" };
public static readonly string[] AutoDetectWordsHebrew = { "אתה", "אולי", "הוא", "בסדר", "יודע", "טוב" };
public static readonly string[] AutoDetectWordsVietnamese = { "không", "tôi", "anh", "đó", "Tôi", "ông" };
public static readonly string[] AutoDetectWordsHungarian = { "hogy", "lesz", "tudom", "vagy", "mondtam", "még" };
// "deðil", "lazým" and "çalýþýyor" are Windows-1254 text decoded as Windows-1252; the correctly decoded forms are matched too.
public static readonly string[] AutoDetectWordsTurkish = { "için", "Tamam", "Hayır", "benim", "daha", "deðil", "önce", "lazým", "benim", "çalýþýyor", "burada", "efendim",
                                                           "değil", "lazım", "çalışıyor" };
public static readonly string[] AutoDetectWordsCroatianAndSerbian = { "sam", "ali", "nije", "samo", "ovo", "kako", "dobro", "sve", "tako", "će", "mogu", "ću", "zašto", "nešto", "za" };
// Ijekavian (Croatian) forms ...
public static readonly string[] AutoDetectWordsCroatian = { "što", "ovdje", "gdje", "kamo", "tko", "prije", "uvijek", "vrijeme", "vidjeti", "netko",
                                                            "vidio", "nitko", "bok", "lijepo", "oprosti", "htio", "mjesto", "oprostite", "čovjek", "dolje",
                                                            "čovječe", "dvije", "dijete", "dio", "poslije", "događa", "vjerovati", "vjerojatno", "vjerujem", "točno",
                                                            "razumijem", "vidjela", "cijeli", "svijet", "obitelj", "volio", "sretan", "dovraga", "svijetu", "htjela",
                                                            "vidjeli", "negdje", "želio", "ponovno", "djevojka", "umrijeti", "čovjeka", "mjesta", "djeca", "osjećam",
                                                            "uopće", "djecu", "naprijed", "obitelji", "doista", "mjestu", "lijepa", "također", "riječ", "tijelo" };
// ... and their Ekavian (Serbian) counterparts, used to tell the two languages apart.
public static readonly string[] AutoDetectWordsSerbian = { "šta", "ovde", "gde", "ko", "pre", "uvek", "vreme", "videti", "neko",
                                                           "video", "niko", "ćao", "lepo", "izvini", "hteo", "mesto", "izvinite", "čovek", "dole",
                                                           "čoveče", "dve", "dete", "deo", "posle", "dešava", "verovati", "verovatno", "verujem", "tačno",
                                                           "razumem", "videla", "ceo", "svet", "porodica", "voleo", "srećan", "dođavola", "svetu", "htela",
                                                           "videli", "negde", "želeo", "ponovo", "devojka", "umreti", "čoveka", "mesta", "deca", "osećam",
                                                           "uopšte", "decu", "napred", "porodicu", "zaista", "mestu", "lepa", "takođe", "reč", "telo" };
|
||||
|
||||
/// <summary>
/// Guesses the language of <paramref name="text"/> by counting common words.
/// Languages are tried in a fixed order and the first one whose word count exceeds
/// <paramref name="bestCount"/> (and passes its extra look-alike checks, if any) wins.
/// </summary>
/// <param name="text">Text to analyse.</param>
/// <param name="bestCount">Number of hits a language must exceed to be accepted.</param>
/// <returns>A two-letter language code, or an empty string when nothing matched.</returns>
public static string AutoDetectGoogleLanguage(string text, int bestCount)
{
    // Word lists that are needed by more than one check below.
    string[] dutchWords = { "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n" };
    string[] frenchOnlyWords = { "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque" };
    string[] strongHebrewWords = { "אולי", "אולי", "אולי", "אולי", "טוב", "טוב" };
    string[] romanianWordsA = { "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã",
                                "vorbesti", "oamenii", "Asteaptã", "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau" };
    string[] romanianWordsB = { "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul",
                                "vorbesti", "oamenii", "zeului", "vrea", "atunci", "Poate", "Acum", "memoria", "soarele" };

    if (GetCount(text, AutoDetectWordsEnglish) > bestCount)
    {
        return "en";
    }

    int hits = GetCount(text, AutoDetectWordsDanish);
    if (hits > bestCount)
    {
        int norwegianHits = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre");
        int dutchHits = GetCount(text, dutchWords);
        if (norwegianHits < 2 && dutchHits < hits)
        {
            return "da";
        }
    }

    hits = GetCount(text, AutoDetectWordsNorwegian);
    if (hits > bestCount)
    {
        int danishHits = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge");
        int dutchHits = GetCount(text, dutchWords);
        if (danishHits < 2 && dutchHits < hits)
        {
            return "no";
        }
    }

    if (GetCount(text, AutoDetectWordsSwedish) > bestCount)
    {
        return "sv";
    }

    if (GetCount(text, AutoDetectWordsSpanish) > bestCount)
    {
        int frenchHits = GetCount(text, frenchOnlyWords); // not spanish words
        int portugueseHits = GetCount(text, "[NnCc]ão", "Então", "h?ouve", "pessoal", "rapariga", "tivesse", "fizeste",
                                            "jantar", "conheço", "atenção", "foste", "milhões", "devias", "ganhar", "raios"); // not spanish words
        if (frenchHits < 2 && portugueseHits < 2)
        {
            return "es";
        }
    }

    if (GetCount(text, AutoDetectWordsItalian) > bestCount)
    {
        int frenchHits = GetCount(text, frenchOnlyWords); // not italian words
        if (frenchHits < 2)
        {
            return "it";
        }
    }

    if (GetCount(text, AutoDetectWordsFrench) > bestCount)
    {
        int romanianHits = GetCount(text, "[Ss]înt", "aici", "domnule", "pentru", "Vreau");
        if (romanianHits < 5)
        {
            return "fr";
        }
    }

    if (GetCount(text, AutoDetectWordsPortuguese) > bestCount)
    {
        return "pt"; // Portuguese
    }

    if (GetCount(text, AutoDetectWordsGerman) > bestCount)
    {
        return "de";
    }

    if (GetCount(text, AutoDetectWordsDutch) > bestCount)
    {
        return "nl";
    }

    if (GetCount(text, AutoDetectWordsPolish) > bestCount)
    {
        return "pl";
    }

    if (GetCount(text, AutoDetectWordsGreek) > bestCount)
    {
        return "el"; // Greek
    }

    if (GetCount(text, AutoDetectWordsRussian) > bestCount)
    {
        return "ru"; // Russian
    }

    if (GetCount(text, AutoDetectWordsBulgarian) > bestCount)
    {
        return "bg"; // Bulgarian
    }

    hits = GetCount(text, AutoDetectWordsArabic);
    if (hits > bestCount)
    {
        if (GetCount(text, strongHebrewWords) > 10)
        {
            return "he";
        }
        if (GetCount(text, romanianWordsA) > hits)
        {
            return "ro"; // Romanian
        }
        if (GetCount(text, romanianWordsB) > hits)
        {
            return "ro"; // Romanian
        }
        return "ar"; // Arabic
    }

    if (GetCount(text, AutoDetectWordsHebrew) > bestCount)
    {
        return "he"; // Hebrew
    }

    if (GetCount(text, AutoDetectWordsCroatianAndSerbian) > bestCount)
    {
        int croatianHits = GetCount(text, AutoDetectWordsCroatian);
        int serbianHits = GetCount(text, AutoDetectWordsSerbian);
        return croatianHits > serbianHits
            ? "hr"  // Croatian
            : "sr"; // Serbian
    }

    if (GetCount(text, AutoDetectWordsVietnamese) > bestCount)
    {
        return "vi"; // Vietnamese
    }

    if (GetCount(text, AutoDetectWordsHungarian) > bestCount)
    {
        return "hu"; // Hungarian
    }

    if (GetCount(text, AutoDetectWordsTurkish) > bestCount)
    {
        return "tr"; // Turkish
    }

    if (GetCount(text, "yang", "tahu", "bisa", "akan", "tahun", "tapi", "dengan", "untuk", "rumah", "dalam", "sudah", "bertemu") > bestCount)
    {
        return "id"; // Indonesian
    }

    hits = GetCount(text, "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล", "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์");
    if (hits > 10 || hits > bestCount)
    {
        return "th"; // Thai
    }

    hits = GetCount(text, "그리고", "아니야", "하지만", "말이야", "그들은", "우리가");
    if (hits > 10 || hits > bestCount)
    {
        return "ko"; // Korean
    }

    if (GetCount(text, "että", "kuin", "minä", "mitään", "Mutta", "siitä", "täällä", "poika", "Kiitos", "enää", "vielä", "tässä") > bestCount)
    {
        return "fi"; // Finnish
    }

    if (GetCount(text, romanianWordsA) > bestCount)
    {
        return "ro"; // Romanian
    }

    if (GetCount(text, romanianWordsB) > bestCount)
    {
        return "ro"; // Romanian
    }

    // The remaining checks count characters anywhere in the text (no word boundaries)
    // and require twice the usual number of hits.
    int japaneseHits = GetCountContains(text, "シ", "ュ", "シン", "シ", "ン", "ユ")
                     + GetCountContains(text, "イ", "ン", "チ", "ェ", "ク", "ハ")
                     + GetCountContains(text, "シ", "ュ", "う", "シ", "ン", "サ")
                     + GetCountContains(text, "シ", "ュ", "シ", "ン", "だ", "う");
    if (japaneseHits > bestCount * 2)
    {
        return "ja"; // Japanese - not tested...
    }

    int chineseHits = GetCountContains(text, "是", "是早", "吧", "的", "爱", "上好")
                    + GetCountContains(text, "的", "啊", "好", "好", "亲", "的")
                    + GetCountContains(text, "谢", "走", "吧", "晚", "上", "好")
                    + GetCountContains(text, "来", "卡", "拉", "吐", "滚", "他");
    if (chineseHits > bestCount * 2)
    {
        return "zh"; // Chinese (simplified) - not tested...
    }

    return string.Empty;
}
|
||||
|
||||
/// <summary>
/// Guesses the language of <paramref name="subtitle"/>, falling back to English.
/// </summary>
/// <param name="subtitle">Subtitle to analyse.</param>
/// <returns>The detected two-letter language code, or "en" when no language was detected.</returns>
public static string AutoDetectGoogleLanguage(Subtitle subtitle)
{
    return AutoDetectGoogleLanguageOrNull(subtitle) ?? "en";
}
|
||||
|
||||
/// <summary>
/// Guesses the language of <paramref name="subtitle"/> from the text of all its paragraphs.
/// The hit threshold passed to the text based detection is one fourteenth of the paragraph count.
/// </summary>
/// <param name="subtitle">Subtitle to analyse.</param>
/// <returns>The detected two-letter language code, or null when no language was detected.</returns>
public static string AutoDetectGoogleLanguageOrNull(Subtitle subtitle)
{
    var allText = new StringBuilder();
    foreach (Paragraph paragraph in subtitle.Paragraphs)
    {
        allText.AppendLine(paragraph.Text);
    }

    int threshold = subtitle.Paragraphs.Count / 14;
    string detected = AutoDetectGoogleLanguage(allText.ToString(), threshold);
    return string.IsNullOrEmpty(detected) ? null : detected;
}
|
||||
|
||||
/// <summary>
/// Picks the dictionary language (e.g. "en_US") that best fits <paramref name="subtitle"/>.
/// Only languages for which a dictionary is listed by GetDictionaryLanguages() are considered;
/// each one is tested by counting common words, and a later match overrides an earlier one.
/// </summary>
/// <param name="languageName">Language name to return when nothing matches; "en_US" is used when null or empty.</param>
/// <param name="subtitle">Subtitle to analyse.</param>
/// <returns>The short name of the matching dictionary language, or the fallback.</returns>
public static string AutoDetectLanguageName(string languageName, Subtitle subtitle)
{
    if (string.IsNullOrEmpty(languageName))
    {
        languageName = "en_US";
    }

    // A language must score more hits than this to be accepted.
    int bestCount = subtitle.Paragraphs.Count / 14;

    var allText = new StringBuilder();
    foreach (Paragraph paragraph in subtitle.Paragraphs)
    {
        allText.AppendLine(paragraph.Text);
    }
    string text = allText.ToString();

    List<string> dictionaryNames = GetDictionaryLanguages();

    // Which of the easily confused dictionary pairs are available?
    bool containsEnGb = false;
    bool containsEnUs = false;
    bool containsHrHr = false;
    bool containsSrLatn = false;
    foreach (string dictionaryName in dictionaryNames)
    {
        containsEnGb |= dictionaryName.Contains("[en_GB]");
        containsEnUs |= dictionaryName.Contains("[en_US]");
        containsHrHr |= dictionaryName.Contains("[hr_HR]");
        containsSrLatn |= dictionaryName.Contains("[sr-Latn]");
    }

    // Word lists that are needed by more than one case below.
    string[] usSpellings = { "color", "flavor", "honor", "humor", "neighbor", "honor" };
    string[] gbSpellings = { "colour", "flavour", "honour", "humour", "neighbour", "honour" };
    string[] frenchOnlyWords = { "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque" };
    string[] spanishOnlyWords = { "Hola", "nada", "Vamos", "pasa", "los", "como" };

    foreach (string dictionaryName in dictionaryNames)
    {
        // The short name is the part between '[' and ']', e.g. "en_US".
        int open = dictionaryName.IndexOf('[');
        int close = dictionaryName.IndexOf(']');
        string shortName = open > 0 && close > open
            ? dictionaryName.Substring(open + 1, close - open - 1)
            : string.Empty;

        int hits;
        switch (shortName)
        {
            case "da_DK":
                hits = GetCount(text, "vi", "hun", "og", "jeg", "var", "men", "bliver", "meget", "spørger", "Hej", "utrolig", "dejligt");
                if (hits > bestCount)
                {
                    int norwegianHits = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre");
                    if (norwegianHits < 2)
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "nb_NO":
                hits = GetCount(text, AutoDetectWordsNorwegian);
                if (hits > bestCount)
                {
                    int danishHits = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge");
                    int dutchHits = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n");
                    if (danishHits < 2 && dutchHits < hits)
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "en_US":
                hits = GetCount(text, AutoDetectWordsEnglish);
                if (hits > bestCount)
                {
                    languageName = shortName;
                    if (containsEnGb)
                    {
                        // Prefer British English when British spellings dominate.
                        int usHits = GetCount(text, usSpellings);
                        int gbHits = GetCount(text, gbSpellings);
                        if (gbHits > usHits)
                        {
                            languageName = "en_GB";
                        }
                    }
                }
                break;

            case "en_GB":
                hits = GetCount(text, "we", "are", "and", "you", "your", "what");
                if (hits > bestCount)
                {
                    languageName = shortName;
                    if (containsEnUs)
                    {
                        // Prefer American English when American spellings dominate.
                        int usHits = GetCount(text, usSpellings);
                        int gbHits = GetCount(text, gbSpellings);
                        if (gbHits < usHits)
                        {
                            languageName = "en_US";
                        }
                    }
                }
                break;

            case "sv_SE":
                hits = GetCount(text, "vi", "är", "och", "Jag", "inte", "för");
                if (hits > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "es_ES":
                hits = GetCount(text, AutoDetectWordsSpanish);
                if (hits > bestCount)
                {
                    int frenchHits = GetCount(text, frenchOnlyWords); // not spanish words
                    if (frenchHits < 2)
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "fr_FR":
                hits = GetCount(text, AutoDetectWordsFrench);
                if (hits > bestCount)
                {
                    int spanishHits = GetCount(text, spanishOnlyWords); // not french words
                    int italianHits = GetCount(text, AutoDetectWordsItalian); // not french words
                    if (spanishHits < 2 && italianHits < 2)
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "it_IT":
                hits = GetCount(text, AutoDetectWordsItalian);
                if (hits > bestCount)
                {
                    int frenchHits = GetCount(text, frenchOnlyWords); // not italian words
                    int spanishHits = GetCount(text, spanishOnlyWords); // not italian words
                    if (frenchHits < 2 && spanishHits < 2)
                    {
                        languageName = shortName;
                    }
                }
                break;

            case "de_DE":
                hits = GetCount(text, "und", "auch", "sich", "bin", "hast", "möchte");
                if (hits > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "nl_NL":
                hits = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n");
                if (hits > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "pl_PL":
                hits = GetCount(text, "Czy", "ale", "ty", "siê", "jest", "mnie");
                if (hits > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "el_GR":
                hits = GetCount(text, AutoDetectWordsGreek);
                if (hits > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "ru_RU":
                hits = GetCount(text, AutoDetectWordsRussian);
                if (hits > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "ro_RO":
                hits = GetCount(text, "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã", "vorbesti", "oamenii", "Asteaptã",
                                      "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau");
                if (hits <= bestCount)
                {
                    // Second attempt with a word list that has no diacritics.
                    hits = GetCount(text, "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul", "vorbesti", "oamenii", "zeului",
                                          "vrea", "atunci", "Poate", "Acum", "memoria", "soarele");
                }
                if (hits > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "hr_HR": // Croatian
                hits = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                if (hits > bestCount)
                {
                    languageName = shortName;
                    if (containsSrLatn)
                    {
                        int croatianHits = GetCount(text, AutoDetectWordsCroatian);
                        int serbianHits = GetCount(text, AutoDetectWordsSerbian);
                        if (serbianHits > croatianHits)
                        {
                            languageName = "sr-Latn";
                        }
                    }
                }
                break;

            case "sr-Latn": // Serbian (Latin)
                hits = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                if (hits > bestCount)
                {
                    languageName = shortName;
                    if (containsHrHr)
                    {
                        int croatianHits = GetCount(text, AutoDetectWordsCroatian);
                        int serbianHits = GetCount(text, AutoDetectWordsSerbian);
                        if (serbianHits < croatianHits)
                        {
                            languageName = "hr_HR";
                        }
                    }
                }
                break;

            case "pt_PT": // Portuguese
            case "pt_BR": // Portuguese (Brasil)
                hits = GetCount(text, AutoDetectWordsPortuguese);
                if (hits > bestCount)
                {
                    languageName = shortName;
                }
                break;

            case "hu_HU": // Hungarian
                hits = GetCount(text, AutoDetectWordsHungarian);
                if (hits > bestCount)
                {
                    languageName = shortName;
                }
                break;
        }
    }

    return languageName;
}
|
||||
|
||||
public static string ColorToHex(Color c)
|
||||
{
|
||||
return string.Format("#{0:x2}{1:x2}{2:x2}", c.R, c.G, c.B);
|
||||
|
@ -43,7 +43,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
}
|
||||
|
||||
comboBoxDictionaries.Items.Clear();
|
||||
string languageName = Utilities.AutoDetectLanguageName(Configuration.Settings.General.SpellCheckLanguage, _subtitle);
|
||||
string languageName = LanguageAutoDetect.AutoDetectLanguageName(Configuration.Settings.General.SpellCheckLanguage, _subtitle);
|
||||
foreach (string name in Utilities.GetDictionaryLanguages())
|
||||
{
|
||||
comboBoxDictionaries.Items.Add(name);
|
||||
@ -109,7 +109,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
}
|
||||
}
|
||||
|
||||
languageName = Utilities.AutoDetectLanguageName(languageName, _subtitle);
|
||||
languageName = LanguageAutoDetect.AutoDetectLanguageName(languageName, _subtitle);
|
||||
if (comboBoxDictionaries.Items.Count > 0)
|
||||
{
|
||||
string name = comboBoxDictionaries.SelectedItem.ToString();
|
||||
|
@ -105,7 +105,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
var sub = new Subtitle();
|
||||
foreach (Paragraph p in _paragraphs)
|
||||
sub.Paragraphs.Add(p);
|
||||
var language = Utilities.AutoDetectGoogleLanguage(sub);
|
||||
var language = LanguageAutoDetect.AutoDetectGoogleLanguage(sub);
|
||||
|
||||
listViewFixes.BeginUpdate();
|
||||
listViewFixes.Items.Clear();
|
||||
|
@ -773,7 +773,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
sub.RemoveEmptyLines();
|
||||
if (checkBoxFixCasing.Checked)
|
||||
{
|
||||
_changeCasing.FixCasing(sub, Utilities.AutoDetectGoogleLanguage(sub));
|
||||
_changeCasing.FixCasing(sub, LanguageAutoDetect.AutoDetectGoogleLanguage(sub));
|
||||
_changeCasingNames.Initialize(sub);
|
||||
_changeCasingNames.FixCasing();
|
||||
}
|
||||
|
@ -110,7 +110,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
private void FindAllNames()
|
||||
{
|
||||
string language = Utilities.AutoDetectLanguageName("en_US", _subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectLanguageName("en_US", _subtitle);
|
||||
if (string.IsNullOrEmpty(language))
|
||||
language = "en_US";
|
||||
|
||||
|
@ -54,7 +54,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
_fileBuffer = new byte[0];
|
||||
}
|
||||
|
||||
Encoding encoding = Utilities.DetectAnsiEncoding(_fileBuffer);
|
||||
Encoding encoding = LanguageAutoDetect.DetectAnsiEncoding(_fileBuffer);
|
||||
foreach (EncodingInfo ei in Encoding.GetEncodings())
|
||||
{
|
||||
var item = new ListViewItem(new[] { ei.CodePage.ToString(), ei.Name, ei.DisplayName });
|
||||
|
@ -68,7 +68,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
openFileDialog1.Filter = Utilities.GetOpenDialogFilter();
|
||||
subtitleListView1.SelectIndexAndEnsureVisible(0);
|
||||
_language1 = Utilities.AutoDetectGoogleLanguage(_subtitle1);
|
||||
_language1 = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle1);
|
||||
}
|
||||
|
||||
public void Initialize(Subtitle subtitle1, string subtitleFileName1, Subtitle subtitle2, string subtitleFileName2)
|
||||
@ -81,7 +81,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
_subtitle2 = subtitle2;
|
||||
labelSubtitle2.Text = subtitleFileName2;
|
||||
|
||||
_language1 = Utilities.AutoDetectGoogleLanguage(_subtitle1);
|
||||
_language1 = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle1);
|
||||
CompareSubtitles();
|
||||
|
||||
if (string.IsNullOrEmpty(subtitleFileName1))
|
||||
@ -167,7 +167,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
subtitleListView1.SelectIndexAndEnsureVisible(0);
|
||||
subtitleListView2.SelectIndexAndEnsureVisible(0);
|
||||
labelSubtitle1.Text = openFileDialog1.FileName;
|
||||
_language1 = Utilities.AutoDetectGoogleLanguage(_subtitle1);
|
||||
_language1 = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle1);
|
||||
if (_subtitle1.Paragraphs.Count > 0)
|
||||
CompareSubtitles();
|
||||
}
|
||||
@ -908,7 +908,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
subtitleListView1.SelectIndexAndEnsureVisible(0);
|
||||
subtitleListView2.SelectIndexAndEnsureVisible(0);
|
||||
labelSubtitle1.Text = filePath;
|
||||
_language1 = Utilities.AutoDetectGoogleLanguage(_subtitle1);
|
||||
_language1 = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle1);
|
||||
if (_subtitle1.Paragraphs.Count > 0)
|
||||
CompareSubtitles();
|
||||
}
|
||||
|
@ -2854,7 +2854,7 @@ $DROP=[DROPVALUE]" + Environment.NewLine + Environment.NewLine +
|
||||
labelLanguage.Visible = true;
|
||||
comboBoxLanguage.Visible = true;
|
||||
comboBoxLanguage.Items.Clear();
|
||||
string languageCode = Utilities.AutoDetectGoogleLanguageOrNull(subtitle);
|
||||
string languageCode = LanguageAutoDetect.AutoDetectGoogleLanguageOrNull(subtitle);
|
||||
if (languageCode == null)
|
||||
languageCode = Configuration.Settings.Tools.ExportVobSubLanguage;
|
||||
for (int i = 0; i < IfoParser.ArrayOfLanguage.Count; i++)
|
||||
|
@ -222,9 +222,9 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
public void Initialize(Subtitle subtitle, SubtitleFormat format, Encoding encoding)
|
||||
{
|
||||
_autoDetectGoogleLanguage = Utilities.AutoDetectGoogleLanguage(encoding); // Guess language via encoding
|
||||
_autoDetectGoogleLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(encoding); // Guess language via encoding
|
||||
if (string.IsNullOrEmpty(_autoDetectGoogleLanguage))
|
||||
_autoDetectGoogleLanguage = Utilities.AutoDetectGoogleLanguage(subtitle); // Guess language based on subtitle contents
|
||||
_autoDetectGoogleLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); // Guess language based on subtitle contents
|
||||
if (_autoDetectGoogleLanguage.Equals("zh", StringComparison.OrdinalIgnoreCase))
|
||||
_autoDetectGoogleLanguage = "zh-CHS"; // Note that "zh-CHS" (Simplified Chinese) and "zh-CHT" (Traditional Chinese) are neutral cultures
|
||||
CultureInfo ci = CultureInfo.GetCultureInfo(_autoDetectGoogleLanguage);
|
||||
@ -515,7 +515,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
if (_namesEtcList == null)
|
||||
{
|
||||
_namesEtcList = new List<string>();
|
||||
string languageTwoLetterCode = Utilities.AutoDetectGoogleLanguage(Subtitle);
|
||||
string languageTwoLetterCode = LanguageAutoDetect.AutoDetectGoogleLanguage(Subtitle);
|
||||
|
||||
// Will contains both one word names and multi names
|
||||
var namesList = new NamesList(Configuration.DictionariesFolder, languageTwoLetterCode, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
|
||||
|
@ -101,9 +101,9 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
_subtitle = subtitle;
|
||||
_translatedSubtitle = new Subtitle(subtitle);
|
||||
|
||||
string defaultFromLanguage = Utilities.AutoDetectGoogleLanguage(encoding); // Guess language via encoding
|
||||
string defaultFromLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(encoding); // Guess language via encoding
|
||||
if (string.IsNullOrEmpty(defaultFromLanguage))
|
||||
defaultFromLanguage = Utilities.AutoDetectGoogleLanguage(subtitle); // Guess language based on subtitle contents
|
||||
defaultFromLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); // Guess language based on subtitle contents
|
||||
|
||||
FillComboWithLanguages(comboBoxFrom);
|
||||
int i = 0;
|
||||
|
@ -46,7 +46,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
try
|
||||
{
|
||||
Encoding encoding = Utilities.GetEncodingFromFile(fileName);
|
||||
Encoding encoding = LanguageAutoDetect.GetEncodingFromFile(fileName);
|
||||
string s = File.ReadAllText(fileName, encoding).Trim();
|
||||
if (s.Contains('.'))
|
||||
radioButtonSeconds.Checked = true;
|
||||
|
@ -607,7 +607,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
try
|
||||
{
|
||||
Encoding encoding = Utilities.GetEncodingFromFile(fileName);
|
||||
Encoding encoding = LanguageAutoDetect.GetEncodingFromFile(fileName);
|
||||
textBoxText.Text = File.ReadAllText(fileName, encoding);
|
||||
SetVideoFileName(fileName);
|
||||
}
|
||||
|
@ -66,7 +66,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
try
|
||||
{
|
||||
SubtitleListview1.Items.Clear();
|
||||
Encoding encoding = Utilities.GetEncodingFromFile(fileName);
|
||||
Encoding encoding = LanguageAutoDetect.GetEncodingFromFile(fileName);
|
||||
textBoxText.Text = File.ReadAllText(fileName, encoding);
|
||||
|
||||
// check for RTF file
|
||||
|
@ -1934,7 +1934,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
if (format == null && ext == ".wsb")
|
||||
{
|
||||
var wsb = new Wsb();
|
||||
var list = new List<string>(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)));
|
||||
var list = new List<string>(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName)));
|
||||
if (wsb.IsMine(list, fileName))
|
||||
{
|
||||
wsb.LoadSubtitle(_subtitle, list, fileName);
|
||||
@ -2102,7 +2102,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
try
|
||||
{
|
||||
var bdnXml = new BdnXml();
|
||||
var list = new List<string>(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)));
|
||||
var list = new List<string>(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName)));
|
||||
if (bdnXml.IsMine(list, fileName))
|
||||
{
|
||||
if (ContinueNewOrExit())
|
||||
@ -2123,7 +2123,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
try
|
||||
{
|
||||
var fcpImage = new FinalCutProImage();
|
||||
var list = new List<string>(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)));
|
||||
var list = new List<string>(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName)));
|
||||
if (fcpImage.IsMine(list, fileName))
|
||||
{
|
||||
if (ContinueNewOrExit())
|
||||
@ -2204,7 +2204,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
try
|
||||
{
|
||||
var dost = new Dost();
|
||||
var list = new List<string>(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)));
|
||||
var list = new List<string>(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName)));
|
||||
if (dost.IsMine(list, fileName))
|
||||
{
|
||||
if (ContinueNewOrExit())
|
||||
@ -2223,7 +2223,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
try
|
||||
{
|
||||
var son = new Son();
|
||||
var list = new List<string>(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)));
|
||||
var list = new List<string>(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName)));
|
||||
if (son.IsMine(list, fileName))
|
||||
{
|
||||
if (ContinueNewOrExit())
|
||||
@ -2264,7 +2264,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
try
|
||||
{
|
||||
var satBoxPng = new SatBoxPng();
|
||||
var list = new List<string>(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)));
|
||||
var list = new List<string>(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName)));
|
||||
if (satBoxPng.IsMine(list, fileName))
|
||||
{
|
||||
var subtitle = new Subtitle();
|
||||
@ -2285,7 +2285,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
try
|
||||
{
|
||||
var sst = new SonicScenaristBitmaps();
|
||||
var list = new List<string>(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)));
|
||||
var list = new List<string>(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName)));
|
||||
if (sst.IsMine(list, fileName))
|
||||
{
|
||||
if (ContinueNewOrExit())
|
||||
@ -2304,7 +2304,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
try
|
||||
{
|
||||
var htmlSamiArray = new HtmlSamiArray();
|
||||
var list = new List<string>(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)));
|
||||
var list = new List<string>(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName)));
|
||||
if (htmlSamiArray.IsMine(list, fileName))
|
||||
{
|
||||
htmlSamiArray.LoadSubtitle(_subtitle, list, fileName);
|
||||
@ -2429,7 +2429,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
if (ext == ".xml" || ext == ".dfxp")
|
||||
{
|
||||
var sb = new StringBuilder();
|
||||
foreach (var line in File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)))
|
||||
foreach (var line in File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName)))
|
||||
sb.AppendLine(line);
|
||||
var xmlAsString = sb.ToString().Trim();
|
||||
|
||||
@ -2453,7 +2453,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
// Try to use a generic subtitle format parser (guessing subtitle format)
|
||||
try
|
||||
{
|
||||
var enc = Utilities.GetEncodingFromFile(fileName);
|
||||
var enc = LanguageAutoDetect.GetEncodingFromFile(fileName);
|
||||
var s = File.ReadAllText(fileName, enc);
|
||||
|
||||
// check for RTF file
|
||||
@ -5119,7 +5119,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
return;
|
||||
}
|
||||
|
||||
bool isSwedish = Utilities.AutoDetectGoogleLanguage(_subtitle) == "sv";
|
||||
bool isSwedish = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle) == "sv";
|
||||
string promptText = _language.TranslateSwedishToDanish;
|
||||
if (!isSwedish)
|
||||
promptText = _language.TranslateSwedishToDanishWarning;
|
||||
@ -5424,7 +5424,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
int totalLinesChanged = 0;
|
||||
try
|
||||
{
|
||||
wordSpellChecker = new WordSpellChecker(this, Utilities.AutoDetectGoogleLanguage(_subtitle));
|
||||
wordSpellChecker = new WordSpellChecker(this, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle));
|
||||
wordSpellChecker.NewDocument();
|
||||
Application.DoEvents();
|
||||
}
|
||||
@ -6694,10 +6694,10 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
private void ButtonAutoBreakClick(object sender, EventArgs e)
|
||||
{
|
||||
string language = Utilities.AutoDetectGoogleLanguage(_subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
|
||||
string languageOriginal = string.Empty;
|
||||
if (_subtitleAlternate != null)
|
||||
languageOriginal = Utilities.AutoDetectGoogleLanguage(_subtitleAlternate);
|
||||
languageOriginal = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate);
|
||||
|
||||
if (SubtitleListview1.SelectedItems.Count > 1)
|
||||
{
|
||||
@ -7201,7 +7201,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
private void SplitSelectedParagraph(double? splitSeconds, int? textIndex)
|
||||
{
|
||||
string language = Utilities.AutoDetectGoogleLanguage(_subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
|
||||
|
||||
int? alternateTextIndex = null;
|
||||
if (textBoxListViewTextAlternate.Focused)
|
||||
@ -7454,7 +7454,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
var originalCurrent = Utilities.GetOriginalParagraph(firstSelectedIndex, currentParagraph, _subtitleAlternate.Paragraphs);
|
||||
if (originalCurrent != null)
|
||||
{
|
||||
string languageOriginal = Utilities.AutoDetectGoogleLanguage(_subtitleAlternate);
|
||||
string languageOriginal = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate);
|
||||
|
||||
originalCurrent.EndTime.TotalMilliseconds = currentParagraph.EndTime.TotalMilliseconds;
|
||||
var originalNew = new Paragraph(newParagraph);
|
||||
@ -7639,7 +7639,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
private void MergeBeforeToolStripMenuItemClick(object sender, EventArgs e)
|
||||
{
|
||||
string language = Utilities.AutoDetectGoogleLanguage(_subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
|
||||
if (_subtitle.Paragraphs.Count > 0 && SubtitleListview1.SelectedItems.Count > 0)
|
||||
{
|
||||
int firstSelectedIndex = SubtitleListview1.SelectedItems[0].Index;
|
||||
@ -7746,7 +7746,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
string text = sb.ToString();
|
||||
text = HtmlUtil.FixInvalidItalicTags(text);
|
||||
text = ChangeAllLinesItalictoSingleItalic(text);
|
||||
text = Utilities.AutoBreakLine(text, Utilities.AutoDetectGoogleLanguage(_subtitle));
|
||||
text = Utilities.AutoBreakLine(text, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle));
|
||||
currentParagraph.Text = text;
|
||||
|
||||
//display time
|
||||
@ -7894,7 +7894,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
if (old1.Contains(Environment.NewLine) || old2.Contains(Environment.NewLine) ||
|
||||
old1.Length > Configuration.Settings.General.SubtitleLineMaximumLength || old2.Length > Configuration.Settings.General.SubtitleLineMaximumLength)
|
||||
original.Text = Utilities.AutoBreakLine(original.Text, Utilities.AutoDetectGoogleLanguage(_subtitleAlternate));
|
||||
original.Text = Utilities.AutoBreakLine(original.Text, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate));
|
||||
|
||||
if (string.IsNullOrWhiteSpace(old1))
|
||||
original.Text = original.Text.TrimStart();
|
||||
@ -7939,7 +7939,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
if (old1.Contains(Environment.NewLine) || old2.Contains(Environment.NewLine) ||
|
||||
old1.Length > Configuration.Settings.General.SubtitleLineMaximumLength || old2.Length > Configuration.Settings.General.SubtitleLineMaximumLength)
|
||||
currentParagraph.Text = Utilities.AutoBreakLine(currentParagraph.Text, Utilities.AutoDetectGoogleLanguage(_subtitle));
|
||||
currentParagraph.Text = Utilities.AutoBreakLine(currentParagraph.Text, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle));
|
||||
|
||||
if (string.IsNullOrWhiteSpace(old1))
|
||||
currentParagraph.Text = currentParagraph.Text.TrimStart();
|
||||
@ -9803,7 +9803,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
bool saveChangeCaseChanges = true;
|
||||
var casingNamesLinesChanged = 0;
|
||||
changeCasing.FixCasing(selectedLines, Utilities.AutoDetectLanguageName(Configuration.Settings.General.SpellCheckLanguage, _subtitle));
|
||||
changeCasing.FixCasing(selectedLines, LanguageAutoDetect.AutoDetectLanguageName(Configuration.Settings.General.SpellCheckLanguage, _subtitle));
|
||||
if (changeCasing.ChangeNamesToo)
|
||||
{
|
||||
using (var changeCasingNames = new ChangeCasingNames())
|
||||
@ -11572,10 +11572,10 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
if (_subtitle.Paragraphs.Count > 0 && SubtitleListview1.SelectedItems.Count > 0)
|
||||
{
|
||||
MakeHistoryForUndo(_language.BeforeAutoBalanceSelectedLines);
|
||||
string language = Utilities.AutoDetectGoogleLanguage(_subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
|
||||
string languageOriginal = string.Empty;
|
||||
if (_subtitleAlternate != null)
|
||||
Utilities.AutoDetectGoogleLanguage(_subtitleAlternate);
|
||||
LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate);
|
||||
foreach (ListViewItem item in SubtitleListview1.SelectedItems)
|
||||
{
|
||||
var p = _subtitle.GetParagraphOrDefault(item.Index);
|
||||
@ -11779,7 +11779,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
if (autoBreakUnbreakLines.ShowDialog() == DialogResult.OK && autoBreakUnbreakLines.FixedText.Count > 0)
|
||||
{
|
||||
MakeHistoryForUndo(_language.BeforeAutoBalanceSelectedLines);
|
||||
var language = Utilities.AutoDetectGoogleLanguage(_subtitle);
|
||||
var language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
|
||||
SubtitleListview1.BeginUpdate();
|
||||
foreach (int index in SubtitleListview1.SelectedIndices)
|
||||
{
|
||||
@ -13744,7 +13744,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
private void buttonGoogleTranslateIt_Click(object sender, EventArgs e)
|
||||
{
|
||||
string languageId = Utilities.AutoDetectGoogleLanguage(_subtitle);
|
||||
string languageId = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
|
||||
System.Diagnostics.Process.Start("https://translate.google.com/#auto|" + languageId + "|" + Utilities.UrlEncode(textBoxSearchWord.Text));
|
||||
}
|
||||
|
||||
@ -17087,7 +17087,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
var p = _subtitle.GetParagraphOrDefault(firstSelectedIndex);
|
||||
if (p != null)
|
||||
{
|
||||
string defaultFromLanguage = Utilities.AutoDetectGoogleLanguage(_subtitle);
|
||||
string defaultFromLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
|
||||
string defaultToLanguage = defaultFromLanguage;
|
||||
if (_subtitleAlternate != null)
|
||||
{
|
||||
@ -17095,7 +17095,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
if (o != null)
|
||||
{
|
||||
p = o;
|
||||
defaultFromLanguage = Utilities.AutoDetectGoogleLanguage(_subtitleAlternate);
|
||||
defaultFromLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate);
|
||||
}
|
||||
}
|
||||
Cursor = Cursors.WaitCursor;
|
||||
|
@ -110,7 +110,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
if (clearFixes)
|
||||
listViewFixes.Items.Clear();
|
||||
numberOfMerges = 0;
|
||||
string language = Utilities.AutoDetectGoogleLanguage(subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle);
|
||||
var mergedSubtitle = new Subtitle();
|
||||
bool lastMerged = false;
|
||||
Paragraph p = null;
|
||||
|
@ -52,7 +52,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
NumberOfMerges = 0;
|
||||
_subtitle = subtitle;
|
||||
MergeTextWithSameTimeCodes_ResizeEnd(null, null);
|
||||
_language = Utilities.AutoDetectGoogleLanguage(subtitle);
|
||||
_language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle);
|
||||
}
|
||||
|
||||
private void previewTimer_Tick(object sender, EventArgs e)
|
||||
|
@ -967,7 +967,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
}
|
||||
}
|
||||
if (autoDetect || string.IsNullOrEmpty(_languageName))
|
||||
_languageName = Utilities.AutoDetectLanguageName(_languageName, subtitle);
|
||||
_languageName = LanguageAutoDetect.AutoDetectLanguageName(_languageName, subtitle);
|
||||
string dictionary = Utilities.DictionaryFolder + _languageName;
|
||||
|
||||
LoadDictionaries(dictionaryFolder, dictionary);
|
||||
@ -1128,7 +1128,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
gd.ShowDialog(this);
|
||||
}
|
||||
FillSpellCheckDictionaries(Utilities.AutoDetectLanguageName(null, _subtitle));
|
||||
FillSpellCheckDictionaries(LanguageAutoDetect.AutoDetectLanguageName(null, _subtitle));
|
||||
if (comboBoxDictionaries.Items.Count > 0 && comboBoxDictionaries.SelectedIndex == -1)
|
||||
comboBoxDictionaries.SelectedIndex = 0;
|
||||
ComboBoxDictionariesSelectedIndexChanged(null, null);
|
||||
|
@ -145,7 +145,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
if (clearFixes)
|
||||
listViewFixes.Items.Clear();
|
||||
numberOfSplits = 0;
|
||||
string language = Utilities.AutoDetectGoogleLanguage(subtitle);
|
||||
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle);
|
||||
var splittedSubtitle = new Subtitle();
|
||||
string[] expectedPunctuations = { ". -", "! -", "? -" };
|
||||
for (int i = 0; i < subtitle.Paragraphs.Count; i++)
|
||||
|
39
src/Test/Core/LanguageAutoDetectTest.cs
Normal file
39
src/Test/Core/LanguageAutoDetectTest.cs
Normal file
@ -0,0 +1,39 @@
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
using Nikse.SubtitleEdit.Core;
|
||||
|
||||
namespace Test.Core
|
||||
{
|
||||
|
||||
/// <summary>
/// Unit tests for the subtitle-content overload of
/// <c>LanguageAutoDetect.AutoDetectGoogleLanguage</c>, run against sample subtitle files.
/// </summary>
[DeploymentItem("Files")]
[TestClass]
public class LanguageAutoDetectTest
{
    /// <summary>
    /// Loads a subtitle file and returns the language code detected from its contents.
    /// </summary>
    /// <param name="fileName">
    /// Name of the sample file, resolved relative to the current working directory
    /// (presumably where the "Files" deployment item is copied - confirm against the test settings).
    /// </param>
    /// <returns>The value returned by <c>AutoDetectGoogleLanguage</c> for the loaded subtitle.</returns>
    private static string GetLanguageCode(string fileName)
    {
        fileName = Path.Combine(Directory.GetCurrentDirectory(), fileName);
        var sub = new Subtitle();
        Encoding encoding; // required by the LoadSubtitle signature; the detected encoding is not used here
        sub.LoadSubtitle(fileName, out encoding, null);
        return LanguageAutoDetect.AutoDetectGoogleLanguage(sub);
    }

    /// <summary>
    /// The Russian sample file must be detected as "ru".
    /// </summary>
    [TestMethod]
    public void AutoDetectRussian()
    {
        var languageCode = GetLanguageCode("auto_detect_Russian.srt");

        // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
        Assert.AreEqual("ru", languageCode);
    }

    /// <summary>
    /// The Danish sample file must be detected as "da".
    /// </summary>
    [TestMethod]
    public void AutoDetectDanish()
    {
        var languageCode = GetLanguageCode("auto_detect_Danish.srt");

        // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
        Assert.AreEqual("da", languageCode);
    }
}
|
||||
}
|
1403
src/Test/Files/auto_detect_Danish.srt
Normal file
1403
src/Test/Files/auto_detect_Danish.srt
Normal file
File diff suppressed because it is too large
Load Diff
860
src/Test/Files/auto_detect_Russian.srt
Normal file
860
src/Test/Files/auto_detect_Russian.srt
Normal file
@ -0,0 +1,860 @@
|
||||
1
|
||||
00:00:51,397 --> 00:00:56,603
|
||||
Ричмонде, штат Вирджиния 1865
|
||||
BORGERKRIGENS оформление
|
||||
|
||||
2
|
||||
00:01:05,211 --> 00:01:09,515
|
||||
Последний броненосец
|
||||
блокады DER BRYDER союз
|
||||
|
||||
3
|
||||
00:01:22,929 --> 00:01:25,303
|
||||
Простите, г-н Kaptajn.
|
||||
|
||||
4
|
||||
00:01:25,315 --> 00:01:27,700
|
||||
Просто дальше. Вы принимаете
|
||||
на себя в будущем!
|
||||
|
||||
5
|
||||
00:01:32,105 --> 00:01:34,908
|
||||
Готовы к вылету, г-н капитан.
|
||||
|
||||
6
|
||||
00:01:43,817 --> 00:01:48,421
|
||||
Готовьтесь войти kanalen.
|
||||
Hastighed пяти узлов.
|
||||
|
||||
7
|
||||
00:01:54,127 --> 00:01:59,199
|
||||
- Направление nord. 115 градусов,
|
||||
115 градусов на север.
|
||||
|
||||
8
|
||||
00:02:01,401 --> 00:02:03,403
|
||||
Огонь!
|
||||
|
||||
9
|
||||
00:02:11,211 --> 00:02:13,113
|
||||
Огонь!
|
||||
|
||||
10
|
||||
00:02:18,318 --> 00:02:22,322
|
||||
- Полный вперед - Да,
|
||||
господин капитан!
|
||||
|
||||
11
|
||||
00:02:53,019 --> 00:02:57,223
|
||||
Стоп motorerne. Jeg хотим мира!
|
||||
|
||||
12
|
||||
00:02:57,823 --> 00:03:02,628
|
||||
Подожгли порты Лук пистолет!
|
||||
|
||||
13
|
||||
00:03:49,609 --> 00:03:53,913
|
||||
Датский перевод LHB и
|
||||
Fields Synkroniseret Inside
|
||||
|
||||
14
|
||||
00:04:15,801 --> 00:04:19,905
|
||||
FLÅDEHISTORIKERS фанатичный SØGEN
|
||||
EFTER GHOST военный корабль
|
||||
|
||||
15
|
||||
00:06:17,421 --> 00:06:19,924
|
||||
DEN не Батин.
|
||||
|
||||
16
|
||||
00:07:44,008 --> 00:07:48,412
|
||||
Г-жа Nwokolo. Я Ева Рохас, WHO.
|
||||
Dette является д-р. Хоппер.
|
||||
|
||||
17
|
||||
00:07:48,612 --> 00:07:50,614
|
||||
Пожалуйста, следуйте с.
|
||||
|
||||
18
|
||||
00:07:55,219 --> 00:07:58,623
|
||||
Извините mørket. Hans
|
||||
глаза не терпят света.
|
||||
|
||||
19
|
||||
00:07:58,723 --> 00:08:02,627
|
||||
- Как его зовут - Азикиве?
|
||||
Большинство людей называют его Kiwe.
|
||||
|
||||
20
|
||||
00:08:02,827 --> 00:08:07,398
|
||||
Здравствуйте, Kiwe, меня зовут Eva.
|
||||
Vi должны смотреть на тебя, ладно?
|
||||
|
||||
21
|
||||
00:08:07,598 --> 00:08:10,401
|
||||
- Как долго он болел
|
||||
- два дня?
|
||||
|
||||
22
|
||||
00:08:10,601 --> 00:08:15,205
|
||||
- Неужели он ездил недавно - Он был
|
||||
с отцом в Мали на прошлой неделе.
|
||||
|
||||
23
|
||||
00:08:15,406 --> 00:08:19,410
|
||||
- Где его отец теперь - Он в маяк.
|
||||
Там он работает.
|
||||
|
||||
24
|
||||
00:08:19,611 --> 00:08:21,801
|
||||
Артериальное давление 80
|
||||
более 50 Kredsløbssvigt.
|
||||
|
||||
25
|
||||
00:08:21,813 --> 00:08:24,015
|
||||
Мы даем ему кровь.
|
||||
|
||||
26
|
||||
00:08:30,721 --> 00:08:33,824
|
||||
- Сколько транквилизаторов, он должен иметь
|
||||
- Дайте ему 2 мл?
|
||||
|
||||
27
|
||||
00:08:38,897 --> 00:08:41,799
|
||||
Это хорошо, Kiwe. Alt хорошо.
|
||||
|
||||
28
|
||||
00:08:49,607 --> 00:08:53,310
|
||||
- Ты в порядке -
|
||||
Мали, как и другие?
|
||||
|
||||
29
|
||||
00:08:55,613 --> 00:08:59,417
|
||||
- Это epidemi.
|
||||
- Есть шесть случаев. Это не достаточно.
|
||||
|
||||
30
|
||||
00:08:59,517 --> 00:09:02,120
|
||||
Сколько длится Tres?
|
||||
|
||||
31
|
||||
00:09:02,220 --> 00:09:04,522
|
||||
Шесть тысяч?
|
||||
|
||||
32
|
||||
00:09:05,323 --> 00:09:09,226
|
||||
Когда начинать det
|
||||
at значит ничего?
|
||||
|
||||
33
|
||||
00:09:15,900 --> 00:09:18,802
|
||||
Нам нужно найти источник, Фрэнк.
|
||||
|
||||
34
|
||||
00:09:20,204 --> 00:09:23,907
|
||||
Вы хотите Мали, мужчин это
|
||||
происходит у вас нет.
|
||||
|
||||
35
|
||||
00:09:24,007 --> 00:09:27,811
|
||||
ВОЗ не потеряет flere
|
||||
medarbejdere в гражданскую войну.
|
||||
|
||||
36
|
||||
00:09:28,411 --> 00:09:30,413
|
||||
Сделайте свой доклад закончил.
|
||||
|
||||
37
|
||||
00:09:30,513 --> 00:09:32,904
|
||||
Прекрасно. Так что может быть
|
||||
использовано для ligsynet.
|
||||
|
||||
38
|
||||
00:09:32,916 --> 00:09:35,319
|
||||
Ева?
|
||||
|
||||
39
|
||||
00:09:38,021 --> 00:09:42,826
|
||||
Я делаю то, что kan. Jeg передать
|
||||
его на рассмотрение Совета.
|
||||
|
||||
40
|
||||
00:09:43,026 --> 00:09:46,797
|
||||
- Может быть, они слушают это gang.
|
||||
- Tak.
|
||||
|
||||
41
|
||||
00:09:47,297 --> 00:09:51,702
|
||||
- Нам нужна кровь из faderen.
|
||||
, я могу его найти.
|
||||
|
||||
42
|
||||
00:10:14,124 --> 00:10:16,126
|
||||
Алло?
|
||||
|
||||
43
|
||||
00:10:18,496 --> 00:10:20,698
|
||||
Г-н Nwokolo?
|
||||
|
||||
44
|
||||
00:10:30,608 --> 00:10:32,810
|
||||
Г-н Nwokolo?
|
||||
|
||||
45
|
||||
00:11:17,921 --> 00:11:20,823
|
||||
Вам нечего здесь делать.
|
||||
|
||||
46
|
||||
00:11:27,798 --> 00:11:30,500
|
||||
Быстрый Пометьте ее кошелек.
|
||||
|
||||
47
|
||||
00:11:55,925 --> 00:11:58,495
|
||||
Ты в порядке?
|
||||
|
||||
48
|
||||
00:12:31,394 --> 00:12:34,697
|
||||
Возьмите его Hvad ты делаешь?
|
||||
|
||||
49
|
||||
00:12:40,302 --> 00:12:43,205
|
||||
Остановить его с дерьмом!
|
||||
|
||||
50
|
||||
00:12:46,109 --> 00:12:48,811
|
||||
Stop держите спокойно.
|
||||
|
||||
51
|
||||
00:12:54,416 --> 00:12:58,420
|
||||
Постой тегов прямо сейчас!!
|
||||
|
||||
52
|
||||
00:13:01,923 --> 00:13:04,894
|
||||
Дайте мне skiftenøglen.
|
||||
Jeg нужно сейчас.
|
||||
|
||||
53
|
||||
00:13:05,094 --> 00:13:07,096
|
||||
Есть ли?
|
||||
|
||||
54
|
||||
00:13:09,198 --> 00:13:13,803
|
||||
Поэтому я должен использовать olien.
|
||||
Mange спасибо!
|
||||
|
||||
55
|
||||
00:13:16,906 --> 00:13:20,709
|
||||
Хорошо. Так же как и klaret.
|
||||
Vi закончены.
|
||||
|
||||
56
|
||||
00:13:21,310 --> 00:13:26,716
|
||||
Извините. Позвольте мне воспользоваться
|
||||
det. Jeg имени Аль Giordino.
|
||||
|
||||
57
|
||||
00:13:27,016 --> 00:13:30,319
|
||||
- Ева Rojas.
|
||||
- Хорошая работа.
|
||||
|
||||
58
|
||||
00:13:30,419 --> 00:13:32,822
|
||||
Добро пожаловать на борт.
|
||||
|
||||
59
|
||||
00:13:33,622 --> 00:13:36,725
|
||||
Включите den. Få его.
|
||||
|
||||
60
|
||||
00:13:41,497 --> 00:13:45,502
|
||||
Привет. Чувствовать себя лучше?
|
||||
СГЭ Вызывается Руди.
|
||||
|
||||
61
|
||||
00:13:45,602 --> 00:13:49,605
|
||||
- Простите, а где мы - Мы находимся
|
||||
на Martha Ann. Это NUMA-офф судна.
|
||||
|
||||
62
|
||||
00:13:49,806 --> 00:13:54,310
|
||||
Мы не знаем, кто вы, и мы valgte
|
||||
selv пропатчить себя в руки
|
||||
|
||||
63
|
||||
00:13:54,510 --> 00:13:59,815
|
||||
я ждал два месяца her. Ødelæg
|
||||
речь идет не о дать ему утонуть.
|
||||
|
||||
64
|
||||
00:14:00,015 --> 00:14:01,818
|
||||
Да, адмирал.
|
||||
|
||||
65
|
||||
00:14:02,318 --> 00:14:07,924
|
||||
Пятый .. Четвёртое .. Третий ..
|
||||
Второе .. 1!
|
||||
|
||||
66
|
||||
00:14:20,302 --> 00:14:22,503
|
||||
Дамы и господа, -
|
||||
|
||||
67
|
||||
00:14:22,504 --> 00:14:27,709
|
||||
- позвольте мне, после 772 лет пребывания
|
||||
på havbunden представить вам -
|
||||
|
||||
68
|
||||
00:14:28,010 --> 00:14:32,614
|
||||
- король Батин!
|
||||
|
||||
69
|
||||
00:14:34,115 --> 00:14:36,818
|
||||
Молодцы, все.
|
||||
|
||||
70
|
||||
00:14:36,918 --> 00:14:41,523
|
||||
Кроме вас, Ал. Какого черта ты делаешь?
|
||||
Det 10 тонн игры, а не дверь гаража!
|
||||
|
||||
71
|
||||
00:14:41,723 --> 00:14:46,895
|
||||
Вы думаете о ваших Томпсон 1291.
|
||||
Men это 1293rd
|
||||
|
||||
72
|
||||
00:14:47,196 --> 00:14:49,086
|
||||
Но вы не можете
|
||||
использовать любой из dem.
|
||||
|
||||
73
|
||||
00:14:49,098 --> 00:14:50,999
|
||||
Boys?
|
||||
|
||||
74
|
||||
00:14:51,099 --> 00:14:54,503
|
||||
Короля должна возвышаться над
|
||||
folket på музей около пяти часов.
|
||||
|
||||
75
|
||||
00:14:54,803 --> 00:14:58,808
|
||||
- Это будет fremme.
|
||||
, я надеюсь, тоже.
|
||||
|
||||
76
|
||||
00:15:00,309 --> 00:15:04,713
|
||||
- Вы на ноги
|
||||
- Спасибо вам?
|
||||
|
||||
77
|
||||
00:15:04,913 --> 00:15:08,116
|
||||
К счастью, вы только потеряли taske.
|
||||
Det это не место, чтобы пойти.
|
||||
|
||||
78
|
||||
00:15:08,216 --> 00:15:12,320
|
||||
- Черт, я потерял ход taske.
|
||||
- Это было надеяться не стоит умирать?
|
||||
|
||||
79
|
||||
00:15:12,521 --> 00:15:16,424
|
||||
- Это жесткий spørgsmål.
|
||||
- Нет ничего более ценного, чем ваша жизнь.
|
||||
|
||||
80
|
||||
00:15:16,624 --> 00:15:20,196
|
||||
Получить, что осел здесь и hjælp
|
||||
med, чтобы очистить его отсюда.
|
||||
|
||||
81
|
||||
00:15:20,296 --> 00:15:24,099
|
||||
- Простите, "жена" вызов -
|
||||
Получить его снимают с крючка!
|
||||
|
||||
82
|
||||
00:15:24,300 --> 00:15:26,402
|
||||
Получить промывают, мы
|
||||
собираемся на вечеринку.
|
||||
|
||||
83
|
||||
00:15:26,502 --> 00:15:28,704
|
||||
--Адмирал отставке.
|
||||
|
||||
84
|
||||
00:15:28,804 --> 00:15:32,808
|
||||
- Джим Sandecker.
|
||||
- Ева Рохас, я работаю в ВОЗ.
|
||||
|
||||
85
|
||||
00:15:34,009 --> 00:15:37,013
|
||||
Ты похож на тех, кто
|
||||
нуждается кофе.
|
||||
|
||||
86
|
||||
00:15:38,314 --> 00:15:42,818
|
||||
Мне нужна ваша hjælp. Er его
|
||||
слева направо или наоборот
|
||||
|
||||
87
|
||||
00:15:43,018 --> 00:15:46,822
|
||||
Это сводит меня с vanvid. Du должна
|
||||
научить меня, чтобы связать его.
|
||||
|
||||
88
|
||||
00:15:47,022 --> 00:15:50,992
|
||||
- Это то, что делать с "вокруг дерева."
|
||||
- I'll быть там через час.
|
||||
|
||||
89
|
||||
00:15:51,292 --> 00:15:55,396
|
||||
- Что? Дирк?
|
||||
- Это было Oshodi.
|
||||
|
||||
90
|
||||
00:15:55,696 --> 00:15:58,700
|
||||
Он считает, что он что-то нашел.
|
||||
|
||||
91
|
||||
00:15:58,900 --> 00:16:03,104
|
||||
- Есть также. Это здорово!
|
||||
- Спасибо.
|
||||
|
||||
92
|
||||
00:16:03,304 --> 00:16:05,807
|
||||
Нет, нет Det велик для меня!
|
||||
|
||||
93
|
||||
00:16:05,907 --> 00:16:10,211
|
||||
Я рад сказать правду Экер, у вас
|
||||
не приходят на сегодня музей -
|
||||
|
||||
94
|
||||
00:16:10,411 --> 00:16:13,914
|
||||
- потому что одна из nigerianske
|
||||
underverden нашли доказательства -
|
||||
|
||||
95
|
||||
00:16:14,014 --> 00:16:18,420
|
||||
- о том, что корабль затонул от
|
||||
borgerkrigen er во время шторма в Африка.
|
||||
|
||||
96
|
||||
00:16:18,620 --> 00:16:22,724
|
||||
Это то, что вы говорите,
|
||||
право Sandecker Freak Out!
|
||||
|
||||
97
|
||||
00:16:22,924 --> 00:16:25,748
|
||||
Я там. Он получает
|
||||
все красные дюйма ..
|
||||
|
||||
98
|
||||
00:16:25,760 --> 00:16:28,596
|
||||
лиса в погоне за кроликом
|
||||
вокруг дерева, -
|
||||
|
||||
99
|
||||
00:16:28,796 --> 00:16:33,601
|
||||
- в яму. Как завязать пн det.
|
||||
Tag легко. Я буду там.
|
||||
|
||||
100
|
||||
00:16:34,001 --> 00:16:37,606
|
||||
Спасибо.
|
||||
|
||||
101
|
||||
00:16:38,707 --> 00:16:42,510
|
||||
Во-первых, я благодарю Лагос museum
|
||||
for этот удивительный прием.
|
||||
|
||||
102
|
||||
00:16:42,710 --> 00:16:48,816
|
||||
Я также хочу поблагодарить наших
|
||||
hovedsponsor på этого проекта, Ив Massarde.
|
||||
|
||||
103
|
||||
00:16:54,022 --> 00:16:59,394
|
||||
Мы NUMA. Это Nationale
|
||||
Undervands Морское Агентство -
|
||||
|
||||
104
|
||||
00:16:59,694 --> 00:17:03,999
|
||||
- и это, дамы и господа,
|
||||
hvad что мы делаем.
|
||||
|
||||
105
|
||||
00:17:05,600 --> 00:17:08,002
|
||||
Kong Батин.
|
||||
|
||||
106
|
||||
00:17:10,505 --> 00:17:13,408
|
||||
- Он не в буфете
|
||||
- Черт.!
|
||||
|
||||
107
|
||||
00:17:13,508 --> 00:17:15,410
|
||||
- Хочешь Кебаб
|
||||
- Нет, спасибо.
|
||||
|
||||
108
|
||||
00:17:15,510 --> 00:17:19,614
|
||||
Правительства и private
|
||||
organisationer, как наша -
|
||||
|
||||
109
|
||||
00:17:19,814 --> 00:17:25,820
|
||||
- могут совместно содействовать at
|
||||
historie, которые были потеряны по пути -
|
||||
|
||||
110
|
||||
00:17:26,120 --> 00:17:31,592
|
||||
- снова вернулся в свою благодарность folk.
|
||||
Mange. Хорошего вечера.
|
||||
|
||||
111
|
||||
00:17:37,798 --> 00:17:40,689
|
||||
- Если у вас есть
|
||||
компакт-дисков на вашем
|
||||
|
||||
112
|
||||
00:17:40,701 --> 00:17:43,604
|
||||
корабле - Да, я купил у
|
||||
вас в прошлом месяце?
|
||||
|
||||
113
|
||||
00:17:44,405 --> 00:17:46,795
|
||||
У меня есть чудесное
|
||||
произведение искусства
|
||||
|
||||
114
|
||||
00:17:46,807 --> 00:17:49,209
|
||||
her. Direkte из Иракского
|
||||
национального музея.
|
||||
|
||||
115
|
||||
00:17:49,409 --> 00:17:52,814
|
||||
Не показывать мне эти ting. Så
|
||||
хорошие друзья, это не так.
|
||||
|
||||
116
|
||||
00:17:54,315 --> 00:17:57,218
|
||||
Вот курса.
|
||||
|
||||
117
|
||||
00:17:57,318 --> 00:18:02,022
|
||||
Особый stykke. Det
|
||||
я вам говорил.
|
||||
|
||||
118
|
||||
00:18:03,323 --> 00:18:07,194
|
||||
Это давит мое hjerte bare,
|
||||
чтобы показать вам.
|
||||
|
||||
119
|
||||
00:18:14,202 --> 00:18:18,606
|
||||
- Где ты это
|
||||
- Не касаясь?
|
||||
|
||||
120
|
||||
00:18:19,006 --> 00:18:22,509
|
||||
Таким образом, хорошие
|
||||
друзья, мы бы и нет?
|
||||
|
||||
121
|
||||
00:18:37,992 --> 00:18:40,494
|
||||
Это большая партия.
|
||||
|
||||
122
|
||||
00:18:42,096 --> 00:18:45,299
|
||||
- Спасибо за приглашение, admiral.
|
||||
- удовольствие на моей стороне.
|
||||
|
||||
123
|
||||
00:18:45,399 --> 00:18:49,404
|
||||
Ив, это женщина jeg fortalte вы, доктор.
|
||||
Ева Рохас.
|
||||
|
||||
124
|
||||
00:18:49,604 --> 00:18:52,707
|
||||
Мне очень приятно встретиться Dem.
|
||||
Mit зовут Ив Massarde.
|
||||
|
||||
125
|
||||
00:18:52,807 --> 00:18:55,510
|
||||
Это доктор. Фрэнк Хоппер.
|
||||
|
||||
126
|
||||
00:18:55,610 --> 00:18:58,512
|
||||
Ив делает masse
|
||||
forretninger в Африке.
|
||||
|
||||
127
|
||||
00:18:58,612 --> 00:19:01,115
|
||||
Даже некоторые в Мали.
|
||||
|
||||
128
|
||||
00:19:01,715 --> 00:19:03,717
|
||||
Вы извините меня?
|
||||
|
||||
129
|
||||
00:19:03,817 --> 00:19:08,623
|
||||
Я понимаю, что я считаю, - дер-это
|
||||
эпидемия на пути из Мали?
|
||||
|
||||
130
|
||||
00:19:08,823 --> 00:19:11,592
|
||||
- Мы не будем называть его epidemi.
|
||||
- Что вы это называете?
|
||||
|
||||
131
|
||||
00:19:11,692 --> 00:19:14,595
|
||||
Epidemi. Så вы делаете
|
||||
бизнес в Мали?
|
||||
|
||||
132
|
||||
00:19:14,695 --> 00:19:19,299
|
||||
Знаете кого-то, кто может помочь нам med at
|
||||
нажмите ВОЗ направить группу там, внизу?
|
||||
|
||||
133
|
||||
00:19:19,499 --> 00:19:23,203
|
||||
- Мали находится под контролем
|
||||
полевых командиров... генерал-Казим?
|
||||
|
||||
134
|
||||
00:19:23,403 --> 00:19:25,304
|
||||
Ты его знаешь?
|
||||
|
||||
135
|
||||
00:19:25,305 --> 00:19:27,333
|
||||
Он был лейтенантом в
|
||||
армии, но дал себя selv en
|
||||
|
||||
136
|
||||
00:19:27,345 --> 00:19:29,385
|
||||
продвижение по службе, когда
|
||||
он выстрелил в президента.
|
||||
|
||||
137
|
||||
00:19:29,510 --> 00:19:33,313
|
||||
- Он дает слово «военачальник» betydning.
|
||||
И он управляет страной?
|
||||
|
||||
138
|
||||
00:19:33,513 --> 00:19:37,117
|
||||
Половина. Другие kontrollerer ingen
|
||||
половины. Я не знаю, что хуже.
|
||||
|
||||
139
|
||||
00:19:37,217 --> 00:19:41,221
|
||||
Я предупреждаю вас. Это meget farligt
|
||||
для иностранцев прямо сейчас.
|
||||
|
||||
140
|
||||
00:19:41,421 --> 00:19:44,046
|
||||
Но, вероятно, более
|
||||
опасными для indfødte.
|
||||
|
||||
141
|
||||
00:19:44,058 --> 00:19:46,694
|
||||
Но ваши мертвые будут плохо
|
||||
выглядеть в газетах.
|
||||
|
||||
142
|
||||
00:19:46,894 --> 00:19:49,697
|
||||
Это делает эпидемию тоже.
|
||||
|
||||
143
|
||||
00:19:51,498 --> 00:19:57,604
|
||||
Хорошо. Я пытаюсь позвонить вокруг
|
||||
немного, мужчин я просто бизнесмен -
|
||||
|
||||
144
|
||||
00:19:57,904 --> 00:20:01,708
|
||||
- так было tålmodighed.
|
||||
- Да, это ее сильная сторона.
|
||||
|
||||
145
|
||||
00:20:01,808 --> 00:20:05,011
|
||||
Он не поможет os. Det была
|
||||
пустая трата времени.
|
||||
|
||||
146
|
||||
00:20:05,211 --> 00:20:11,418
|
||||
Вы можете не только баржи в en borgerkrig.
|
||||
Вы знаете, это слишком опасно.
|
||||
|
||||
147
|
||||
00:20:11,718 --> 00:20:13,620
|
||||
Я думаю, мы должны вернуться...
|
||||
|
||||
148
|
||||
00:20:14,221 --> 00:20:17,391
|
||||
Адмирал, вы когда-нибудь видели
|
||||
en gulddollar от Конфедерации?
|
||||
|
||||
149
|
||||
00:20:17,591 --> 00:20:21,294
|
||||
- Не начинайте снова - Нет, потому
|
||||
что они никогда не делали один!
|
||||
|
||||
150
|
||||
00:20:21,394 --> 00:20:23,797
|
||||
Импринтинг машина
|
||||
ødelagt ved войны.
|
||||
|
||||
151
|
||||
00:20:23,997 --> 00:20:28,602
|
||||
- Я молюсь dig., но не раньше,
|
||||
чем Джефферсон получил пять лет.
|
||||
|
||||
152
|
||||
00:20:28,802 --> 00:20:31,905
|
||||
Четыре из них он дал
|
||||
til sine генералов.
|
||||
|
||||
153
|
||||
00:20:32,106 --> 00:20:34,496
|
||||
Ли Джексон...
|
||||
|
||||
154
|
||||
00:20:34,508 --> 00:20:36,910
|
||||
Каждый раз, когда мы находимся
|
||||
в Африке, есть корабль.
|
||||
|
||||
155
|
||||
00:20:37,110 --> 00:20:40,714
|
||||
А старые havnejournaler. Vi
|
||||
едет в Австралию завтра.
|
||||
|
||||
156
|
||||
00:20:40,814 --> 00:20:45,119
|
||||
Четыре из них fundet.
|
||||
Men пятый не является.
|
||||
|
||||
157
|
||||
00:20:45,319 --> 00:20:50,790
|
||||
Это был дан друг familien. En умелым
|
||||
капитаном имени Мейсон гробниц.
|
||||
|
||||
158
|
||||
00:20:50,990 --> 00:20:55,094
|
||||
Капитан броненосец, CSS Техас.
|
||||
|
||||
159
|
||||
00:20:56,796 --> 00:21:00,800
|
||||
- Кто это у тебя - Oshodi,
|
||||
и у него от Endigue?
|
||||
|
||||
160
|
||||
00:21:00,900 --> 00:21:04,704
|
||||
Важно то, что в Endigue
|
||||
fandt Labbezanga в Мали.
|
||||
|
||||
161
|
||||
00:21:04,904 --> 00:21:09,909
|
||||
- Мой отец собирает mønter.
|
||||
- монета отплыл в Нигере с Техас.
|
||||
|
||||
162
|
||||
00:21:10,109 --> 00:21:14,914
|
||||
- Невозможно. Она не может с strøm.
|
||||
- отпусти меня к Labbezanga и нюхать мало.
|
||||
|
||||
163
|
||||
00:21:15,114 --> 00:21:18,617
|
||||
- Вы не получите моей båd.
|
||||
- три дня. Всего за три дня!
|
||||
|
||||
164
|
||||
00:21:19,418 --> 00:21:21,620
|
||||
Представьте себе, что.
|
||||
|
||||
165
|
||||
00:21:24,591 --> 00:21:29,796
|
||||
Хорошо, если это не удается, snakker Я
|
||||
никогда не говорить об этом больше!
|
||||
|
||||
166
|
||||
00:21:31,698 --> 00:21:34,100
|
||||
У вас есть 72 часа.
|
||||
|
||||
167
|
||||
00:21:34,200 --> 00:21:38,204
|
||||
Не наносекундных længere. I мальчика
|
||||
только что купили вы на лодке.
|
||||
|
||||
168
|
||||
00:21:38,404 --> 00:21:41,408
|
||||
Вы джентльмен, uanset, что
|
||||
говорят другие о тебе!
|
||||
|
||||
169
|
||||
00:21:41,508 --> 00:21:44,711
|
||||
Бьюсь об заклад, бутылку на
|
||||
, мы никогда не найти его.
|
||||
|
||||
170
|
||||
00:21:44,811 --> 00:21:47,414
|
||||
Скажем, целый ящик.
|
||||
|
||||
171
|
||||
00:22:17,910 --> 00:22:20,213
|
||||
Привет.
|
||||
|
||||
172
|
||||
00:22:20,314 --> 00:22:24,718
|
||||
Я пришел sent. Jeg сказали, я
|
||||
хотел бы получить в. Восьмой
|
||||
|
||||
173
|
||||
00:22:24,918 --> 00:22:28,421
|
||||
- Сказал я в. 9.
|
||||
- это более 10!
|
||||
|
||||
174
|
||||
00:22:30,489 --> 00:22:34,493
|
||||
Правда Экер сказал, что мы должны
|
||||
называть его hvis возникли проблемы.
|
||||
|
||||
175
|
||||
00:22:34,693 --> 00:22:36,595
|
||||
С чем?
|
||||
|
||||
176
|
||||
00:22:36,795 --> 00:22:40,500
|
||||
- Я должен взять нас вверх по реке к Mali.
|
||||
- Что?
|
||||
|
||||
177
|
||||
00:22:40,600 --> 00:22:45,704
|
||||
- Нет, нет. Мы вас не Mali.
|
||||
- Подожди!
|
||||
|
||||
178
|
||||
00:22:45,905 --> 00:22:49,608
|
||||
Существует вспышки в Mali.
|
||||
Det может начаться эпидемия.
|
||||
|
||||
179
|
||||
00:22:49,808 --> 00:22:52,311
|
||||
И вы хотите, лифт, доктор?
|
||||
|
||||
180
|
||||
00:22:52,411 --> 00:22:56,415
|
||||
- ВОЗ сократит свой бюджет
|
||||
- Это очень важно.
|
||||
|
||||
181
|
||||
00:22:56,615 --> 00:23:00,720
|
||||
- Иметь достаточно оборудования
|
||||
- Наверное, нет?
|
||||
|
||||
182
|
||||
00:23:05,891 --> 00:23:08,332
|
||||
Это небольшой лодке. Нет
|
||||
конфиденциальность!
|
||||
|
||||
183
|
||||
00:23:08,344 --> 00:23:10,796
|
||||
Я не стесняюсь.
|
||||
|
@ -1056,7 +1056,6 @@ namespace Test.Logic.Forms
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[DeploymentItem("SubtitleEdit.exe")]
|
||||
public void RemoveTextKeepMusicSymbolsButRemoveHI()
|
||||
{
|
||||
RemoveTextForHI target = GetRemoveTextForHiLib();
|
||||
@ -1070,7 +1069,6 @@ namespace Test.Logic.Forms
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[DeploymentItem("SubtitleEdit.exe")]
|
||||
public void RemoveTextRemoveEmdash()
|
||||
{
|
||||
RemoveTextForHI target = GetRemoveTextForHiLib();
|
||||
@ -1085,7 +1083,6 @@ namespace Test.Logic.Forms
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[DeploymentItem("SubtitleEdit.exe")]
|
||||
public void RemoveTextIfUppercaseEmdashRemoveInDialogue()
|
||||
{
|
||||
RemoveTextForHI target = GetRemoveTextForHiLib();
|
||||
@ -1098,7 +1095,6 @@ namespace Test.Logic.Forms
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[DeploymentItem("SubtitleEdit.exe")]
|
||||
public void RemoveTextIfUppercaseEmdashRemoveInDialogueWithSpaces()
|
||||
{
|
||||
RemoveTextForHI target = GetRemoveTextForHiLib();
|
||||
|
@ -43,6 +43,7 @@
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Core\LanguageAutoDetectTest.cs" />
|
||||
<Compile Include="Core\StringExtensionsTest.cs" />
|
||||
<Compile Include="Core\SubtitleTest.cs" />
|
||||
<Compile Include="Logic\Ocr\BinaryOcrTest.cs" />
|
||||
@ -114,6 +115,16 @@
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="Files\auto_detect_Danish.srt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="Files\auto_detect_Russian.srt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||
<PropertyGroup>
|
||||
<PreBuildEvent>
|
||||
|
Loading…
Reference in New Issue
Block a user