From 075de1b239b1e48f1fc7347b6250a2ba35442be4 Mon Sep 17 00:00:00 2001 From: niksedk Date: Wed, 7 Oct 2015 21:58:57 +0200 Subject: [PATCH] Extracted language auto detection to "LanguageAutoDetect" + added two simple unit tests --- libse/Forms/SplitLongLinesHelper.cs | 2 +- libse/LanguageAutoDetect.cs | 716 +++++++++ libse/LibSE.csproj | 1 + libse/Subtitle.cs | 2 +- libse/SubtitleFormats/Cavena890.cs | 2 +- libse/SubtitleFormats/DCSubtitle.cs | 2 +- libse/SubtitleFormats/Sami.cs | 2 +- .../ScenaristClosedCaptions.cs | 2 +- libse/SubtitleFormats/Tmx14.cs | 2 +- libse/Utilities.cs | 707 +-------- src/Forms/AddToNames.cs | 4 +- src/Forms/AutoBreakUnbreakLines.cs | 2 +- src/Forms/BatchConvert.cs | 2 +- src/Forms/ChangeCasingNames.cs | 2 +- src/Forms/ChooseEncoding.cs | 2 +- src/Forms/Compare.cs | 8 +- src/Forms/ExportPngXml.cs | 2 +- src/Forms/FixCommonErrors.cs | 6 +- src/Forms/GoogleTranslate.cs | 4 +- src/Forms/ImportSceneChanges.cs | 2 +- src/Forms/ImportText.cs | 2 +- src/Forms/ImportUnknownFormat.cs | 2 +- src/Forms/Main.cs | 54 +- src/Forms/MergeShortLines.cs | 2 +- src/Forms/MergeTextWithSameTimeCodes.cs | 2 +- src/Forms/SpellCheck.cs | 4 +- src/Forms/SplitLongLines.cs | 2 +- src/Test/Core/LanguageAutoDetectTest.cs | 39 + src/Test/Files/auto_detect_Danish.srt | 1403 +++++++++++++++++ src/Test/Files/auto_detect_Russian.srt | 860 ++++++++++ .../Forms/RemoveTextForHearImpairedTest.cs | 4 - src/Test/Test.csproj | 11 + 32 files changed, 3090 insertions(+), 767 deletions(-) create mode 100644 libse/LanguageAutoDetect.cs create mode 100644 src/Test/Core/LanguageAutoDetectTest.cs create mode 100644 src/Test/Files/auto_detect_Danish.srt create mode 100644 src/Test/Files/auto_detect_Russian.srt diff --git a/libse/Forms/SplitLongLinesHelper.cs b/libse/Forms/SplitLongLinesHelper.cs index 5695da70b..8cd6e6f1a 100644 --- a/libse/Forms/SplitLongLinesHelper.cs +++ b/libse/Forms/SplitLongLinesHelper.cs @@ -41,7 +41,7 @@ namespace Nikse.SubtitleEdit.Core.Forms var splittedIndexes 
= new List(); var autoBreakedIndexes = new List(); var splittedSubtitle = new Subtitle(); - string language = Utilities.AutoDetectGoogleLanguage(subtitle); + string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); for (int i = 0; i < subtitle.Paragraphs.Count; i++) { bool added = false; diff --git a/libse/LanguageAutoDetect.cs b/libse/LanguageAutoDetect.cs new file mode 100644 index 000000000..80aed0db0 --- /dev/null +++ b/libse/LanguageAutoDetect.cs @@ -0,0 +1,716 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using System.Text.RegularExpressions; + +namespace Nikse.SubtitleEdit.Core +{ + public static class LanguageAutoDetect + { + + private static int GetCount(string text, params string[] words) + { + int count = 0; + for (int i = 0; i < words.Length; i++) + { + count += Regex.Matches(text, "\\b" + words[i] + "\\b", (RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture)).Count; + } + return count; + } + + private static int GetCountContains(string text, params string[] words) + { + int count = 0; + for (int i = 0; i < words.Length; i++) + { + var regEx = new Regex(words[i]); + count += regEx.Matches(text).Count; + } + return count; + } + + public static string AutoDetectGoogleLanguage(Encoding encoding) + { + switch (encoding.CodePage) + { + case 860: + return "pt"; // Portuguese + case 28599: + case 1254: + return "tr"; // Turkish + case 28598: + case 1255: + return "he"; // Hebrew + case 28596: + case 1256: + return "ar"; // Arabic + case 1258: + return "vi"; // Vietnamese + case 949: + case 1361: + case 20949: + case 51949: + case 50225: + return "ko"; // Korean + case 1253: + case 28597: + return "el"; // Greek + case 50220: + case 50221: + case 50222: + case 51932: + case 20932: + case 10001: + return "ja"; // Japanese + case 20000: + case 20002: + case 20936: + case 950: + case 52936: + case 54936: + case 51936: + return "zh"; // Chinese + default: + return null; + } + } + + public 
static readonly string[] AutoDetectWordsEnglish = { "we", "are", "and", "you", "your", "what" }; + public static readonly string[] AutoDetectWordsDanish = { "vi", "han", "og", "jeg", "var", "men", "gider", "bliver", "virkelig", "kommer", "tilbage", "Hej" }; + public static readonly string[] AutoDetectWordsNorwegian = { "vi", "er", "og", "jeg", "var", "men" }; + public static readonly string[] AutoDetectWordsSwedish = { "vi", "är", "och", "Jag", "inte", "för" }; + public static readonly string[] AutoDetectWordsSpanish = { "el", "bien", "Vamos", "Hola", "casa", "con" }; + public static readonly string[] AutoDetectWordsFrench = { "un", "vous", "avec", "pas", "ce", "une" }; + public static readonly string[] AutoDetectWordsGerman = { "und", "auch", "sich", "bin", "hast", "möchte" }; + public static readonly string[] AutoDetectWordsDutch = { "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n" }; + public static readonly string[] AutoDetectWordsPolish = { "Czy", "ale", "ty", "siê", "jest", "mnie" }; + public static readonly string[] AutoDetectWordsItalian = { "Cosa", "sono", "Grazie", "Buongiorno", "bene", "questo", "ragazzi", "propriamente", "numero", "hanno", "giorno", "faccio", "davvero", "negativo", "essere", "vuole", "sensitivo", "venire" }; + public static readonly string[] AutoDetectWordsPortuguese = { "[Nn]ão", "Então", "Estás", "isso", "com" }; + public static readonly string[] AutoDetectWordsGreek = { "μου", "είναι", "Είναι", "αυτό", "Τόμπυ", "καλά", "Ενταξει", "Ενταξει", "πρεπει", "Λοιπον", "τιποτα", "ξερεις" }; + public static readonly string[] AutoDetectWordsRussian = { "Это", "не", "ты", "что", "это", "Мы", "Да", "Нет", "Ты", "нет", "Он", "его", "тебя", "как", "Не", "вы", "меня", "Но", "то", "всё", "бы", "мы", "мне", "вас", "знаю", "ещё", "за", "нас", "чтобы", "был" }; + public static readonly string[] AutoDetectWordsBulgarian = { "Какво", "тук", "може", "Как", "Ваше", "какво" }; + public static readonly string[] AutoDetectWordsRomanian = { "Какво", "тук", 
"може", "Как", "Ваше", "какво" }; + public static readonly string[] AutoDetectWordsArabic = { "Какво", "тук", "може", "Как", "Ваше", "какво" }; + public static readonly string[] AutoDetectWordsHebrew = { "אתה", "אולי", "הוא", "בסדר", "יודע", "טוב" }; + public static readonly string[] AutoDetectWordsVietnamese = { "không", "tôi", "anh", "đó", "Tôi", "ông" }; + public static readonly string[] AutoDetectWordsHungarian = { "hogy", "lesz", "tudom", "vagy", "mondtam", "még" }; + public static readonly string[] AutoDetectWordsTurkish = { "için", "Tamam", "Hayır", "benim", "daha", "deðil", "önce", "lazým", "benim", "çalýþýyor", "burada", "efendim" }; + public static readonly string[] AutoDetectWordsCroatianAndSerbian = { "sam", "ali", "nije", "samo", "ovo", "kako", "dobro", "sve", "tako", "će", "mogu", "ću", "zašto", "nešto", "za" }; + public static readonly string[] AutoDetectWordsCroatian = { "što", "ovdje", "gdje", "kamo", "tko", "prije", "uvijek", "vrijeme", "vidjeti", "netko", + "vidio", "nitko", "bok", "lijepo", "oprosti", "htio", "mjesto", "oprostite", "čovjek", "dolje", + "čovječe", "dvije", "dijete", "dio", "poslije", "događa", "vjerovati", "vjerojatno", "vjerujem", "točno", + "razumijem", "vidjela", "cijeli", "svijet", "obitelj", "volio", "sretan", "dovraga", "svijetu", "htjela", + "vidjeli", "negdje", "želio", "ponovno", "djevojka", "umrijeti", "čovjeka", "mjesta", "djeca", "osjećam", + "uopće", "djecu", "naprijed", "obitelji", "doista", "mjestu", "lijepa", "također", "riječ", "tijelo" }; + public static readonly string[] AutoDetectWordsSerbian = { "šta", "ovde", "gde", "ko", "pre", "uvek", "vreme", "videti", "neko", + "video", "niko", "ćao", "lepo", "izvini", "hteo", "mesto", "izvinite", "čovek", "dole", + "čoveče", "dve", "dete", "deo", "posle", "dešava", "verovati", "verovatno", "verujem", "tačno", + "razumem", "videla", "ceo", "svet", "porodica", "voleo", "srećan", "dođavola", "svetu", "htela", + "videli", "negde", "želeo", "ponovo", "devojka", "umreti", 
"čoveka", "mesta", "deca", "osećam", + "uopšte", "decu", "napred", "porodicu", "zaista", "mestu", "lepa", "takođe", "reč", "telo" }; + + public static string AutoDetectGoogleLanguage(string text, int bestCount) + { + int count = GetCount(text, AutoDetectWordsEnglish); + if (count > bestCount) + return "en"; + + count = GetCount(text, AutoDetectWordsDanish); + if (count > bestCount) + { + int norwegianCount = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre"); + int dutchCount = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n"); + if (norwegianCount < 2 && dutchCount < count) + return "da"; + } + + count = GetCount(text, AutoDetectWordsNorwegian); + if (count > bestCount) + { + int danishCount = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge"); + int dutchCount = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n"); + if (danishCount < 2 && dutchCount < count) + return "no"; + } + + count = GetCount(text, AutoDetectWordsSwedish); + if (count > bestCount) + return "sv"; + + count = GetCount(text, AutoDetectWordsSpanish); + if (count > bestCount) + { + int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words + int portugueseCount = GetCount(text, "[NnCc]ão", "Então", "h?ouve", "pessoal", "rapariga", "tivesse", "fizeste", + "jantar", "conheço", "atenção", "foste", "milhões", "devias", "ganhar", "raios"); // not spanish words + if (frenchCount < 2 && portugueseCount < 2) + return "es"; + } + + count = GetCount(text, AutoDetectWordsItalian); + if (count > bestCount) + { + int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not italian words + if (frenchCount < 2) + return "it"; + } + + count = GetCount(text, AutoDetectWordsFrench); + if (count > bestCount) + { + int romanianCount = GetCount(text, "[Ss]înt", "aici", "domnule", "pentru", "Vreau"); + if 
(romanianCount < 5) + return "fr"; + } + + count = GetCount(text, AutoDetectWordsPortuguese); + if (count > bestCount) + return "pt"; // Portuguese + + count = GetCount(text, AutoDetectWordsGerman); + if (count > bestCount) + return "de"; + + count = GetCount(text, AutoDetectWordsDutch); + if (count > bestCount) + return "nl"; + + count = GetCount(text, AutoDetectWordsPolish); + if (count > bestCount) + return "pl"; + + count = GetCount(text, AutoDetectWordsGreek); + if (count > bestCount) + return "el"; // Greek + + count = GetCount(text, AutoDetectWordsRussian); + if (count > bestCount) + return "ru"; // Russian + + count = GetCount(text, AutoDetectWordsBulgarian); + if (count > bestCount) + return "bg"; // Bulgarian + + count = GetCount(text, AutoDetectWordsArabic); + if (count > bestCount) + { + if (GetCount(text, "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10) + return "he"; + + int romanianCount = GetCount(text, "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã", + "vorbesti", "oamenii", "Asteaptã", "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau"); + if (romanianCount > count) + return "ro"; // Romanian + + romanianCount = GetCount(text, "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul", + "vorbesti", "oamenii", "zeului", "vrea", "atunci", "Poate", "Acum", "memoria", "soarele"); + if (romanianCount > count) + return "ro"; // Romanian + + return "ar"; // Arabic + } + + count = GetCount(text, AutoDetectWordsHebrew); + if (count > bestCount) + return "he"; // Hebrew + + count = GetCount(text, AutoDetectWordsCroatianAndSerbian); + if (count > bestCount) + { + int croatianCount = GetCount(text, AutoDetectWordsCroatian); + int serbianCount = GetCount(text, AutoDetectWordsSerbian); + if (croatianCount > serbianCount) + return "hr"; // Croatian + + return "sr"; // Serbian + } + + count = GetCount(text, AutoDetectWordsVietnamese); + if (count > bestCount) + return "vi"; // 
Vietnamese + + count = GetCount(text, AutoDetectWordsHungarian); + if (count > bestCount) + return "hu"; // Hungarian + + count = GetCount(text, AutoDetectWordsTurkish); + if (count > bestCount) + return "tr"; // Turkish + + count = GetCount(text, "yang", "tahu", "bisa", "akan", "tahun", "tapi", "dengan", "untuk", "rumah", "dalam", "sudah", "bertemu"); + if (count > bestCount) + return "id"; // Indonesian + + count = GetCount(text, "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล", "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์"); + if (count > 10 || count > bestCount) + return "th"; // Thai + + count = GetCount(text, "그리고", "아니야", "하지만", "말이야", "그들은", "우리가"); + if (count > 10 || count > bestCount) + return "ko"; // Korean + + count = GetCount(text, "että", "kuin", "minä", "mitään", "Mutta", "siitä", "täällä", "poika", "Kiitos", "enää", "vielä", "tässä"); + if (count > bestCount) + return "fi"; // Finnish + + count = GetCount(text, "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã", "vorbesti", "oamenii", + "Asteaptã", "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau"); + if (count > bestCount) + return "ro"; // Romanian + + count = GetCount(text, "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul", "vorbesti", "oamenii", + "zeului", "vrea", "atunci", "Poate", "Acum", "memoria", "soarele"); + if (count > bestCount) + return "ro"; // Romanian + + count = GetCountContains(text, "シ", "ュ", "シン", "シ", "ン", "ユ"); + count += GetCountContains(text, "イ", "ン", "チ", "ェ", "ク", "ハ"); + count += GetCountContains(text, "シ", "ュ", "う", "シ", "ン", "サ"); + count += GetCountContains(text, "シ", "ュ", "シ", "ン", "だ", "う"); + if (count > bestCount * 2) + return "ja"; // Japanese - not tested... 
+ + count = GetCountContains(text, "是", "是早", "吧", "的", "爱", "上好"); + count += GetCountContains(text, "的", "啊", "好", "好", "亲", "的"); + count += GetCountContains(text, "谢", "走", "吧", "晚", "上", "好"); + count += GetCountContains(text, "来", "卡", "拉", "吐", "滚", "他"); + if (count > bestCount * 2) + return "zh"; // Chinese (simplified) - not tested... + + return string.Empty; + } + + public static string AutoDetectGoogleLanguage(Subtitle subtitle) + { + string languageId = AutoDetectGoogleLanguageOrNull(subtitle); + if (languageId == null) + languageId = "en"; + + return languageId; + } + + public static string AutoDetectGoogleLanguageOrNull(Subtitle subtitle) + { + var sb = new StringBuilder(); + foreach (Paragraph p in subtitle.Paragraphs) + sb.AppendLine(p.Text); + + string languageId = AutoDetectGoogleLanguage(sb.ToString(), subtitle.Paragraphs.Count / 14); + if (string.IsNullOrEmpty(languageId)) + languageId = null; + + return languageId; + } + + public static string AutoDetectLanguageName(string languageName, Subtitle subtitle) + { + if (string.IsNullOrEmpty(languageName)) + languageName = "en_US"; + int bestCount = subtitle.Paragraphs.Count / 14; + + var sb = new StringBuilder(); + foreach (Paragraph p in subtitle.Paragraphs) + sb.AppendLine(p.Text); + string text = sb.ToString(); + + List dictionaryNames = Utilities.GetDictionaryLanguages(); + + bool containsEnGb = false; + bool containsEnUs = false; + bool containsHrHr = false; + bool containsSrLatn = false; + foreach (string name in dictionaryNames) + { + if (name.Contains("[en_GB]")) + containsEnGb = true; + if (name.Contains("[en_US]")) + containsEnUs = true; + if (name.Contains("[hr_HR]")) + containsHrHr = true; + if (name.Contains("[sr-Latn]")) + containsSrLatn = true; + } + + foreach (string name in dictionaryNames) + { + string shortName = string.Empty; + int start = name.IndexOf('['); + int end = name.IndexOf(']'); + if (start > 0 && end > start) + { + start++; + shortName = name.Substring(start, end - 
start); + } + + int count; + switch (shortName) + { + case "da_DK": + count = GetCount(text, "vi", "hun", "og", "jeg", "var", "men", "bliver", "meget", "spørger", "Hej", "utrolig", "dejligt"); + if (count > bestCount) + { + int norweigianCount = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre"); + if (norweigianCount < 2) + languageName = shortName; + } + break; + case "nb_NO": + count = GetCount(text, AutoDetectWordsNorwegian); + if (count > bestCount) + { + int danishCount = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge"); + int dutchCount = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n"); + if (danishCount < 2 && dutchCount < count) + languageName = shortName; + } + break; + case "en_US": + count = GetCount(text, AutoDetectWordsEnglish); + if (count > bestCount) + { + languageName = shortName; + if (containsEnGb) + { + int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor"); + int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour"); + if (gbCount > usCount) + languageName = "en_GB"; + } + } + break; + case "en_GB": + count = GetCount(text, "we", "are", "and", "you", "your", "what"); + if (count > bestCount) + { + languageName = shortName; + if (containsEnUs) + { + int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor"); + int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour"); + if (gbCount < usCount) + languageName = "en_US"; + } + } + break; + case "sv_SE": + count = GetCount(text, "vi", "är", "och", "Jag", "inte", "för"); + if (count > bestCount) + languageName = shortName; + break; + case "es_ES": + count = GetCount(text, AutoDetectWordsSpanish); + if (count > bestCount) + { + int frenchWords = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words + if (frenchWords < 2) + languageName = shortName; + } 
+ break; + case "fr_FR": + count = GetCount(text, AutoDetectWordsFrench); + if (count > bestCount) + { + int spanishWords = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not french words + int italianWords = GetCount(text, AutoDetectWordsItalian); // not italian words + if (spanishWords < 2 && italianWords < 2) + languageName = shortName; + } + break; + case "it_IT": + count = GetCount(text, AutoDetectWordsItalian); + if (count > bestCount) + { + int frenchWords = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words + int spanishWords = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not french words + if (frenchWords < 2 && spanishWords < 2) + languageName = shortName; + } + break; + case "de_DE": + count = GetCount(text, "und", "auch", "sich", "bin", "hast", "möchte"); + if (count > bestCount) + languageName = shortName; + break; + case "nl_NL": + count = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n"); + if (count > bestCount) + languageName = shortName; + break; + case "pl_PL": + count = GetCount(text, "Czy", "ale", "ty", "siê", "jest", "mnie"); + if (count > bestCount) + languageName = shortName; + break; + case "el_GR": + count = GetCount(text, AutoDetectWordsGreek); + if (count > bestCount) + languageName = shortName; + break; + case "ru_RU": + count = GetCount(text, AutoDetectWordsRussian); + if (count > bestCount) + languageName = shortName; + break; + case "ro_RO": + count = GetCount(text, "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã", "vorbesti", "oamenii", "Asteaptã", + "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau"); + if (count > bestCount) + { + languageName = shortName; + } + else + { + count = GetCount(text, "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul", "vorbesti", "oamenii", "zeului", + "vrea", "atunci", "Poate", "Acum", 
"memoria", "soarele"); + + if (count > bestCount) + languageName = shortName; + } + break; + case "hr_HR": // Croatian + count = GetCount(text, AutoDetectWordsCroatianAndSerbian); + if (count > bestCount) + { + languageName = shortName; + if (containsSrLatn) + { + int croatianCount = GetCount(text, AutoDetectWordsCroatian); + int serbianCount = GetCount(text, AutoDetectWordsSerbian); + if (serbianCount > croatianCount) + languageName = "sr-Latn"; + } + } + break; + case "sr-Latn": // Serbian (Latin) + count = GetCount(text, AutoDetectWordsCroatianAndSerbian); + if (count > bestCount) + { + languageName = shortName; + if (containsHrHr) + { + int croatianCount = GetCount(text, AutoDetectWordsCroatian); + int serbianCount = GetCount(text, AutoDetectWordsSerbian); + if (serbianCount < croatianCount) + languageName = "hr_HR"; + } + } + break; + case "pt_PT": // Portuguese + count = GetCount(text, AutoDetectWordsPortuguese); + if (count > bestCount) + languageName = shortName; + break; + case "pt_BR": // Portuguese (Brasil) + count = GetCount(text, AutoDetectWordsPortuguese); + if (count > bestCount) + languageName = shortName; + break; + case "hu_HU": // Hungarian + count = GetCount(text, AutoDetectWordsHungarian); + if (count > bestCount) + languageName = shortName; + break; + } + } + return languageName; + } + + public static Encoding DetectAnsiEncoding(byte[] buffer) + { + if (Utilities.IsRunningOnMono()) + return Encoding.Default; + + try + { + Encoding encoding = DetectEncoding.EncodingTools.DetectInputCodepage(buffer); + + Encoding greekEncoding = Encoding.GetEncoding(1253); // Greek + if (GetCount(greekEncoding.GetString(buffer), AutoDetectWordsGreek) > 5) + return greekEncoding; + + Encoding russianEncoding = Encoding.GetEncoding(1251); // Cyrillic + if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian + return russianEncoding; + if (GetCount(russianEncoding.GetString(buffer), "Какво", "тук", "може", 
"Как", "Ваше", "какво") > 5) // Bulgarian + return russianEncoding; + + russianEncoding = Encoding.GetEncoding(28595); // Russian + if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian + return russianEncoding; + + Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai + if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5) + return thaiEncoding; + + Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic + Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew + if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5) + { + if (GetCount(hewbrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10) + return hewbrewEncoding; + return arabicEncoding; + } + if (GetCount(hewbrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 5) + return hewbrewEncoding; + + return encoding; + } + catch + { + return Encoding.Default; + } + } + + public static Encoding GetEncodingFromFile(string fileName) + { + var encoding = Encoding.Default; + + try + { + foreach (EncodingInfo ei in Encoding.GetEncodings()) + { + if (ei.CodePage + ": " + ei.DisplayName == Configuration.Settings.General.DefaultEncoding && + ei.Name != Encoding.UTF8.BodyName && + ei.Name != Encoding.Unicode.BodyName) + { + encoding = ei.GetEncoding(); + break; + } + } + + using (var file = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) + { + var bom = new byte[12]; // Get the byte-order mark, if there is one + file.Position = 0; + file.Read(bom, 0, 12); + if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf) + encoding = Encoding.UTF8; + else if (bom[0] == 0xff && bom[1] == 0xfe) + encoding = Encoding.Unicode; + else if (bom[0] == 0xfe && bom[1] == 0xff) // utf-16 and ucs-2 + encoding = 
Encoding.BigEndianUnicode; + else if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff) // ucs-4 + encoding = Encoding.UTF32; + else if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76 && (bom[3] == 0x38 || bom[3] == 0x39 || bom[3] == 0x2b || bom[3] == 0x2f)) // utf-7 + encoding = Encoding.UTF7; + else if (file.Length > 12) + { + long length = file.Length; + if (length > 500000) + length = 500000; + + file.Position = 0; + var buffer = new byte[length]; + file.Read(buffer, 0, (int)length); + + bool couldBeUtf8; + if (IsUtf8(buffer, out couldBeUtf8)) + { + encoding = Encoding.UTF8; + } + else if (couldBeUtf8 && Configuration.Settings.General.DefaultEncoding == Encoding.UTF8.BodyName) + { // keep utf-8 encoding if it's default + encoding = Encoding.UTF8; + } + else if (couldBeUtf8 && fileName.EndsWith(".xml", StringComparison.OrdinalIgnoreCase) && Encoding.Default.GetString(buffer).ToLower().Replace('\'', '"').Contains("encoding=\"utf-8\"")) + { // keep utf-8 encoding for xml files with utf-8 in header (without any utf-8 encoded characters, but with only allowed utf-8 characters) + encoding = Encoding.UTF8; + } + else if (Configuration.Settings.General.AutoGuessAnsiEncoding) + { + encoding = DetectAnsiEncoding(buffer); + + Encoding greekEncoding = Encoding.GetEncoding(1253); // Greek + if (GetCount(greekEncoding.GetString(buffer), AutoDetectWordsGreek) > 5) + return greekEncoding; + + Encoding russianEncoding = Encoding.GetEncoding(1251); // Cyrillic + if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian + return russianEncoding; + if (GetCount(russianEncoding.GetString(buffer), "Какво", "тук", "може", "Как", "Ваше", "какво") > 5) // Bulgarian + return russianEncoding; + russianEncoding = Encoding.GetEncoding(28595); // Russian + if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) + return russianEncoding; + + Encoding thaiEncoding = 
Encoding.GetEncoding(874); // Thai + if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5) + return thaiEncoding; + + Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic + Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew + if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5) + { + if (GetCount(hewbrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10) + return hewbrewEncoding; + return arabicEncoding; + } + if (GetCount(hewbrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 5) + return hewbrewEncoding; + + Encoding romanianEncoding = Encoding.GetEncoding(1250); // Romanian + if (GetCount(romanianEncoding.GetString(buffer), "să", "şi", "văzut", "regulă", "găsit", "viaţă") > 99) + return romanianEncoding; + + Encoding koreanEncoding = Encoding.GetEncoding(949); // Korean + if (GetCount(koreanEncoding.GetString(buffer), "그리고", "아니야", "하지만", "말이야", "그들은", "우리가") > 5) + return koreanEncoding; + } + } + } + } + catch + { + } + return encoding; + } + + /// + /// Will try to determine if buffer is utf-8 encoded or not. + /// If any non-utf8 sequences are found then false is returned, if no utf8 multibytes sequences are found then false is returned. 
+ /// + private static bool IsUtf8(byte[] buffer, out bool couldBeUtf8) + { + couldBeUtf8 = false; + int utf8Count = 0; + int i = 0; + while (i < buffer.Length - 3) + { + byte b = buffer[i]; + if (b > 127) + { + if (b >= 194 && b <= 223 && buffer[i + 1] >= 128 && buffer[i + 1] <= 191) + { // 2-byte sequence + utf8Count++; + i++; + } + else if (b >= 224 && b <= 239 && buffer[i + 1] >= 128 && buffer[i + 1] <= 191 && + buffer[i + 2] >= 128 && buffer[i + 2] <= 191) + { // 3-byte sequence + utf8Count++; + i += 2; + } + else if (b >= 240 && b <= 244 && buffer[i + 1] >= 128 && buffer[i + 1] <= 191 && + buffer[i + 2] >= 128 && buffer[i + 2] <= 191 && + buffer[i + 3] >= 128 && buffer[i + 3] <= 191) + { // 4-byte sequence + utf8Count++; + i += 3; + } + else + { + return false; + } + } + i++; + } + couldBeUtf8 = true; + if (utf8Count == 0) + return false; // not utf-8 (no characters utf-8 encoded...) + + return true; + } + + } +} diff --git a/libse/LibSE.csproj b/libse/LibSE.csproj index 746653518..1ed3a007f 100644 --- a/libse/LibSE.csproj +++ b/libse/LibSE.csproj @@ -173,6 +173,7 @@ + diff --git a/libse/Subtitle.cs b/libse/Subtitle.cs index 2d146ffaf..ebd0404aa 100644 --- a/libse/Subtitle.cs +++ b/libse/Subtitle.cs @@ -139,7 +139,7 @@ namespace Nikse.SubtitleEdit.Core { try { - sr = new StreamReader(fileName, Utilities.GetEncodingFromFile(fileName), true); + sr = new StreamReader(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName), true); } catch { diff --git a/libse/SubtitleFormats/Cavena890.cs b/libse/SubtitleFormats/Cavena890.cs index 9481a386d..fbaafd597 100644 --- a/libse/SubtitleFormats/Cavena890.cs +++ b/libse/SubtitleFormats/Cavena890.cs @@ -215,7 +215,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats } } - var language = Utilities.AutoDetectGoogleLanguage(subtitle); + var language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); if (language == "he") // Hebrew { _languageIdLine1 = LanguageIdHebrew; diff --git a/libse/SubtitleFormats/DCSubtitle.cs 
b/libse/SubtitleFormats/DCSubtitle.cs index c649ed7b3..5092710f9 100644 --- a/libse/SubtitleFormats/DCSubtitle.cs +++ b/libse/SubtitleFormats/DCSubtitle.cs @@ -82,7 +82,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats string languageEnglishName; try { - string languageShortName = Utilities.AutoDetectGoogleLanguage(subtitle); + string languageShortName = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); var ci = CultureInfo.CreateSpecificCulture(languageShortName); languageEnglishName = ci.EnglishName; int indexOfStartP = languageEnglishName.IndexOf('('); diff --git a/libse/SubtitleFormats/Sami.cs b/libse/SubtitleFormats/Sami.cs index 0aed23602..139eb70ae 100644 --- a/libse/SubtitleFormats/Sami.cs +++ b/libse/SubtitleFormats/Sami.cs @@ -39,7 +39,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats public override string ToText(Subtitle subtitle, string title) { - string language = Utilities.AutoDetectLanguageName("en_US", subtitle); + string language = LanguageAutoDetect.AutoDetectLanguageName("en_US", subtitle); var ci = CultureInfo.GetCultureInfo(language.Replace("_", "-")); string languageTag = string.Format("{0}CC", language.Replace("_", string.Empty).ToUpper()); string languageName = ci.Parent.EnglishName; diff --git a/libse/SubtitleFormats/ScenaristClosedCaptions.cs b/libse/SubtitleFormats/ScenaristClosedCaptions.cs index 7887c78d0..e0564d15f 100644 --- a/libse/SubtitleFormats/ScenaristClosedCaptions.cs +++ b/libse/SubtitleFormats/ScenaristClosedCaptions.cs @@ -699,7 +699,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats var sb = new StringBuilder(); sb.AppendLine("Scenarist_SCC V1.0"); sb.AppendLine(); - string language = Utilities.AutoDetectGoogleLanguage(subtitle); + string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); for (int i = 0; i < subtitle.Paragraphs.Count; i++) { Paragraph p = subtitle.Paragraphs[i]; diff --git a/libse/SubtitleFormats/Tmx14.cs b/libse/SubtitleFormats/Tmx14.cs index 28d38e7c2..f46b74986 100644 
--- a/libse/SubtitleFormats/Tmx14.cs +++ b/libse/SubtitleFormats/Tmx14.cs @@ -40,7 +40,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats " " + Environment.NewLine + ""; - string lang = Utilities.AutoDetectLanguageName("en_US", subtitle); + string lang = LanguageAutoDetect.AutoDetectLanguageName("en_US", subtitle); if (lang.StartsWith("en_")) lang = "EN"; else if (lang.Length == 5) diff --git a/libse/Utilities.cs b/libse/Utilities.cs index ce8f94cdd..431aef04f 100644 --- a/libse/Utilities.cs +++ b/libse/Utilities.cs @@ -742,203 +742,7 @@ namespace Nikse.SubtitleEdit.Core } } return s; - } - - public static Encoding GetEncodingFromFile(string fileName) - { - Encoding encoding = Encoding.Default; - - try - { - foreach (EncodingInfo ei in Encoding.GetEncodings()) - { - if (ei.CodePage + ": " + ei.DisplayName == Configuration.Settings.General.DefaultEncoding && - ei.Name != Encoding.UTF8.BodyName && - ei.Name != Encoding.Unicode.BodyName) - { - encoding = ei.GetEncoding(); - break; - } - } - - using (var file = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) - { - var bom = new byte[12]; // Get the byte-order mark, if there is one - file.Position = 0; - file.Read(bom, 0, 12); - if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf) - encoding = Encoding.UTF8; - else if (bom[0] == 0xff && bom[1] == 0xfe) - encoding = Encoding.Unicode; - else if (bom[0] == 0xfe && bom[1] == 0xff) // utf-16 and ucs-2 - encoding = Encoding.BigEndianUnicode; - else if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff) // ucs-4 - encoding = Encoding.UTF32; - else if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76 && (bom[3] == 0x38 || bom[3] == 0x39 || bom[3] == 0x2b || bom[3] == 0x2f)) // utf-7 - encoding = Encoding.UTF7; - else if (file.Length > 12) - { - long length = file.Length; - if (length > 500000) - length = 500000; - - file.Position = 0; - var buffer = new byte[length]; - file.Read(buffer, 0, (int)length); - - bool couldBeUtf8; 
- if (IsUtf8(buffer, out couldBeUtf8)) - { - encoding = Encoding.UTF8; - } - else if (couldBeUtf8 && Configuration.Settings.General.DefaultEncoding == Encoding.UTF8.BodyName) - { // keep utf-8 encoding if it's default - encoding = Encoding.UTF8; - } - else if (couldBeUtf8 && fileName.EndsWith(".xml", StringComparison.OrdinalIgnoreCase) && Encoding.Default.GetString(buffer).ToLower().Replace('\'', '"').Contains("encoding=\"utf-8\"")) - { // keep utf-8 encoding for xml files with utf-8 in header (without any utf-8 encoded characters, but with only allowed utf-8 characters) - encoding = Encoding.UTF8; - } - else if (Configuration.Settings.General.AutoGuessAnsiEncoding) - { - encoding = DetectAnsiEncoding(buffer); - - Encoding greekEncoding = Encoding.GetEncoding(1253); // Greek - if (GetCount(greekEncoding.GetString(buffer), AutoDetectWordsGreek) > 5) - return greekEncoding; - - Encoding russianEncoding = Encoding.GetEncoding(1251); // Cyrillic - if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian - return russianEncoding; - if (GetCount(russianEncoding.GetString(buffer), "Какво", "тук", "може", "Как", "Ваше", "какво") > 5) // Bulgarian - return russianEncoding; - russianEncoding = Encoding.GetEncoding(28595); // Russian - if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) - return russianEncoding; - - Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai - if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5) - return thaiEncoding; - - Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic - Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew - if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5) - { - if 
(GetCount(hewbrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10) - return hewbrewEncoding; - return arabicEncoding; - } - if (GetCount(hewbrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 5) - return hewbrewEncoding; - - Encoding romanianEncoding = Encoding.GetEncoding(1250); // Romanian - if (GetCount(romanianEncoding.GetString(buffer), "să", "şi", "văzut", "regulă", "găsit", "viaţă") > 99) - return romanianEncoding; - - Encoding koreanEncoding = Encoding.GetEncoding(949); // Korean - if (GetCount(koreanEncoding.GetString(buffer), "그리고", "아니야", "하지만", "말이야", "그들은", "우리가") > 5) - return koreanEncoding; - } - } - } - } - catch - { - } - return encoding; - } - - /// - /// Will try to determine if buffer is utf-8 encoded or not. - /// If any non-utf8 sequences are found then false is returned, if no utf8 multibytes sequences are found then false is returned. - /// - private static bool IsUtf8(byte[] buffer, out bool couldBeUtf8) - { - couldBeUtf8 = false; - int utf8Count = 0; - int i = 0; - while (i < buffer.Length - 3) - { - byte b = buffer[i]; - if (b > 127) - { - if (b >= 194 && b <= 223 && buffer[i + 1] >= 128 && buffer[i + 1] <= 191) - { // 2-byte sequence - utf8Count++; - i++; - } - else if (b >= 224 && b <= 239 && buffer[i + 1] >= 128 && buffer[i + 1] <= 191 && - buffer[i + 2] >= 128 && buffer[i + 2] <= 191) - { // 3-byte sequence - utf8Count++; - i += 2; - } - else if (b >= 240 && b <= 244 && buffer[i + 1] >= 128 && buffer[i + 1] <= 191 && - buffer[i + 2] >= 128 && buffer[i + 2] <= 191 && - buffer[i + 3] >= 128 && buffer[i + 3] <= 191) - { // 4-byte sequence - utf8Count++; - i += 3; - } - else - { - return false; - } - } - i++; - } - couldBeUtf8 = true; - if (utf8Count == 0) - return false; // not utf-8 (no characters utf-8 encoded...) 
- - return true; - } - - public static Encoding DetectAnsiEncoding(byte[] buffer) - { - if (IsRunningOnMono()) - return Encoding.Default; - - try - { - Encoding encoding = DetectEncoding.EncodingTools.DetectInputCodepage(buffer); - - Encoding greekEncoding = Encoding.GetEncoding(1253); // Greek - if (GetCount(greekEncoding.GetString(buffer), AutoDetectWordsGreek) > 5) - return greekEncoding; - - Encoding russianEncoding = Encoding.GetEncoding(1251); // Cyrillic - if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian - return russianEncoding; - if (GetCount(russianEncoding.GetString(buffer), "Какво", "тук", "може", "Как", "Ваше", "какво") > 5) // Bulgarian - return russianEncoding; - - russianEncoding = Encoding.GetEncoding(28595); // Russian - if (GetCount(russianEncoding.GetString(buffer), "что", "быть", "весь", "этот", "один", "такой") > 5) // Russian - return russianEncoding; - - Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai - if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5) - return thaiEncoding; - - Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic - Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew - if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5) - { - if (GetCount(hewbrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10) - return hewbrewEncoding; - return arabicEncoding; - } - if (GetCount(hewbrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 5) - return hewbrewEncoding; - - return encoding; - } - catch - { - return Encoding.Default; - } - } + } public static string DictionaryFolder { @@ -993,514 +797,7 @@ namespace Nikse.SubtitleEdit.Core duration = Configuration.Settings.General.SubtitleMaximumDisplayMilliseconds; 
return duration; - } - - private static int GetCount(string text, params string[] words) - { - int count = 0; - for (int i = 0; i < words.Length; i++) - { - count += Regex.Matches(text, "\\b" + words[i] + "\\b", (RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture)).Count; - } - return count; - } - - private static int GetCountContains(string text, params string[] words) - { - int count = 0; - for (int i = 0; i < words.Length; i++) - { - var regEx = new Regex(words[i]); - count += regEx.Matches(text).Count; - } - return count; - } - - public static string AutoDetectGoogleLanguage(Encoding encoding) - { - switch (encoding.CodePage) - { - case 860: - return "pt"; // Portuguese - case 28599: - case 1254: - return "tr"; // Turkish - case 28598: - case 1255: - return "he"; // Hebrew - case 28596: - case 1256: - return "ar"; // Arabic - case 1258: - return "vi"; // Vietnamese - case 949: - case 1361: - case 20949: - case 51949: - case 50225: - return "ko"; // Korean - case 1253: - case 28597: - return "el"; // Greek - case 50220: - case 50221: - case 50222: - case 51932: - case 20932: - case 10001: - return "ja"; // Japanese - case 20000: - case 20002: - case 20936: - case 950: - case 52936: - case 54936: - case 51936: - return "zh"; // Chinese - default: - return null; - } - } - - public static readonly string[] AutoDetectWordsEnglish = { "we", "are", "and", "you", "your", "what" }; - public static readonly string[] AutoDetectWordsDanish = { "vi", "han", "og", "jeg", "var", "men", "gider", "bliver", "virkelig", "kommer", "tilbage", "Hej" }; - public static readonly string[] AutoDetectWordsNorwegian = { "vi", "er", "og", "jeg", "var", "men" }; - public static readonly string[] AutoDetectWordsSwedish = { "vi", "är", "och", "Jag", "inte", "för" }; - public static readonly string[] AutoDetectWordsSpanish = { "el", "bien", "Vamos", "Hola", "casa", "con" }; - public static readonly string[] AutoDetectWordsFrench = { "un", "vous", "avec", "pas", "ce", "une" }; - 
public static readonly string[] AutoDetectWordsGerman = { "und", "auch", "sich", "bin", "hast", "möchte" }; - public static readonly string[] AutoDetectWordsDutch = { "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n" }; - public static readonly string[] AutoDetectWordsPolish = { "Czy", "ale", "ty", "siê", "jest", "mnie" }; - public static readonly string[] AutoDetectWordsItalian = { "Cosa", "sono", "Grazie", "Buongiorno", "bene", "questo", "ragazzi", "propriamente", "numero", "hanno", "giorno", "faccio", "davvero", "negativo", "essere", "vuole", "sensitivo", "venire" }; - public static readonly string[] AutoDetectWordsPortuguese = { "[Nn]ão", "Então", "Estás", "isso", "com" }; - public static readonly string[] AutoDetectWordsGreek = { "μου", "είναι", "Είναι", "αυτό", "Τόμπυ", "καλά", "Ενταξει", "Ενταξει", "πρεπει", "Λοιπον", "τιποτα", "ξερεις" }; - public static readonly string[] AutoDetectWordsRussian = { "Это", "не", "ты", "что", "это", "Мы", "Да", "Нет", "Ты", "нет", "Он", "его", "тебя", "как", "Не", "вы", "меня", "Но", "то", "всё", "бы", "мы", "мне", "вас", "знаю", "ещё", "за", "нас", "чтобы", "был" }; - public static readonly string[] AutoDetectWordsBulgarian = { "Какво", "тук", "може", "Как", "Ваше", "какво" }; - public static readonly string[] AutoDetectWordsRomanian = { "Какво", "тук", "може", "Как", "Ваше", "какво" }; - public static readonly string[] AutoDetectWordsArabic = { "Какво", "тук", "може", "Как", "Ваше", "какво" }; - public static readonly string[] AutoDetectWordsHebrew = { "אתה", "אולי", "הוא", "בסדר", "יודע", "טוב" }; - public static readonly string[] AutoDetectWordsVietnamese = { "không", "tôi", "anh", "đó", "Tôi", "ông" }; - public static readonly string[] AutoDetectWordsHungarian = { "hogy", "lesz", "tudom", "vagy", "mondtam", "még" }; - public static readonly string[] AutoDetectWordsTurkish = { "için", "Tamam", "Hayır", "benim", "daha", "deðil", "önce", "lazým", "benim", "çalýþýyor", "burada", "efendim" }; - public static readonly string[] 
AutoDetectWordsCroatianAndSerbian = { "sam", "ali", "nije", "samo", "ovo", "kako", "dobro", "sve", "tako", "će", "mogu", "ću", "zašto", "nešto", "za" }; - public static readonly string[] AutoDetectWordsCroatian = { "što", "ovdje", "gdje", "kamo", "tko", "prije", "uvijek", "vrijeme", "vidjeti", "netko", - "vidio", "nitko", "bok", "lijepo", "oprosti", "htio", "mjesto", "oprostite", "čovjek", "dolje", - "čovječe", "dvije", "dijete", "dio", "poslije", "događa", "vjerovati", "vjerojatno", "vjerujem", "točno", - "razumijem", "vidjela", "cijeli", "svijet", "obitelj", "volio", "sretan", "dovraga", "svijetu", "htjela", - "vidjeli", "negdje", "želio", "ponovno", "djevojka", "umrijeti", "čovjeka", "mjesta", "djeca", "osjećam", - "uopće", "djecu", "naprijed", "obitelji", "doista", "mjestu", "lijepa", "također", "riječ", "tijelo" }; - public static readonly string[] AutoDetectWordsSerbian = { "šta", "ovde", "gde", "ko", "pre", "uvek", "vreme", "videti", "neko", - "video", "niko", "ćao", "lepo", "izvini", "hteo", "mesto", "izvinite", "čovek", "dole", - "čoveče", "dve", "dete", "deo", "posle", "dešava", "verovati", "verovatno", "verujem", "tačno", - "razumem", "videla", "ceo", "svet", "porodica", "voleo", "srećan", "dođavola", "svetu", "htela", - "videli", "negde", "želeo", "ponovo", "devojka", "umreti", "čoveka", "mesta", "deca", "osećam", - "uopšte", "decu", "napred", "porodicu", "zaista", "mestu", "lepa", "takođe", "reč", "telo" }; - - public static string AutoDetectGoogleLanguage(string text, int bestCount) - { - int count = GetCount(text, AutoDetectWordsEnglish); - if (count > bestCount) - return "en"; - - count = GetCount(text, AutoDetectWordsDanish); - if (count > bestCount) - { - int norwegianCount = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre"); - int dutchCount = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n"); - if (norwegianCount < 2 && dutchCount < count) - return "da"; - } - - count = GetCount(text, AutoDetectWordsNorwegian); - if 
(count > bestCount) - { - int danishCount = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge"); - int dutchCount = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n"); - if (danishCount < 2 && dutchCount < count) - return "no"; - } - - count = GetCount(text, AutoDetectWordsSwedish); - if (count > bestCount) - return "sv"; - - count = GetCount(text, AutoDetectWordsSpanish); - if (count > bestCount) - { - int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words - int portugueseCount = GetCount(text, "[NnCc]ão", "Então", "h?ouve", "pessoal", "rapariga", "tivesse", "fizeste", - "jantar", "conheço", "atenção", "foste", "milhões", "devias", "ganhar", "raios"); // not spanish words - if (frenchCount < 2 && portugueseCount < 2) - return "es"; - } - - count = GetCount(text, AutoDetectWordsItalian); - if (count > bestCount) - { - int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not italian words - if (frenchCount < 2) - return "it"; - } - - count = GetCount(text, AutoDetectWordsFrench); - if (count > bestCount) - { - int romanianCount = GetCount(text, "[Ss]înt", "aici", "domnule", "pentru", "Vreau"); - if (romanianCount < 5) - return "fr"; - } - - count = GetCount(text, AutoDetectWordsPortuguese); - if (count > bestCount) - return "pt"; // Portuguese - - count = GetCount(text, AutoDetectWordsGerman); - if (count > bestCount) - return "de"; - - count = GetCount(text, AutoDetectWordsDutch); - if (count > bestCount) - return "nl"; - - count = GetCount(text, AutoDetectWordsPolish); - if (count > bestCount) - return "pl"; - - count = GetCount(text, AutoDetectWordsGreek); - if (count > bestCount) - return "el"; // Greek - - count = GetCount(text, AutoDetectWordsRussian); - if (count > bestCount) - return "ru"; // Russian - - count = GetCount(text, AutoDetectWordsBulgarian); - if (count > 
bestCount) - return "bg"; // Bulgarian - - count = GetCount(text, AutoDetectWordsArabic); - if (count > bestCount) - { - if (GetCount(text, "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10) - return "he"; - - int romanianCount = GetCount(text, "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã", - "vorbesti", "oamenii", "Asteaptã", "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau"); - if (romanianCount > count) - return "ro"; // Romanian - - romanianCount = GetCount(text, "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul", - "vorbesti", "oamenii", "zeului", "vrea", "atunci", "Poate", "Acum", "memoria", "soarele"); - if (romanianCount > count) - return "ro"; // Romanian - - return "ar"; // Arabic - } - - count = GetCount(text, AutoDetectWordsHebrew); - if (count > bestCount) - return "he"; // Hebrew - - count = GetCount(text, AutoDetectWordsCroatianAndSerbian); - if (count > bestCount) - { - int croatianCount = GetCount(text, AutoDetectWordsCroatian); - int serbianCount = GetCount(text, AutoDetectWordsSerbian); - if (croatianCount > serbianCount) - return "hr"; // Croatian - - return "sr"; // Serbian - } - - count = GetCount(text, AutoDetectWordsVietnamese); - if (count > bestCount) - return "vi"; // Vietnamese - - count = GetCount(text, AutoDetectWordsHungarian); - if (count > bestCount) - return "hu"; // Hungarian - - count = GetCount(text, AutoDetectWordsTurkish); - if (count > bestCount) - return "tr"; // Turkish - - count = GetCount(text, "yang", "tahu", "bisa", "akan", "tahun", "tapi", "dengan", "untuk", "rumah", "dalam", "sudah", "bertemu"); - if (count > bestCount) - return "id"; // Indonesian - - count = GetCount(text, "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล", "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์"); - if (count > 10 || count > bestCount) - return "th"; // Thai - - count = GetCount(text, "그리고", "아니야", "하지만", "말이야", "그들은", "우리가"); - if 
(count > 10 || count > bestCount) - return "ko"; // Korean - - count = GetCount(text, "että", "kuin", "minä", "mitään", "Mutta", "siitä", "täällä", "poika", "Kiitos", "enää", "vielä", "tässä"); - if (count > bestCount) - return "fi"; // Finnish - - count = GetCount(text, "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã", "vorbesti", "oamenii", - "Asteaptã", "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau"); - if (count > bestCount) - return "ro"; // Romanian - - count = GetCount(text, "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul", "vorbesti", "oamenii", - "zeului", "vrea", "atunci", "Poate", "Acum", "memoria", "soarele"); - if (count > bestCount) - return "ro"; // Romanian - - count = GetCountContains(text, "シ", "ュ", "シン", "シ", "ン", "ユ"); - count += GetCountContains(text, "イ", "ン", "チ", "ェ", "ク", "ハ"); - count += GetCountContains(text, "シ", "ュ", "う", "シ", "ン", "サ"); - count += GetCountContains(text, "シ", "ュ", "シ", "ン", "だ", "う"); - if (count > bestCount * 2) - return "ja"; // Japanese - not tested... - - count = GetCountContains(text, "是", "是早", "吧", "的", "爱", "上好"); - count += GetCountContains(text, "的", "啊", "好", "好", "亲", "的"); - count += GetCountContains(text, "谢", "走", "吧", "晚", "上", "好"); - count += GetCountContains(text, "来", "卡", "拉", "吐", "滚", "他"); - if (count > bestCount * 2) - return "zh"; // Chinese (simplified) - not tested... 
- - return string.Empty; - } - - public static string AutoDetectGoogleLanguage(Subtitle subtitle) - { - string languageId = AutoDetectGoogleLanguageOrNull(subtitle); - if (languageId == null) - languageId = "en"; - - return languageId; - } - - public static string AutoDetectGoogleLanguageOrNull(Subtitle subtitle) - { - var sb = new StringBuilder(); - foreach (Paragraph p in subtitle.Paragraphs) - sb.AppendLine(p.Text); - - string languageId = AutoDetectGoogleLanguage(sb.ToString(), subtitle.Paragraphs.Count / 14); - if (string.IsNullOrEmpty(languageId)) - languageId = null; - - return languageId; - } - - public static string AutoDetectLanguageName(string languageName, Subtitle subtitle) - { - if (string.IsNullOrEmpty(languageName)) - languageName = "en_US"; - int bestCount = subtitle.Paragraphs.Count / 14; - - var sb = new StringBuilder(); - foreach (Paragraph p in subtitle.Paragraphs) - sb.AppendLine(p.Text); - string text = sb.ToString(); - - List dictionaryNames = GetDictionaryLanguages(); - - bool containsEnGb = false; - bool containsEnUs = false; - bool containsHrHr = false; - bool containsSrLatn = false; - foreach (string name in dictionaryNames) - { - if (name.Contains("[en_GB]")) - containsEnGb = true; - if (name.Contains("[en_US]")) - containsEnUs = true; - if (name.Contains("[hr_HR]")) - containsHrHr = true; - if (name.Contains("[sr-Latn]")) - containsSrLatn = true; - } - - foreach (string name in dictionaryNames) - { - string shortName = string.Empty; - int start = name.IndexOf('['); - int end = name.IndexOf(']'); - if (start > 0 && end > start) - { - start++; - shortName = name.Substring(start, end - start); - } - - int count; - switch (shortName) - { - case "da_DK": - count = GetCount(text, "vi", "hun", "og", "jeg", "var", "men", "bliver", "meget", "spørger", "Hej", "utrolig", "dejligt"); - if (count > bestCount) - { - int norweigianCount = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre"); - if (norweigianCount < 2) - languageName = 
shortName; - } - break; - case "nb_NO": - count = GetCount(text, AutoDetectWordsNorwegian); - if (count > bestCount) - { - int danishCount = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge"); - int dutchCount = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n"); - if (danishCount < 2 && dutchCount < count) - languageName = shortName; - } - break; - case "en_US": - count = GetCount(text, AutoDetectWordsEnglish); - if (count > bestCount) - { - languageName = shortName; - if (containsEnGb) - { - int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor"); - int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour"); - if (gbCount > usCount) - languageName = "en_GB"; - } - } - break; - case "en_GB": - count = GetCount(text, "we", "are", "and", "you", "your", "what"); - if (count > bestCount) - { - languageName = shortName; - if (containsEnUs) - { - int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor"); - int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour"); - if (gbCount < usCount) - languageName = "en_US"; - } - } - break; - case "sv_SE": - count = GetCount(text, "vi", "är", "och", "Jag", "inte", "för"); - if (count > bestCount) - languageName = shortName; - break; - case "es_ES": - count = GetCount(text, AutoDetectWordsSpanish); - if (count > bestCount) - { - int frenchWords = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words - if (frenchWords < 2) - languageName = shortName; - } - break; - case "fr_FR": - count = GetCount(text, AutoDetectWordsFrench); - if (count > bestCount) - { - int spanishWords = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not french words - int italianWords = GetCount(text, AutoDetectWordsItalian); // not italian words - if (spanishWords < 2 && italianWords < 2) - languageName = 
shortName; - } - break; - case "it_IT": - count = GetCount(text, AutoDetectWordsItalian); - if (count > bestCount) - { - int frenchWords = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words - int spanishWords = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not french words - if (frenchWords < 2 && spanishWords < 2) - languageName = shortName; - } - break; - case "de_DE": - count = GetCount(text, "und", "auch", "sich", "bin", "hast", "möchte"); - if (count > bestCount) - languageName = shortName; - break; - case "nl_NL": - count = GetCount(text, "van", "een", "[Hh]et", "m(ij|ij)", "z(ij|ij)n"); - if (count > bestCount) - languageName = shortName; - break; - case "pl_PL": - count = GetCount(text, "Czy", "ale", "ty", "siê", "jest", "mnie"); - if (count > bestCount) - languageName = shortName; - break; - case "el_GR": - count = GetCount(text, AutoDetectWordsGreek); - if (count > bestCount) - languageName = shortName; - break; - case "ru_RU": - count = GetCount(text, AutoDetectWordsRussian); - if (count > bestCount) - languageName = shortName; - break; - case "ro_RO": - count = GetCount(text, "sînt", "aici", "Sînt", "domnule", "pentru", "Vreau", "trãiascã", "niciodatã", "înseamnã", "vorbesti", "oamenii", "Asteaptã", - "fãcut", "Fãrã", "spune", "decât", "pentru", "vreau"); - if (count > bestCount) - { - languageName = shortName; - } - else - { - count = GetCount(text, "daca", "pentru", "acum", "soare", "trebuie", "Trebuie", "nevoie", "decat", "echilibrul", "vorbesti", "oamenii", "zeului", - "vrea", "atunci", "Poate", "Acum", "memoria", "soarele"); - - if (count > bestCount) - languageName = shortName; - } - break; - case "hr_HR": // Croatian - count = GetCount(text, AutoDetectWordsCroatianAndSerbian); - if (count > bestCount) - { - languageName = shortName; - if (containsSrLatn) - { - int croatianCount = GetCount(text, AutoDetectWordsCroatian); - int serbianCount = 
GetCount(text, AutoDetectWordsSerbian); - if (serbianCount > croatianCount) - languageName = "sr-Latn"; - } - } - break; - case "sr-Latn": // Serbian (Latin) - count = GetCount(text, AutoDetectWordsCroatianAndSerbian); - if (count > bestCount) - { - languageName = shortName; - if (containsHrHr) - { - int croatianCount = GetCount(text, AutoDetectWordsCroatian); - int serbianCount = GetCount(text, AutoDetectWordsSerbian); - if (serbianCount < croatianCount) - languageName = "hr_HR"; - } - } - break; - case "pt_PT": // Portuguese - count = GetCount(text, AutoDetectWordsPortuguese); - if (count > bestCount) - languageName = shortName; - break; - case "pt_BR": // Portuguese (Brasil) - count = GetCount(text, AutoDetectWordsPortuguese); - if (count > bestCount) - languageName = shortName; - break; - case "hu_HU": // Hungarian - count = GetCount(text, AutoDetectWordsHungarian); - if (count > bestCount) - languageName = shortName; - break; - } - } - return languageName; - } + } public static string ColorToHex(Color c) { diff --git a/src/Forms/AddToNames.cs b/src/Forms/AddToNames.cs index 11c7cc877..45611653d 100644 --- a/src/Forms/AddToNames.cs +++ b/src/Forms/AddToNames.cs @@ -43,7 +43,7 @@ namespace Nikse.SubtitleEdit.Forms } comboBoxDictionaries.Items.Clear(); - string languageName = Utilities.AutoDetectLanguageName(Configuration.Settings.General.SpellCheckLanguage, _subtitle); + string languageName = LanguageAutoDetect.AutoDetectLanguageName(Configuration.Settings.General.SpellCheckLanguage, _subtitle); foreach (string name in Utilities.GetDictionaryLanguages()) { comboBoxDictionaries.Items.Add(name); @@ -109,7 +109,7 @@ namespace Nikse.SubtitleEdit.Forms } } - languageName = Utilities.AutoDetectLanguageName(languageName, _subtitle); + languageName = LanguageAutoDetect.AutoDetectLanguageName(languageName, _subtitle); if (comboBoxDictionaries.Items.Count > 0) { string name = comboBoxDictionaries.SelectedItem.ToString(); diff --git a/src/Forms/AutoBreakUnbreakLines.cs 
b/src/Forms/AutoBreakUnbreakLines.cs index b5d265a26..80a225174 100644 --- a/src/Forms/AutoBreakUnbreakLines.cs +++ b/src/Forms/AutoBreakUnbreakLines.cs @@ -105,7 +105,7 @@ namespace Nikse.SubtitleEdit.Forms var sub = new Subtitle(); foreach (Paragraph p in _paragraphs) sub.Paragraphs.Add(p); - var language = Utilities.AutoDetectGoogleLanguage(sub); + var language = LanguageAutoDetect.AutoDetectGoogleLanguage(sub); listViewFixes.BeginUpdate(); listViewFixes.Items.Clear(); diff --git a/src/Forms/BatchConvert.cs b/src/Forms/BatchConvert.cs index 16f652b3a..157a8e4c8 100644 --- a/src/Forms/BatchConvert.cs +++ b/src/Forms/BatchConvert.cs @@ -773,7 +773,7 @@ namespace Nikse.SubtitleEdit.Forms sub.RemoveEmptyLines(); if (checkBoxFixCasing.Checked) { - _changeCasing.FixCasing(sub, Utilities.AutoDetectGoogleLanguage(sub)); + _changeCasing.FixCasing(sub, LanguageAutoDetect.AutoDetectGoogleLanguage(sub)); _changeCasingNames.Initialize(sub); _changeCasingNames.FixCasing(); } diff --git a/src/Forms/ChangeCasingNames.cs b/src/Forms/ChangeCasingNames.cs index 5afe389fc..8706a1108 100644 --- a/src/Forms/ChangeCasingNames.cs +++ b/src/Forms/ChangeCasingNames.cs @@ -110,7 +110,7 @@ namespace Nikse.SubtitleEdit.Forms private void FindAllNames() { - string language = Utilities.AutoDetectLanguageName("en_US", _subtitle); + string language = LanguageAutoDetect.AutoDetectLanguageName("en_US", _subtitle); if (string.IsNullOrEmpty(language)) language = "en_US"; diff --git a/src/Forms/ChooseEncoding.cs b/src/Forms/ChooseEncoding.cs index f8462179b..fa78f4322 100644 --- a/src/Forms/ChooseEncoding.cs +++ b/src/Forms/ChooseEncoding.cs @@ -54,7 +54,7 @@ namespace Nikse.SubtitleEdit.Forms _fileBuffer = new byte[0]; } - Encoding encoding = Utilities.DetectAnsiEncoding(_fileBuffer); + Encoding encoding = LanguageAutoDetect.DetectAnsiEncoding(_fileBuffer); foreach (EncodingInfo ei in Encoding.GetEncodings()) { var item = new ListViewItem(new[] { ei.CodePage.ToString(), ei.Name, ei.DisplayName }); 
diff --git a/src/Forms/Compare.cs b/src/Forms/Compare.cs index 8ad9bed79..ca073d1b6 100644 --- a/src/Forms/Compare.cs +++ b/src/Forms/Compare.cs @@ -68,7 +68,7 @@ namespace Nikse.SubtitleEdit.Forms openFileDialog1.Filter = Utilities.GetOpenDialogFilter(); subtitleListView1.SelectIndexAndEnsureVisible(0); - _language1 = Utilities.AutoDetectGoogleLanguage(_subtitle1); + _language1 = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle1); } public void Initialize(Subtitle subtitle1, string subtitleFileName1, Subtitle subtitle2, string subtitleFileName2) @@ -81,7 +81,7 @@ namespace Nikse.SubtitleEdit.Forms _subtitle2 = subtitle2; labelSubtitle2.Text = subtitleFileName2; - _language1 = Utilities.AutoDetectGoogleLanguage(_subtitle1); + _language1 = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle1); CompareSubtitles(); if (string.IsNullOrEmpty(subtitleFileName1)) @@ -167,7 +167,7 @@ namespace Nikse.SubtitleEdit.Forms subtitleListView1.SelectIndexAndEnsureVisible(0); subtitleListView2.SelectIndexAndEnsureVisible(0); labelSubtitle1.Text = openFileDialog1.FileName; - _language1 = Utilities.AutoDetectGoogleLanguage(_subtitle1); + _language1 = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle1); if (_subtitle1.Paragraphs.Count > 0) CompareSubtitles(); } @@ -908,7 +908,7 @@ namespace Nikse.SubtitleEdit.Forms subtitleListView1.SelectIndexAndEnsureVisible(0); subtitleListView2.SelectIndexAndEnsureVisible(0); labelSubtitle1.Text = filePath; - _language1 = Utilities.AutoDetectGoogleLanguage(_subtitle1); + _language1 = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle1); if (_subtitle1.Paragraphs.Count > 0) CompareSubtitles(); } diff --git a/src/Forms/ExportPngXml.cs b/src/Forms/ExportPngXml.cs index bf3ff1092..29c32a16c 100644 --- a/src/Forms/ExportPngXml.cs +++ b/src/Forms/ExportPngXml.cs @@ -2854,7 +2854,7 @@ $DROP=[DROPVALUE]" + Environment.NewLine + Environment.NewLine + labelLanguage.Visible = true; comboBoxLanguage.Visible = true; 
comboBoxLanguage.Items.Clear(); - string languageCode = Utilities.AutoDetectGoogleLanguageOrNull(subtitle); + string languageCode = LanguageAutoDetect.AutoDetectGoogleLanguageOrNull(subtitle); if (languageCode == null) languageCode = Configuration.Settings.Tools.ExportVobSubLanguage; for (int i = 0; i < IfoParser.ArrayOfLanguage.Count; i++) diff --git a/src/Forms/FixCommonErrors.cs b/src/Forms/FixCommonErrors.cs index 04e45bc75..4a5aaa6fd 100644 --- a/src/Forms/FixCommonErrors.cs +++ b/src/Forms/FixCommonErrors.cs @@ -222,9 +222,9 @@ namespace Nikse.SubtitleEdit.Forms public void Initialize(Subtitle subtitle, SubtitleFormat format, Encoding encoding) { - _autoDetectGoogleLanguage = Utilities.AutoDetectGoogleLanguage(encoding); // Guess language via encoding + _autoDetectGoogleLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(encoding); // Guess language via encoding if (string.IsNullOrEmpty(_autoDetectGoogleLanguage)) - _autoDetectGoogleLanguage = Utilities.AutoDetectGoogleLanguage(subtitle); // Guess language based on subtitle contents + _autoDetectGoogleLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); // Guess language based on subtitle contents if (_autoDetectGoogleLanguage.Equals("zh", StringComparison.OrdinalIgnoreCase)) _autoDetectGoogleLanguage = "zh-CHS"; // Note that "zh-CHS" (Simplified Chinese) and "zh-CHT" (Traditional Chinese) are neutral cultures CultureInfo ci = CultureInfo.GetCultureInfo(_autoDetectGoogleLanguage); @@ -515,7 +515,7 @@ namespace Nikse.SubtitleEdit.Forms if (_namesEtcList == null) { _namesEtcList = new List(); - string languageTwoLetterCode = Utilities.AutoDetectGoogleLanguage(Subtitle); + string languageTwoLetterCode = LanguageAutoDetect.AutoDetectGoogleLanguage(Subtitle); // Will contains both one word names and multi names var namesList = new NamesList(Configuration.DictionariesFolder, languageTwoLetterCode, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl); diff 
--git a/src/Forms/GoogleTranslate.cs b/src/Forms/GoogleTranslate.cs index 67c0e1190..0a4d44d0e 100644 --- a/src/Forms/GoogleTranslate.cs +++ b/src/Forms/GoogleTranslate.cs @@ -101,9 +101,9 @@ namespace Nikse.SubtitleEdit.Forms _subtitle = subtitle; _translatedSubtitle = new Subtitle(subtitle); - string defaultFromLanguage = Utilities.AutoDetectGoogleLanguage(encoding); // Guess language via encoding + string defaultFromLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(encoding); // Guess language via encoding if (string.IsNullOrEmpty(defaultFromLanguage)) - defaultFromLanguage = Utilities.AutoDetectGoogleLanguage(subtitle); // Guess language based on subtitle contents + defaultFromLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); // Guess language based on subtitle contents FillComboWithLanguages(comboBoxFrom); int i = 0; diff --git a/src/Forms/ImportSceneChanges.cs b/src/Forms/ImportSceneChanges.cs index 67f735e2d..ce4b81fea 100644 --- a/src/Forms/ImportSceneChanges.cs +++ b/src/Forms/ImportSceneChanges.cs @@ -46,7 +46,7 @@ namespace Nikse.SubtitleEdit.Forms { try { - Encoding encoding = Utilities.GetEncodingFromFile(fileName); + Encoding encoding = LanguageAutoDetect.GetEncodingFromFile(fileName); string s = File.ReadAllText(fileName, encoding).Trim(); if (s.Contains('.')) radioButtonSeconds.Checked = true; diff --git a/src/Forms/ImportText.cs b/src/Forms/ImportText.cs index 10f61e6d0..fba20e585 100644 --- a/src/Forms/ImportText.cs +++ b/src/Forms/ImportText.cs @@ -607,7 +607,7 @@ namespace Nikse.SubtitleEdit.Forms { try { - Encoding encoding = Utilities.GetEncodingFromFile(fileName); + Encoding encoding = LanguageAutoDetect.GetEncodingFromFile(fileName); textBoxText.Text = File.ReadAllText(fileName, encoding); SetVideoFileName(fileName); } diff --git a/src/Forms/ImportUnknownFormat.cs b/src/Forms/ImportUnknownFormat.cs index d481a85c8..dbf4934fe 100644 --- a/src/Forms/ImportUnknownFormat.cs +++ b/src/Forms/ImportUnknownFormat.cs @@ -66,7 
+66,7 @@ namespace Nikse.SubtitleEdit.Forms try { SubtitleListview1.Items.Clear(); - Encoding encoding = Utilities.GetEncodingFromFile(fileName); + Encoding encoding = LanguageAutoDetect.GetEncodingFromFile(fileName); textBoxText.Text = File.ReadAllText(fileName, encoding); // check for RTF file diff --git a/src/Forms/Main.cs b/src/Forms/Main.cs index 2327190b1..d4d3b1d7a 100644 --- a/src/Forms/Main.cs +++ b/src/Forms/Main.cs @@ -1934,7 +1934,7 @@ namespace Nikse.SubtitleEdit.Forms if (format == null && ext == ".wsb") { var wsb = new Wsb(); - var list = new List(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName))); + var list = new List(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName))); if (wsb.IsMine(list, fileName)) { wsb.LoadSubtitle(_subtitle, list, fileName); @@ -2102,7 +2102,7 @@ namespace Nikse.SubtitleEdit.Forms try { var bdnXml = new BdnXml(); - var list = new List(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName))); + var list = new List(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName))); if (bdnXml.IsMine(list, fileName)) { if (ContinueNewOrExit()) @@ -2123,7 +2123,7 @@ namespace Nikse.SubtitleEdit.Forms try { var fcpImage = new FinalCutProImage(); - var list = new List(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName))); + var list = new List(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName))); if (fcpImage.IsMine(list, fileName)) { if (ContinueNewOrExit()) @@ -2204,7 +2204,7 @@ namespace Nikse.SubtitleEdit.Forms try { var dost = new Dost(); - var list = new List(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName))); + var list = new List(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName))); if (dost.IsMine(list, fileName)) { if (ContinueNewOrExit()) @@ -2223,7 +2223,7 @@ namespace Nikse.SubtitleEdit.Forms try { var son = new Son(); - var list = new 
List(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName))); + var list = new List(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName))); if (son.IsMine(list, fileName)) { if (ContinueNewOrExit()) @@ -2264,7 +2264,7 @@ namespace Nikse.SubtitleEdit.Forms try { var satBoxPng = new SatBoxPng(); - var list = new List(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName))); + var list = new List(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName))); if (satBoxPng.IsMine(list, fileName)) { var subtitle = new Subtitle(); @@ -2285,7 +2285,7 @@ namespace Nikse.SubtitleEdit.Forms try { var sst = new SonicScenaristBitmaps(); - var list = new List(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName))); + var list = new List(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName))); if (sst.IsMine(list, fileName)) { if (ContinueNewOrExit()) @@ -2304,7 +2304,7 @@ namespace Nikse.SubtitleEdit.Forms try { var htmlSamiArray = new HtmlSamiArray(); - var list = new List(File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName))); + var list = new List(File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName))); if (htmlSamiArray.IsMine(list, fileName)) { htmlSamiArray.LoadSubtitle(_subtitle, list, fileName); @@ -2429,7 +2429,7 @@ namespace Nikse.SubtitleEdit.Forms if (ext == ".xml" || ext == ".dfxp") { var sb = new StringBuilder(); - foreach (var line in File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName))) + foreach (var line in File.ReadAllLines(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName))) sb.AppendLine(line); var xmlAsString = sb.ToString().Trim(); @@ -2453,7 +2453,7 @@ namespace Nikse.SubtitleEdit.Forms // Try to use a generic subtitle format parser (guessing subtitle format) try { - var enc = Utilities.GetEncodingFromFile(fileName); + var enc = LanguageAutoDetect.GetEncodingFromFile(fileName); var s = 
File.ReadAllText(fileName, enc); // check for RTF file @@ -5119,7 +5119,7 @@ namespace Nikse.SubtitleEdit.Forms return; } - bool isSwedish = Utilities.AutoDetectGoogleLanguage(_subtitle) == "sv"; + bool isSwedish = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle) == "sv"; string promptText = _language.TranslateSwedishToDanish; if (!isSwedish) promptText = _language.TranslateSwedishToDanishWarning; @@ -5424,7 +5424,7 @@ namespace Nikse.SubtitleEdit.Forms int totalLinesChanged = 0; try { - wordSpellChecker = new WordSpellChecker(this, Utilities.AutoDetectGoogleLanguage(_subtitle)); + wordSpellChecker = new WordSpellChecker(this, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle)); wordSpellChecker.NewDocument(); Application.DoEvents(); } @@ -6694,10 +6694,10 @@ namespace Nikse.SubtitleEdit.Forms private void ButtonAutoBreakClick(object sender, EventArgs e) { - string language = Utilities.AutoDetectGoogleLanguage(_subtitle); + string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle); string languageOriginal = string.Empty; if (_subtitleAlternate != null) - languageOriginal = Utilities.AutoDetectGoogleLanguage(_subtitleAlternate); + languageOriginal = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate); if (SubtitleListview1.SelectedItems.Count > 1) { @@ -7201,7 +7201,7 @@ namespace Nikse.SubtitleEdit.Forms private void SplitSelectedParagraph(double? splitSeconds, int? textIndex) { - string language = Utilities.AutoDetectGoogleLanguage(_subtitle); + string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle); int? 
alternateTextIndex = null; if (textBoxListViewTextAlternate.Focused) @@ -7454,7 +7454,7 @@ namespace Nikse.SubtitleEdit.Forms var originalCurrent = Utilities.GetOriginalParagraph(firstSelectedIndex, currentParagraph, _subtitleAlternate.Paragraphs); if (originalCurrent != null) { - string languageOriginal = Utilities.AutoDetectGoogleLanguage(_subtitleAlternate); + string languageOriginal = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate); originalCurrent.EndTime.TotalMilliseconds = currentParagraph.EndTime.TotalMilliseconds; var originalNew = new Paragraph(newParagraph); @@ -7639,7 +7639,7 @@ namespace Nikse.SubtitleEdit.Forms private void MergeBeforeToolStripMenuItemClick(object sender, EventArgs e) { - string language = Utilities.AutoDetectGoogleLanguage(_subtitle); + string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle); if (_subtitle.Paragraphs.Count > 0 && SubtitleListview1.SelectedItems.Count > 0) { int firstSelectedIndex = SubtitleListview1.SelectedItems[0].Index; @@ -7746,7 +7746,7 @@ namespace Nikse.SubtitleEdit.Forms string text = sb.ToString(); text = HtmlUtil.FixInvalidItalicTags(text); text = ChangeAllLinesItalictoSingleItalic(text); - text = Utilities.AutoBreakLine(text, Utilities.AutoDetectGoogleLanguage(_subtitle)); + text = Utilities.AutoBreakLine(text, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle)); currentParagraph.Text = text; //display time @@ -7894,7 +7894,7 @@ namespace Nikse.SubtitleEdit.Forms if (old1.Contains(Environment.NewLine) || old2.Contains(Environment.NewLine) || old1.Length > Configuration.Settings.General.SubtitleLineMaximumLength || old2.Length > Configuration.Settings.General.SubtitleLineMaximumLength) - original.Text = Utilities.AutoBreakLine(original.Text, Utilities.AutoDetectGoogleLanguage(_subtitleAlternate)); + original.Text = Utilities.AutoBreakLine(original.Text, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate)); if (string.IsNullOrWhiteSpace(old1)) original.Text = 
original.Text.TrimStart(); @@ -7939,7 +7939,7 @@ namespace Nikse.SubtitleEdit.Forms if (old1.Contains(Environment.NewLine) || old2.Contains(Environment.NewLine) || old1.Length > Configuration.Settings.General.SubtitleLineMaximumLength || old2.Length > Configuration.Settings.General.SubtitleLineMaximumLength) - currentParagraph.Text = Utilities.AutoBreakLine(currentParagraph.Text, Utilities.AutoDetectGoogleLanguage(_subtitle)); + currentParagraph.Text = Utilities.AutoBreakLine(currentParagraph.Text, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle)); if (string.IsNullOrWhiteSpace(old1)) currentParagraph.Text = currentParagraph.Text.TrimStart(); @@ -9803,7 +9803,7 @@ namespace Nikse.SubtitleEdit.Forms bool saveChangeCaseChanges = true; var casingNamesLinesChanged = 0; - changeCasing.FixCasing(selectedLines, Utilities.AutoDetectLanguageName(Configuration.Settings.General.SpellCheckLanguage, _subtitle)); + changeCasing.FixCasing(selectedLines, LanguageAutoDetect.AutoDetectLanguageName(Configuration.Settings.General.SpellCheckLanguage, _subtitle)); if (changeCasing.ChangeNamesToo) { using (var changeCasingNames = new ChangeCasingNames()) @@ -11572,10 +11572,10 @@ namespace Nikse.SubtitleEdit.Forms if (_subtitle.Paragraphs.Count > 0 && SubtitleListview1.SelectedItems.Count > 0) { MakeHistoryForUndo(_language.BeforeAutoBalanceSelectedLines); - string language = Utilities.AutoDetectGoogleLanguage(_subtitle); + string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle); string languageOriginal = string.Empty; if (_subtitleAlternate != null) - Utilities.AutoDetectGoogleLanguage(_subtitleAlternate); + languageOriginal = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate); foreach (ListViewItem item in SubtitleListview1.SelectedItems) { var p = _subtitle.GetParagraphOrDefault(item.Index); @@ -11779,7 +11779,7 @@ namespace Nikse.SubtitleEdit.Forms if (autoBreakUnbreakLines.ShowDialog() == DialogResult.OK && autoBreakUnbreakLines.FixedText.Count > 0) { 
MakeHistoryForUndo(_language.BeforeAutoBalanceSelectedLines); - var language = Utilities.AutoDetectGoogleLanguage(_subtitle); + var language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle); SubtitleListview1.BeginUpdate(); foreach (int index in SubtitleListview1.SelectedIndices) { @@ -13744,7 +13744,7 @@ namespace Nikse.SubtitleEdit.Forms private void buttonGoogleTranslateIt_Click(object sender, EventArgs e) { - string languageId = Utilities.AutoDetectGoogleLanguage(_subtitle); + string languageId = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle); System.Diagnostics.Process.Start("https://translate.google.com/#auto|" + languageId + "|" + Utilities.UrlEncode(textBoxSearchWord.Text)); } @@ -17087,7 +17087,7 @@ namespace Nikse.SubtitleEdit.Forms var p = _subtitle.GetParagraphOrDefault(firstSelectedIndex); if (p != null) { - string defaultFromLanguage = Utilities.AutoDetectGoogleLanguage(_subtitle); + string defaultFromLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle); string defaultToLanguage = defaultFromLanguage; if (_subtitleAlternate != null) { @@ -17095,7 +17095,7 @@ namespace Nikse.SubtitleEdit.Forms if (o != null) { p = o; - defaultFromLanguage = Utilities.AutoDetectGoogleLanguage(_subtitleAlternate); + defaultFromLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate); } } Cursor = Cursors.WaitCursor; diff --git a/src/Forms/MergeShortLines.cs b/src/Forms/MergeShortLines.cs index 551f08290..19a60c355 100644 --- a/src/Forms/MergeShortLines.cs +++ b/src/Forms/MergeShortLines.cs @@ -110,7 +110,7 @@ namespace Nikse.SubtitleEdit.Forms if (clearFixes) listViewFixes.Items.Clear(); numberOfMerges = 0; - string language = Utilities.AutoDetectGoogleLanguage(subtitle); + string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); var mergedSubtitle = new Subtitle(); bool lastMerged = false; Paragraph p = null; diff --git a/src/Forms/MergeTextWithSameTimeCodes.cs b/src/Forms/MergeTextWithSameTimeCodes.cs index 
4c281d6ca..3817d80fa 100644 --- a/src/Forms/MergeTextWithSameTimeCodes.cs +++ b/src/Forms/MergeTextWithSameTimeCodes.cs @@ -52,7 +52,7 @@ namespace Nikse.SubtitleEdit.Forms NumberOfMerges = 0; _subtitle = subtitle; MergeTextWithSameTimeCodes_ResizeEnd(null, null); - _language = Utilities.AutoDetectGoogleLanguage(subtitle); + _language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); } private void previewTimer_Tick(object sender, EventArgs e) diff --git a/src/Forms/SpellCheck.cs b/src/Forms/SpellCheck.cs index 351354524..e7cab1111 100644 --- a/src/Forms/SpellCheck.cs +++ b/src/Forms/SpellCheck.cs @@ -967,7 +967,7 @@ namespace Nikse.SubtitleEdit.Forms } } if (autoDetect || string.IsNullOrEmpty(_languageName)) - _languageName = Utilities.AutoDetectLanguageName(_languageName, subtitle); + _languageName = LanguageAutoDetect.AutoDetectLanguageName(_languageName, subtitle); string dictionary = Utilities.DictionaryFolder + _languageName; LoadDictionaries(dictionaryFolder, dictionary); @@ -1128,7 +1128,7 @@ namespace Nikse.SubtitleEdit.Forms { gd.ShowDialog(this); } - FillSpellCheckDictionaries(Utilities.AutoDetectLanguageName(null, _subtitle)); + FillSpellCheckDictionaries(LanguageAutoDetect.AutoDetectLanguageName(null, _subtitle)); if (comboBoxDictionaries.Items.Count > 0 && comboBoxDictionaries.SelectedIndex == -1) comboBoxDictionaries.SelectedIndex = 0; ComboBoxDictionariesSelectedIndexChanged(null, null); diff --git a/src/Forms/SplitLongLines.cs b/src/Forms/SplitLongLines.cs index b2814646d..53ec804fc 100644 --- a/src/Forms/SplitLongLines.cs +++ b/src/Forms/SplitLongLines.cs @@ -145,7 +145,7 @@ namespace Nikse.SubtitleEdit.Forms if (clearFixes) listViewFixes.Items.Clear(); numberOfSplits = 0; - string language = Utilities.AutoDetectGoogleLanguage(subtitle); + string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); var splittedSubtitle = new Subtitle(); string[] expectedPunctuations = { ". -", "! -", "? 
-" }; for (int i = 0; i < subtitle.Paragraphs.Count; i++) diff --git a/src/Test/Core/LanguageAutoDetectTest.cs b/src/Test/Core/LanguageAutoDetectTest.cs new file mode 100644 index 000000000..b32dc585b --- /dev/null +++ b/src/Test/Core/LanguageAutoDetectTest.cs @@ -0,0 +1,39 @@ +using System.IO; +using System.Text; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Nikse.SubtitleEdit.Core; + +namespace Test.Core +{ + + [DeploymentItem("Files")] + [TestClass] + public class LanguageAutoDetectTest + { + + private static string GetLanguageCode(string fileName) + { + fileName = Path.Combine(Directory.GetCurrentDirectory(), fileName); + var sub = new Subtitle(); + Encoding encoding; + sub.LoadSubtitle(fileName, out encoding, null); + return LanguageAutoDetect.AutoDetectGoogleLanguage(sub); + } + + [TestMethod] + public void AutoDetectRussian() + { + var languageCode = GetLanguageCode("auto_detect_Russian.srt"); + Assert.AreEqual("ru", languageCode); + } + + [TestMethod] + public void AutoDetectDanish() + { + var languageCode = GetLanguageCode("auto_detect_Danish.srt"); + Assert.AreEqual("da", languageCode); + } + + + } +} diff --git a/src/Test/Files/auto_detect_Danish.srt b/src/Test/Files/auto_detect_Danish.srt new file mode 100644 index 000000000..744692c39 --- /dev/null +++ b/src/Test/Files/auto_detect_Danish.srt @@ -0,0 +1,1403 @@ +1 +00:00:00,000 --> 00:00:02,200 +Danske underteksster + +2 +00:00:02,201 --> 00:00:04,201 +FBI agent Dunham. + +3 +00:00:04,202 --> 00:00:07,200 +Peter Bishop, og hans +far Walter Bishop. + +4 +00:00:07,201 --> 00:00:13,500 +Deres arbejde, har ledt dem til Walters +tidligere kollega Dr. William Bell. + +5 +00:00:13,501 --> 00:00:17,300 +Vi har beviser på, at William Bell +står bag nogle biologiske angreb. + +6 +00:00:17,301 --> 00:00:20,900 +Deres søgen efter Bell, har +ført dem nær det umulige. + +7 +00:00:20,901 --> 00:00:25,200 +- Jeg skal nok finde ham. +- William Bell er ikke I denne verden. 
+ +8 +00:00:25,201 --> 00:00:29,000 +Jeg har ventet længe på dette. + +9 +00:01:24,500 --> 00:01:27,800 +Undskyld mig? + +10 +00:01:27,801 --> 00:01:30,000 +Er du okay? + +11 +00:02:59,000 --> 00:03:02,800 +Et bilsammenstød, politiet +afhører vidner nu. + +12 +00:03:02,801 --> 00:03:07,200 +- Hvad skete der? +- Det er din sag. + +13 +00:03:07,201 --> 00:03:11,100 +- Hvem kørte bilen? + +14 +00:03:22,501 --> 00:03:28,500 +- Du skal ikke lave noget til mig, Walter. +- Der er noget du ikke ved om mig. + +15 +00:03:28,501 --> 00:03:38,199 +- At du ikke kan forlade et supermarked? +- Før jeg blev professor I biokemi, + +16 +00:03:38,200 --> 00:03:45,100 +var jeg souschef I +Bakersfield Food Lab. + +17 +00:03:45,101 --> 00:03:51,800 +- Min chef var Seymor Brodien. +- Burde jeg kende ham? + +18 +00:03:51,801 --> 00:03:55,600 +Han opfandt ho-ho'et. + +19 +00:03:55,601 --> 00:04:01,500 +Selvfølgelig. Walter, jeg vil ud herfra, +og du skal ikke bage mig en kage. + +20 +00:04:01,501 --> 00:04:08,800 +- Jeg laver dig en cremesovs. +- Jeg har aldrig kunne lide cremesovs. + +21 +00:04:08,801 --> 00:04:16,600 +Du kunne lide det som barn. Fredag er din +fødselsdag, og du skal have god mad. + +22 +00:04:16,601 --> 00:04:24,500 +- Er jeg ude herfra på min fødselsdag? +- Inviter agent Dunham. + +23 +00:04:24,501 --> 00:04:28,199 +Jeg vil se hendes ansigt, +når hun smager min budding. + +24 +00:04:28,200 --> 00:04:30,100 +Det er foruroligende. + +25 +00:04:30,101 --> 00:04:33,000 +Peter Bishop. + +26 +00:04:34,201 --> 00:04:36,000 +Hvad? + +27 +00:04:41,400 --> 00:04:43,900 +Mr. Bishop? Jeg er agent Jessup. + +28 +00:04:43,901 --> 00:04:47,000 +- Hej, det er min far Walter. +- Jeg har nogle spørgsmål. + +29 +00:04:47,001 --> 00:04:49,600 +- Hvordan har hun det? +- Vi ved det ikke. + +30 +00:04:49,601 --> 00:04:54,300 +- Hvordan kan I ikke vide det? +- Vil du forklare Jeres arbejde? + +31 +00:04:54,301 --> 00:04:57,899 +- Hvor er Charlie Francis? 
+ +32 +00:04:57,900 --> 00:05:02,600 +- Han er I Quantico, jeg leder nu. +- Hvor er Olivia Dunham? + +33 +00:05:02,601 --> 00:05:08,400 +Vidner så en mand forlade bilen, men +ingen så nogle efterlade den anden bil. + +34 +00:05:08,401 --> 00:05:16,800 +Døren og selen var stadig låste, der var +ingen tegn på at nogle havde kørt bilen. + +35 +00:05:16,801 --> 00:05:18,649 +- Passer bremsesporene? + +36 +00:05:18,650 --> 00:05:24,000 +Fortæl hvad Jeres arbejde er? Dig, +Dunham og din far, jeres arbejde... + +37 +00:05:24,001 --> 00:05:30,300 +...Er hemmeligt. Jeg har +ikke tid til at forklare. + +38 +00:05:30,301 --> 00:05:36,500 +Jeg vil samarbejde, så snart du +finder en agent der kan hjælpe. + +39 +00:05:56,000 --> 00:06:05,999 +Fringe Sæson 2 Episode 2 +"A New Day In An Old Town" + +40 +00:06:06,000 --> 00:06:15,000 +Undertekster Af: + +41 +00:06:36,501 --> 00:06:43,000 +Amy Jessup, agent Broyles. +Vi skal snakke. + +42 +00:06:43,001 --> 00:06:48,900 +Det hele står I denne rapport. + +43 +00:06:48,901 --> 00:06:56,500 +En tilfældig borger kørte ind I en agent. +Derved er sagen lukket. + +44 +00:06:56,501 --> 00:07:00,300 +Skriv under her, tak. + +45 +00:07:00,301 --> 00:07:03,300 +- Men.. +- Skriv under, agent. + +46 +00:07:03,301 --> 00:07:06,500 +Det er en ordre. + +47 +00:07:25,600 --> 00:07:28,200 +Walter. + +48 +00:07:30,201 --> 00:07:34,900 +Hendes skader var for alvorlige. + +49 +00:07:34,901 --> 00:07:37,500 +Vi kan ikke genoplive +hendes hjerne. + +50 +00:07:37,501 --> 00:07:43,600 +Patienter med denne slags traumer, +genvinder aldrig bevidsthed. + +51 +00:07:43,601 --> 00:07:47,900 +Simpelt og absurd. + +52 +00:07:50,200 --> 00:07:58,500 +Liv og død, er kun relativt. +Det defineres af kulturen. + +53 +00:07:58,501 --> 00:08:04,599 +Det er uden tvivl en primitiv diagnose. +Agent Dunham er ikke død. + +54 +00:08:04,600 --> 00:08:07,300 +Hun er ikke død. + +55 +00:08:09,050 --> 00:08:11,300 +Walter, vent! 
+ +56 +00:08:11,301 --> 00:08:15,600 +- I må ikke gå derind! +- Vær sød at gå. + +57 +00:08:44,800 --> 00:08:48,800 +Det må du undskylde. + +58 +00:09:19,301 --> 00:09:23,700 +Må jeg gøre dig selskab? + +59 +00:09:32,701 --> 00:09:35,800 +Du drikker vel ikke? + +60 +00:09:35,801 --> 00:09:38,500 +En dobbelt. + +61 +00:09:42,200 --> 00:09:47,500 +- Hvordan fandt du mig? +- Jeg arbejder for FBI. + +62 +00:09:54,100 --> 00:09:57,300 +Min far er I "La La Land". + +63 +00:09:57,301 --> 00:10:05,499 +Med en dejlig cocktail. Med Valium, +Haldol, Seconal og Lorazepam. + +64 +00:10:05,500 --> 00:10:09,201 +Astrid babysitter. + +65 +00:10:09,202 --> 00:10:16,200 +Jeg skal til Washington I morgen. +Afdelingens betydning revurderes. + +66 +00:10:16,201 --> 00:10:22,500 +Jeg har fået at vide, at vores +resultater er uacceptable. + +67 +00:10:23,600 --> 00:10:28,500 +Lukker De din afdeling? + +68 +00:10:37,300 --> 00:10:41,000 +Hvad lavede vi overhovedet? + +69 +00:10:41,001 --> 00:10:48,600 +Vi ventede på en makaber død, eller +på at universet faldt fra hinanden. + +70 +00:10:48,601 --> 00:10:54,800 +Vi var et rengøringshold, +som skulle ordne rodet. + +71 +00:10:57,700 --> 00:11:02,700 +- Vi kom alligevel altid for sent. +- Det er ikke sandt. + +72 +00:11:03,500 --> 00:11:07,300 +Vi svigtede Olivia. + +73 +00:11:15,701 --> 00:11:19,500 +For agent Dunham. + +74 +00:11:22,101 --> 00:11:24,500 +For agent Dunham. + +75 +00:11:38,900 --> 00:11:41,500 +"Hemmeligt materiale". + +76 +00:12:24,400 --> 00:12:27,600 +Hej, Rachel. + +77 +00:12:27,601 --> 00:12:31,300 +- Hvornår kom du? +- For nogle timer siden. + +78 +00:12:31,301 --> 00:12:37,000 +Ella er hos en veninde. Hun måtte +ikke huske sin tante sådan. + +79 +00:12:37,001 --> 00:12:43,500 +Hun havde en +livsvilje uden lige. + +80 +00:12:44,500 --> 00:12:50,900 +De vil afslutte det I morgen, så +jeg ville bare sidde hos hende.. + +81 +00:12:53,900 --> 00:12:57,200 +Har du lyst til at...? + +82 +00:12:59,100 --> 00:13:01,500 +Ja.. 
+ +83 +00:13:04,000 --> 00:13:09,000 +Hun kunne godt lide dig, Peter. +Vidste du det? + +84 +00:13:54,000 --> 00:13:56,700 +Hej.. + +85 +00:14:18,000 --> 00:14:20,100 +Farvel Olivia.. + +86 +00:14:27,000 --> 00:14:29,600 +- Kan du huske dit navn? +- Olivia Dunham. + +87 +00:14:29,601 --> 00:14:32,800 +- Hvad år har vi? +- Hvad laver jeg på Hospitalet? + +88 +00:14:32,801 --> 00:14:37,000 +Du var ude for en +ulykke I New York. + +89 +00:14:38,200 --> 00:14:42,000 +- Er Peter her? +- Ja. + +90 +00:14:44,800 --> 00:14:47,400 +- Jeg var et sted. +- Ja, I New York. + +91 +00:14:47,401 --> 00:14:52,499 +- Nej. +- Du var ude for en ulykke. + +92 +00:14:52,500 --> 00:14:58,600 +Jeg tog et sted hen. Nogle +prøvede at stoppe mig. + +93 +00:14:58,601 --> 00:15:03,499 +- Hvad sker der? +- Mr. Bishop, hun skal slappe af. + +94 +00:15:03,500 --> 00:15:10,300 +Han fortalte mig noget. Jeg +skulle gøre noget vigtigt. + +95 +00:15:10,301 --> 00:15:14,899 +- Hvem fortalte dig det? +- Ved det ikke, men jeg skal gøre noget, + +96 +00:15:14,900 --> 00:15:20,000 +og jeg tror vores liv afhænger af det. +- Hvis liv? + +97 +00:15:22,500 --> 00:15:24,900 +Alles. + +98 +00:15:24,901 --> 00:15:32,900 +Jeg skal bruge min pistol, Peter. Nogle +vil skade mig, jeg er ikke sikker. + +99 +00:15:45,800 --> 00:15:51,399 +- Jeg skal snakke med agent Broyles. +- Han er I Washington. + +100 +00:15:51,400 --> 00:15:55,500 +Så agent Charlie Francis. + +101 +00:15:56,001 --> 00:15:58,201 +Hvad angår det? + +102 +00:15:58,702 --> 00:16:05,100 +Jeg arbejder for jer. Jeg skal +have filerne fra bilulykken I går. + +103 +00:16:06,200 --> 00:16:13,000 +- Din legitimation er ophævet. +- Hør her, du skal hjælpe mig. + +104 +00:16:13,001 --> 00:16:17,200 +- Du kan sende en ansøgning.. +- Nej, lyt til mig. + +105 +00:16:17,201 --> 00:16:23,800 +En af dine agenters liv afhænger af mit +arbejde, så find en der kan hjælpe! + +106 +00:16:23,801 --> 00:16:25,500 +Forlad venligst bygningen Hr. 
+ +107 +00:16:29,900 --> 00:16:33,900 +Jeg tager mig af ham. + +108 +00:16:38,000 --> 00:16:40,500 +Jeg er med hende. + +109 +00:16:43,301 --> 00:16:46,000 +- Hvad laver du I Boston? +- Kan du lide at kæmpe? + +110 +00:16:46,001 --> 00:16:51,299 +Tiden I Irak, med mafiaen. +Du har en lang historie. + +111 +00:16:51,300 --> 00:16:56,000 +- Du ville hjælpe med at få filerne? +- De ligger på bagsædet. + +112 +00:16:56,001 --> 00:17:00,600 +Bremsesporene var misvisende. +De var mørkest bagerst. + +113 +00:17:00,601 --> 00:17:05,000 +Hårdest bremsning +længst fra ulykken? + +114 +00:17:05,001 --> 00:17:07,700 +- Så føreren sænkede ikke farten. +- Han gassede op. + +115 +00:17:07,701 --> 00:17:13,400 +- Så han har ventet. +- Det er billeder fra overvågningen. + +116 +00:17:13,401 --> 00:17:18,900 +George Reed. Littleton Road. +Vi er der om 15 minutter. + +117 +00:17:18,901 --> 00:17:25,000 +- Din tur. Hvad er Fringe Afdelingen? +Hvad laver I? + +118 +00:17:25,601 --> 00:17:28,000 +Ingenting. + +119 +00:17:28,001 --> 00:17:30,200 +Ikke længere. + +120 +00:17:32,700 --> 00:17:35,500 +George Reed? + +121 +00:17:36,500 --> 00:17:38,300 +Mr. Reed? + +122 +00:17:39,301 --> 00:17:43,900 +Et lig. Se hans hudfarve. + +123 +00:17:44,400 --> 00:17:50,100 +Hvis han kørte bilen I New +York, så var det ikke I går. + +124 +00:17:50,500 --> 00:17:52,899 +Jeg skal hente nogen. + +125 +00:17:52,900 --> 00:17:56,700 +Hans anus er helt våd. + +126 +00:17:56,701 --> 00:18:05,900 +Udvidelse af endetarm og øjne. +Forårsaget af et stort væsketab. + +127 +00:18:06,700 --> 00:18:15,000 +Hvis det er en virus, så påvirker den ikke +fugle. Liget skal til mit laboratorium. + +128 +00:18:15,001 --> 00:18:19,300 +Det skal til lighuset, galning. +Er han sindssyg? + +129 +00:18:19,301 --> 00:18:21,600 +- Peter? +- Hvem leder? + +130 +00:18:21,601 --> 00:18:25,100 +- Det gør jeg. Lyt +til den gamle. + +131 +00:18:25,101 --> 00:18:27,900 +Fantastisk. 
+ +132 +00:18:29,700 --> 00:18:34,000 +- Er han sindssyg? +- Ja! + +133 +00:18:34,901 --> 00:18:39,499 +Hvad end Fringe Afdelingen er, +så siger FBI, at den er lukket. + +134 +00:18:39,500 --> 00:18:46,500 +- Og jeg har lige givet dig et lig. +- Hvorfor gør du det her? + +135 +00:18:46,501 --> 00:18:51,700 +- Jeg følger sagen. +- Det er ikke pænt at lyve, agent Jessup. + +136 +00:18:51,701 --> 00:18:56,200 +De fleste ville blive +bange for det her. + +137 +00:18:58,101 --> 00:19:05,300 +- Der er mere mellem himmel og jord.. +- Virkelig? Du er skør. + +138 +00:19:05,301 --> 00:19:08,000 +Peter?! + +139 +00:19:10,201 --> 00:19:15,000 +De sagde, at jeg måtte sidde +bagi med liget. Må jeg? + +140 +00:19:15,001 --> 00:19:17,300 +- Ja, selvfølgelig. + +141 +00:19:18,700 --> 00:19:22,700 +Men ikke mere medicin. + +142 +00:19:24,500 --> 00:19:28,500 +Jeg har ventet på +jer, hele mit liv. + +143 +00:19:37,500 --> 00:19:40,500 +Nu skal jeg være der. + +144 +00:19:42,501 --> 00:19:46,400 +Hvad kan jeg hjælpe dig med? + +145 +00:19:47,201 --> 00:19:52,600 +- Jeg søger en Selectric 251. +- Den findes ikke. + +146 +00:19:52,601 --> 00:19:59,500 +- Der er 245 og 255. +- Jeg skal bruge en 251. + +147 +00:20:03,801 --> 00:20:07,600 +Du er en af dem.. + +148 +00:20:09,300 --> 00:20:15,600 +Der er gået seks år. +Jeg regnede med at.. + +149 +00:20:28,201 --> 00:20:32,700 +Bagerst. Den sidste dør. + +150 +00:20:36,000 --> 00:20:40,301 +Fortæl dem, at jeg +ikke venter evigt. + +151 +00:21:59,800 --> 00:22:03,950 +- Hvad skal vi her? +- Min far var videnskabsmand. + +152 +00:22:03,951 --> 00:22:08,800 +- Som blev sendt på St. Claires. +- Ja, men der er mere. + +153 +00:22:08,801 --> 00:22:17,900 +Værre. Og noget godt også. Men +han plejede at arbejde her. + +154 +00:22:17,901 --> 00:22:24,300 +Han arbejdede her som professor, med +hemmelige projekter for staten. + +155 +00:22:24,301 --> 00:22:30,300 +Vi er her fordi at han mener, +at han kun kan arbejde hernede. 
+ +156 +00:22:30,301 --> 00:22:33,400 +Sidste chance for at vende om? + +157 +00:22:33,801 --> 00:22:37,700 +- Laver du sjov? +- Agent Jessup.. + +158 +00:22:38,301 --> 00:22:40,700 +...velkommen til +Fringe Afdelingen. + +159 +00:22:40,701 --> 00:22:42,900 +Astrid mød Amy. + +160 +00:22:42,901 --> 00:22:45,500 +- Fed frisure. +- Tak. + +161 +00:22:46,150 --> 00:22:52,800 +Hej, igen. Jeg skal bruge: 4 +brændere, 8 skåle og 1 fryser. + +162 +00:22:52,801 --> 00:22:57,900 +- Skal skålene være sterile? +- De er til cremesovsen, til fødselsdagen. + +163 +00:22:57,901 --> 00:23:01,100 +- Glem cremesovsen, Walter. +- Nej. + +164 +00:23:01,101 --> 00:23:08,900 +Jeg skal bruge saks, tang, +og en sav til obduktionen. + +165 +00:23:13,400 --> 00:23:17,100 +- Jeg kom så hurtigt jeg kunne. +- Jeg er okay. + +166 +00:23:17,101 --> 00:23:21,100 +- Du behøvede ikke at komme så langt. +- Du er ikke okay. + +167 +00:23:21,101 --> 00:23:24,200 +Jeg har det fint, Charlie. + +168 +00:23:29,400 --> 00:23:37,200 +På andet år hos politiet, fik vi et opkald. +Vi besøger gerningsstedet. + +169 +00:23:37,201 --> 00:23:45,000 +Manden siger at det +var et skænderi. + +170 +00:23:45,001 --> 00:23:50,100 +Min partner ringer fra det andet rum, +og siger at pigen skal med ambulancen. + +171 +00:23:50,101 --> 00:23:56,900 +Jeg vidste at han havde tævet sin +kæreste, så jeg tvang ham I håndjern. + +172 +00:23:56,901 --> 00:24:01,850 +Så hører jeg et skud +fra det andet rum. + +173 +00:24:01,851 --> 00:24:07,900 +Jeg vender mig, og +ser en blodig pige. + +174 +00:24:10,201 --> 00:24:15,200 +Hun er brutalt behandlet. + +175 +00:24:18,101 --> 00:24:22,899 +Hun sigter på mig med en pistol. + +176 +00:24:22,900 --> 00:24:30,400 +Jeg vidste ikke, at hun +havde dræbt min partner. + +177 +00:24:33,900 --> 00:24:39,400 +Hun skyder, og rammer +mig I brystet. + +178 +00:24:45,200 --> 00:24:54,800 +Jeg var I behandling I 20 dage, og jeg +fortalte min kone, at jeg havde det fint. 
+ +179 +00:24:56,101 --> 00:25:00,000 +Jeg var rystet, og bange. + +180 +00:25:04,001 --> 00:25:08,200 +Hvis du har gjort som mig.. + +181 +00:25:09,200 --> 00:25:13,200 +Så har du en pistol under puden. + +182 +00:25:13,801 --> 00:25:17,101 +Peter ringede til mig. + +183 +00:25:26,700 --> 00:25:32,700 +Du kan snyde doktorerne, +men ikke mig. + +184 +00:25:37,400 --> 00:25:42,100 +Jeg ved ikke hvad +der er sket mig. + +185 +00:25:42,101 --> 00:25:47,000 +Jeg er så bange. + +186 +00:25:49,801 --> 00:25:58,400 +Jeg kan ikke engang lade den. +Min hånd ryster for meget. + +187 +00:25:58,700 --> 00:26:03,100 +Du skal nok klare den. + +188 +00:26:06,400 --> 00:26:12,100 +Sikke en skam. Disse +lunger var gode engang. + +189 +00:26:12,101 --> 00:26:15,101 +- Koger blandingen? +- Om lidt. + +190 +00:26:15,102 --> 00:26:20,200 +Bland æg, sukker +og salt I en skål. + +191 +00:26:21,400 --> 00:26:25,400 +Der er intet galt med +blodprøven, Walter. + +192 +00:26:26,101 --> 00:26:34,100 +- Hvad tænker du? +- Jeg elsker cremesovs, men hader tærte. + +193 +00:26:34,101 --> 00:26:39,200 +Hvilet kan skyldes smagen, eller +problemer med franskmændene. + +194 +00:26:39,201 --> 00:26:44,900 +Hvilket jeg betvivler, da jeg elsker +"Moules a la creme normande". + +195 +00:26:44,901 --> 00:26:50,400 +Fantastisk, men har du nogle +idéer om hvordan han døde? + +196 +00:26:50,401 --> 00:26:58,600 +Nej, væsketabet virker til at være +forårsaget af en anden proces. + +197 +00:26:58,601 --> 00:27:06,600 +Men jeg fandt det utænkelige. +Kig I munden. + +198 +00:27:08,200 --> 00:27:12,400 +Kom tættere på. + +199 +00:27:13,901 --> 00:27:19,600 +- Hvad ser du I ganen? +- 3 huller. + +200 +00:27:19,601 --> 00:27:26,700 +Hvad er det? Og hvad mener +du med det utænkelige? + +201 +00:27:27,101 --> 00:27:30,800 +Det fik mig til at huske. + +202 +00:27:33,701 --> 00:27:42,700 +- Er det dig? +- Ja, min far er besat af min barndom. 
+ +203 +00:27:42,701 --> 00:27:48,000 +Han tjekker også mit åndedræt når jeg +sover, hvilket et lidt uhyggeligt. + +204 +00:27:48,001 --> 00:27:53,300 +Fokuser, tak. Dette er et eksperiment +som Belly og jeg lavede. + +205 +00:27:53,301 --> 00:28:01,900 +Vi lavede et fantastisk stof. +Leary troede ikke på os. + +206 +00:28:01,901 --> 00:28:06,200 +- Vil hendes hoved sprænge? +- Ja, men ikke fysisk. + +207 +00:28:06,201 --> 00:28:10,500 +Vi prøver at forøge hendes +femte, sjette og syvende chakra. + +208 +00:28:10,501 --> 00:28:14,900 +- Hals, tre øjne og en krone. +- Ja, Astrix. + +209 +00:28:14,901 --> 00:28:21,800 +Kommunikation, tale, frekvens, +viden, og tanker. Åndelighed. + +210 +00:28:21,801 --> 00:28:24,400 +- Jeg kan se ham. +- Hun skulle se gud. + +211 +00:28:24,401 --> 00:28:29,900 +Hvem er han? Manden +med maskinen? + +212 +00:28:29,901 --> 00:28:38,700 +Med tre søm. Tre søm I munden. + +213 +00:28:40,400 --> 00:28:46,400 +Han er soldat. Fra andetsteds. + +214 +00:28:46,401 --> 00:28:51,900 +Fra et sted som minder om +dette, men det er ikke her. + +215 +00:28:53,900 --> 00:28:58,399 +Han er fra et andet univers. + +216 +00:28:58,400 --> 00:29:03,700 +- Jeg kan se han stikker maskinen op. +- Han stikker maskinen op? + +217 +00:29:03,701 --> 00:29:08,100 +Ja, I munden. + +218 +00:29:09,200 --> 00:29:16,600 +Han forandres. Han skifter form. +Sådan gemmer de sig. De ligner os. + +219 +00:29:16,601 --> 00:29:20,000 +De kan være hvem som helst. + +220 +00:29:21,500 --> 00:29:28,500 +Log på databasen, og find +et lig med tre huller. + +221 +00:29:29,501 --> 00:29:33,400 +De kan være hvem som helst. + +222 +00:29:50,600 --> 00:29:56,100 +Usammenhængende tegn på invasion, +rapporter om løbsk teknologi. + +223 +00:29:56,101 --> 00:30:04,500 +"X" betegnelsen, og din Fringe Afdeling, +har været på FBI's budget I over 50 år. + +224 +00:30:04,501 --> 00:30:10,800 +- De flinke dage er ovre, Mr. Broyles. +- Oberst.. 
+ +225 +00:30:10,801 --> 00:30:16,700 +...eller speciel agent, senator. +- Oberst Broyles, + +226 +00:30:16,701 --> 00:30:22,500 +vi har alle et job.. +- Ja senator, jeg har et job, + +227 +00:30:22,501 --> 00:30:27,039 +som jeg har haft I lang +tid, for at beskytte + +228 +00:30:27,051 --> 00:30:31,600 +vores land, og vi er +ikke I sikkerhed. + +229 +00:30:31,601 --> 00:30:42,100 +Truslen kan være kendt, andre gange +er den utænkelig, og så står vi klar! + +230 +00:30:42,101 --> 00:30:47,400 +Og I burde takke gud for det. + +231 +00:30:48,800 --> 00:30:57,800 +Uden noget brugbart, noget +til at støtte finanserne.. + +232 +00:31:07,900 --> 00:31:12,700 +Hvis dit fremmøde er et tegn.. + +233 +00:31:12,701 --> 00:31:17,400 +Fringe Afdelingen +må ikke lukkes. + +234 +00:31:17,401 --> 00:31:23,900 +- Jeg skal ikke overbevises. +- Jamen vi kan ikke selv styre det. + +235 +00:31:33,000 --> 00:31:36,900 +Gør hvad du altid +har gjort Phillip.. + +236 +00:31:37,500 --> 00:31:40,700 +...red dagen. + +237 +00:31:58,200 --> 00:32:02,700 +- Politi? +- Ja, eller faktisk kun hende. + +238 +00:32:02,701 --> 00:32:07,299 +Hvor er liget som udløste +alarmen, med tre huller I munden? + +239 +00:32:07,300 --> 00:32:12,100 +- Lige derovre. +- Tak. + +240 +00:32:16,400 --> 00:32:22,400 +- De fandt liget 2 blokke fra biluheldet. +- Så de efterlader den originale krop? + +241 +00:32:22,401 --> 00:32:32,900 +Pigen fra videoen, sagde at det var +en soldat. Soldater gør én ting. + +242 +00:32:32,901 --> 00:32:37,800 +De fuldfører missionen. +Mordene var ikke tilfældige. + +243 +00:32:37,801 --> 00:32:43,400 +- Så han søger stadig sit mål. +- Olivia.. + +244 +00:33:08,000 --> 00:33:11,900 +Åh gud, du skræmte mig. + +245 +00:33:13,300 --> 00:33:18,600 +Vi har sendt hans beskrivelse til +hospitalet, hvis han er der, fanger vi ham. + +246 +00:33:22,900 --> 00:33:28,400 +- Du ser bedre ud. +- Tak, jeg er også ved at blive mig selv. 
+ +247 +00:33:30,100 --> 00:33:34,400 +- Hvordan er din hukommelse? +- I små bidder. + +248 +00:33:34,401 --> 00:33:40,000 +Jeg husker at jeg kørte til New +York, men heller ikke mere. + +249 +00:33:46,100 --> 00:33:53,200 +Sådan er det med kvæstelser, +folk kan miste flere dage. + +250 +00:33:54,200 --> 00:33:58,700 +Jeg skulle mødes med nogle.. + +251 +00:33:58,701 --> 00:34:03,700 +...og nogle kørte ind I mig. + +252 +00:34:06,701 --> 00:34:10,800 +- Men.. +- Men hvad? + +253 +00:34:13,400 --> 00:34:16,700 +Det giver ikke mening. + +254 +00:34:18,600 --> 00:34:24,900 +Jeg var et sted, for at +snakke med nogen, og så.. + +255 +00:34:25,801 --> 00:34:29,400 +...kom jeg tilbage til ulykken. + +256 +00:34:31,200 --> 00:34:38,100 +- Kan du huske hvem +du skulle mødes med? + +257 +00:34:39,900 --> 00:34:42,900 +Næsten.. + +258 +00:34:42,901 --> 00:34:49,399 +- Han fortalte mig noget. +- Hvilket? + +259 +00:34:49,400 --> 00:34:53,600 +- Fortsæt. +- Jeg kan ikke. + +260 +00:34:58,300 --> 00:35:03,700 +- Noget er gemt. +- Hvor? + +261 +00:35:04,000 --> 00:35:07,700 +- Jeg ved det ikke. +- Hvor er den gemt? + +262 +00:35:09,600 --> 00:35:14,400 +Jeg kan ikke huske det. + +263 +00:35:19,100 --> 00:35:23,900 +- Er det alt du ved? +- Ja. + +264 +00:35:27,100 --> 00:35:30,400 +Okay. + +265 +00:35:44,300 --> 00:35:47,200 +- Den mistænkte er blevet set. +- På Olivias værelse? + +266 +00:35:47,201 --> 00:35:51,800 +Etagen er låst, kun +sygeplejersken må komme. + +267 +00:35:56,200 --> 00:35:59,100 +Kæmp ikke imod, det er forbi. + +268 +00:36:06,000 --> 00:36:14,600 +Mistænkte er nu en mørkhåret kvinde I +en grå trøje. Hun løber mod kælderen. + +269 +00:36:14,901 --> 00:36:17,200 +- Er du okay? +- Fang den kælling. + +270 +00:37:34,200 --> 00:37:38,400 +- Charlie, er du okay? +- Ja. + +271 +00:38:12,700 --> 00:38:20,000 +- Hvem var hun? +- En formskifter fra et andet univers. + +272 +00:38:20,001 --> 00:38:23,600 +Fra der hvor du har været. 
+ +273 +00:38:25,500 --> 00:38:31,701 +Er det en dårlig ting, at ingen +af os tror, at jeg er skør? + +274 +00:38:34,100 --> 00:38:43,600 +- Hun spurgte efter noget der var gemt. +- Uanset hvad, regner Walter det ud. + +275 +00:38:51,700 --> 00:38:57,200 +- Jeg skal spørge dig om noget, Olivia. +- Ja? + +276 +00:39:05,200 --> 00:39:11,200 +Det sagde du til mig, lige før du vågnede. +Kan du huske det? + +277 +00:39:12,400 --> 00:39:21,100 +- Nej. Er det Latin? +- Nej, det er græsk. + +278 +00:39:24,000 --> 00:39:29,800 +Min mor sagde det til mig hver +aften, inden hun lagde mig I seng. + +279 +00:39:29,801 --> 00:39:35,800 +Der er vel ingen mening, når det +ikke kan blive mere skørt, vel? + +280 +00:39:38,700 --> 00:39:42,599 +Hvad betyder det? + +281 +00:39:42,600 --> 00:39:46,700 +Vær en bedre mand, end din far. + +282 +00:39:50,600 --> 00:39:58,699 +Walter var allerede væk.. Det var +en kode imellem min mor og jeg. + +283 +00:39:58,700 --> 00:40:05,500 +Hold dine venner tæt. +Beskyt dem du holder af. + +284 +00:40:07,700 --> 00:40:11,300 +Det er du god til. + +285 +00:40:17,900 --> 00:40:21,700 +Det er godt at se dig +igen, Olivia Dunham. + +286 +00:40:22,200 --> 00:40:25,400 +Det er også godt at se dig. + +287 +00:40:26,101 --> 00:40:29,200 +Det er tid til at blive tjekket. + +288 +00:40:30,201 --> 00:40:33,100 +Du burde få lidt søvn. + +289 +00:40:35,800 --> 00:40:42,200 +Peter? Lukker vores afdeling? + +290 +00:40:43,001 --> 00:40:46,001 +Nej. + +291 +00:40:50,500 --> 00:40:53,100 +- Agent Broyles? +- Mr. Bishop. + +292 +00:40:53,101 --> 00:40:59,500 +- Du ville have resultater. +- Ja. + +293 +00:41:00,600 --> 00:41:06,099 +Walter siger at denne teknologi ikke +er herfra, og at det er et bevis. + +294 +00:41:06,100 --> 00:41:11,700 +Hvis de kan lave den, så får de +en hær, som ligner dem de ønsker. + +295 +00:41:11,701 --> 00:41:16,599 +De kan få denne teknologi, +hvis De ikke lukker os. 
+ +296 +00:41:16,600 --> 00:41:24,700 +Fra nu af, er det os der bestemmer. +Vi vil ikke komme for sent mere. + +297 +00:41:29,400 --> 00:41:34,700 +Nogle skal jo redde +deres røve, ikke? + +298 +00:41:37,800 --> 00:41:42,400 +Du har overrasket mig Mr. +Bishop. + +299 +00:41:42,800 --> 00:41:45,300 +Tak. + +300 +00:42:26,300 --> 00:42:30,900 +Tillykke med +fødselsdagen, Peter! + +301 +00:42:30,901 --> 00:42:35,000 +Cremesovs! Tillykke Peter. + diff --git a/src/Test/Files/auto_detect_Russian.srt b/src/Test/Files/auto_detect_Russian.srt new file mode 100644 index 000000000..d87544e5d --- /dev/null +++ b/src/Test/Files/auto_detect_Russian.srt @@ -0,0 +1,860 @@ +1 +00:00:51,397 --> 00:00:56,603 +Ричмонде, штат Вирджиния 1865 +BORGERKRIGENS оформление + +2 +00:01:05,211 --> 00:01:09,515 +Последний броненосец +блокады DER BRYDER союз + +3 +00:01:22,929 --> 00:01:25,303 +Простите, г-н Kaptajn. + +4 +00:01:25,315 --> 00:01:27,700 +Просто дальше. Вы принимаете +на себя в будущем! + +5 +00:01:32,105 --> 00:01:34,908 +Готовы к вылету, г-н капитан. + +6 +00:01:43,817 --> 00:01:48,421 +Готовьтесь войти kanalen. +Hastighed пяти узлов. + +7 +00:01:54,127 --> 00:01:59,199 +- Направление nord. 115 градусов, +115 градусов на север. + +8 +00:02:01,401 --> 00:02:03,403 +Огонь! + +9 +00:02:11,211 --> 00:02:13,113 +Огонь! + +10 +00:02:18,318 --> 00:02:22,322 +- Полный вперед - Да, +господин капитан! + +11 +00:02:53,019 --> 00:02:57,223 +Стоп motorerne. Jeg хотим мира! + +12 +00:02:57,823 --> 00:03:02,628 +Подожгли порты Лук пистолет! + +13 +00:03:49,609 --> 00:03:53,913 +Датский перевод LHB и +Fields Synkroniseret Inside + +14 +00:04:15,801 --> 00:04:19,905 +FLÅDEHISTORIKERS фанатичный SØGEN +EFTER GHOST военный корабль + +15 +00:06:17,421 --> 00:06:19,924 +DEN не Батин. + +16 +00:07:44,008 --> 00:07:48,412 +Г-жа Nwokolo. Я Ева Рохас, WHO. +Dette является д-р. Хоппер. + +17 +00:07:48,612 --> 00:07:50,614 +Пожалуйста, следуйте с. + +18 +00:07:55,219 --> 00:07:58,623 +Извините mørket. 
Hans +глаза не терпят света. + +19 +00:07:58,723 --> 00:08:02,627 +- Как его зовут - Азикиве? +Большинство людей называют его Kiwe. + +20 +00:08:02,827 --> 00:08:07,398 +Здравствуйте, Kiwe, меня зовут Eva. +Vi должны смотреть на тебя, ладно? + +21 +00:08:07,598 --> 00:08:10,401 +- Как долго он болел +- два дня? + +22 +00:08:10,601 --> 00:08:15,205 +- Неужели он ездил недавно - Он был +с отцом в Мали на прошлой неделе. + +23 +00:08:15,406 --> 00:08:19,410 +- Где его отец теперь - Он в маяк. +Там он работает. + +24 +00:08:19,611 --> 00:08:21,801 +Артериальное давление 80 +более 50 Kredsløbssvigt. + +25 +00:08:21,813 --> 00:08:24,015 +Мы даем ему кровь. + +26 +00:08:30,721 --> 00:08:33,824 +- Сколько транквилизаторов, он должен иметь +- Дайте ему 2 мл? + +27 +00:08:38,897 --> 00:08:41,799 +Это хорошо, Kiwe. Alt хорошо. + +28 +00:08:49,607 --> 00:08:53,310 +- Ты в порядке - +Мали, как и другие? + +29 +00:08:55,613 --> 00:08:59,417 +- Это epidemi. +- Есть шесть случаев. Это не достаточно. + +30 +00:08:59,517 --> 00:09:02,120 +Сколько длится Tres? + +31 +00:09:02,220 --> 00:09:04,522 +Шесть тысяч? + +32 +00:09:05,323 --> 00:09:09,226 +Когда начинать det +at значит ничего? + +33 +00:09:15,900 --> 00:09:18,802 +Нам нужно найти источник, Фрэнк. + +34 +00:09:20,204 --> 00:09:23,907 +Вы хотите Мали, мужчин это +происходит у вас нет. + +35 +00:09:24,007 --> 00:09:27,811 +ВОЗ не потеряет flere +medarbejdere в гражданскую войну. + +36 +00:09:28,411 --> 00:09:30,413 +Сделайте свой доклад закончил. + +37 +00:09:30,513 --> 00:09:32,904 +Прекрасно. Так что может быть +использовано для ligsynet. + +38 +00:09:32,916 --> 00:09:35,319 +Ева? + +39 +00:09:38,021 --> 00:09:42,826 +Я делаю то, что kan. Jeg передать +его на рассмотрение Совета. + +40 +00:09:43,026 --> 00:09:46,797 +- Может быть, они слушают это gang. +- Tak. + +41 +00:09:47,297 --> 00:09:51,702 +- Нам нужна кровь из faderen. +, я могу его найти. + +42 +00:10:14,124 --> 00:10:16,126 +Алло? 
+ +43 +00:10:18,496 --> 00:10:20,698 +Г-н Nwokolo? + +44 +00:10:30,608 --> 00:10:32,810 +Г-н Nwokolo? + +45 +00:11:17,921 --> 00:11:20,823 +Вам нечего здесь делать. + +46 +00:11:27,798 --> 00:11:30,500 +Быстрый Пометьте ее кошелек. + +47 +00:11:55,925 --> 00:11:58,495 +Ты в порядке? + +48 +00:12:31,394 --> 00:12:34,697 +Возьмите его Hvad ты делаешь? + +49 +00:12:40,302 --> 00:12:43,205 +Остановить его с дерьмом! + +50 +00:12:46,109 --> 00:12:48,811 +Stop держите спокойно. + +51 +00:12:54,416 --> 00:12:58,420 +Постой тегов прямо сейчас!! + +52 +00:13:01,923 --> 00:13:04,894 +Дайте мне skiftenøglen. +Jeg нужно сейчас. + +53 +00:13:05,094 --> 00:13:07,096 +Есть ли? + +54 +00:13:09,198 --> 00:13:13,803 +Поэтому я должен использовать olien. +Mange спасибо! + +55 +00:13:16,906 --> 00:13:20,709 +Хорошо. Так же как и klaret. +Vi закончены. + +56 +00:13:21,310 --> 00:13:26,716 +Извините. Позвольте мне воспользоваться +det. Jeg имени Аль Giordino. + +57 +00:13:27,016 --> 00:13:30,319 +- Ева Rojas. +- Хорошая работа. + +58 +00:13:30,419 --> 00:13:32,822 +Добро пожаловать на борт. + +59 +00:13:33,622 --> 00:13:36,725 +Включите den. Få его. + +60 +00:13:41,497 --> 00:13:45,502 +Привет. Чувствовать себя лучше? +СГЭ Вызывается Руди. + +61 +00:13:45,602 --> 00:13:49,605 +- Простите, а где мы - Мы находимся +на Martha Ann. Это NUMA-офф судна. + +62 +00:13:49,806 --> 00:13:54,310 +Мы не знаем, кто вы, и мы valgte +selv пропатчить себя в руки + +63 +00:13:54,510 --> 00:13:59,815 +я ждал два месяца her. Ødelæg +речь идет не о дать ему утонуть. + +64 +00:14:00,015 --> 00:14:01,818 +Да, адмирал. + +65 +00:14:02,318 --> 00:14:07,924 +Пятый .. Четвёртое .. Третий .. +Второе .. 1! + +66 +00:14:20,302 --> 00:14:22,503 +Дамы и господа, - + +67 +00:14:22,504 --> 00:14:27,709 +- позвольте мне, после 772 лет пребывания +på havbunden представить вам - + +68 +00:14:28,010 --> 00:14:32,614 +- король Батин! + +69 +00:14:34,115 --> 00:14:36,818 +Молодцы, все. 
+ +70 +00:14:36,918 --> 00:14:41,523 +Кроме вас, Ал. Какого черта ты делаешь? +Det 10 тонн игры, а не дверь гаража! + +71 +00:14:41,723 --> 00:14:46,895 +Вы думаете о ваших Томпсон 1291. +Men это 1293rd + +72 +00:14:47,196 --> 00:14:49,086 +Но вы не можете +использовать любой из dem. + +73 +00:14:49,098 --> 00:14:50,999 +Boys? + +74 +00:14:51,099 --> 00:14:54,503 +Короля должна возвышаться над +folket på музей около пяти часов. + +75 +00:14:54,803 --> 00:14:58,808 +- Это будет fremme. +, я надеюсь, тоже. + +76 +00:15:00,309 --> 00:15:04,713 +- Вы на ноги +- Спасибо вам? + +77 +00:15:04,913 --> 00:15:08,116 +К счастью, вы только потеряли taske. +Det это не место, чтобы пойти. + +78 +00:15:08,216 --> 00:15:12,320 +- Черт, я потерял ход taske. +- Это было надеяться не стоит умирать? + +79 +00:15:12,521 --> 00:15:16,424 +- Это жесткий spørgsmål. +- Нет ничего более ценного, чем ваша жизнь. + +80 +00:15:16,624 --> 00:15:20,196 +Получить, что осел здесь и hjælp +med, чтобы очистить его отсюда. + +81 +00:15:20,296 --> 00:15:24,099 +- Простите, "жена" вызов - +Получить его снимают с крючка! + +82 +00:15:24,300 --> 00:15:26,402 +Получить промывают, мы +собираемся на вечеринку. + +83 +00:15:26,502 --> 00:15:28,704 +--Адмирал отставке. + +84 +00:15:28,804 --> 00:15:32,808 +- Джим Sandecker. +- Ева Рохас, я работаю в ВОЗ. + +85 +00:15:34,009 --> 00:15:37,013 +Ты похож на тех, кто +нуждается кофе. + +86 +00:15:38,314 --> 00:15:42,818 +Мне нужна ваша hjælp. Er его +слева направо или наоборот + +87 +00:15:43,018 --> 00:15:46,822 +Это сводит меня с vanvid. Du должна +научить меня, чтобы связать его. + +88 +00:15:47,022 --> 00:15:50,992 +- Это то, что делать с "вокруг дерева." +- I'll быть там через час. + +89 +00:15:51,292 --> 00:15:55,396 +- Что? Дирк? +- Это было Oshodi. + +90 +00:15:55,696 --> 00:15:58,700 +Он считает, что он что-то нашел. + +91 +00:15:58,900 --> 00:16:03,104 +- Есть также. Это здорово! +- Спасибо. 
+ +92 +00:16:03,304 --> 00:16:05,807 +Нет, нет Det велик для меня! + +93 +00:16:05,907 --> 00:16:10,211 +Я рад сказать правду Экер, у вас +не приходят на сегодня музей - + +94 +00:16:10,411 --> 00:16:13,914 +- потому что одна из nigerianske +underverden нашли доказательства - + +95 +00:16:14,014 --> 00:16:18,420 +- о том, что корабль затонул от +borgerkrigen er во время шторма в Африка. + +96 +00:16:18,620 --> 00:16:22,724 +Это то, что вы говорите, +право Sandecker Freak Out! + +97 +00:16:22,924 --> 00:16:25,748 +Я там. Он получает +все красные дюйма .. + +98 +00:16:25,760 --> 00:16:28,596 +лиса в погоне за кроликом +вокруг дерева, - + +99 +00:16:28,796 --> 00:16:33,601 +- в яму. Как завязать пн det. +Tag легко. Я буду там. + +100 +00:16:34,001 --> 00:16:37,606 +Спасибо. + +101 +00:16:38,707 --> 00:16:42,510 +Во-первых, я благодарю Лагос museum +for этот удивительный прием. + +102 +00:16:42,710 --> 00:16:48,816 +Я также хочу поблагодарить наших +hovedsponsor på этого проекта, Ив Massarde. + +103 +00:16:54,022 --> 00:16:59,394 +Мы NUMA. Это Nationale +Undervands Морское Агентство - + +104 +00:16:59,694 --> 00:17:03,999 +- и это, дамы и господа, +hvad что мы делаем. + +105 +00:17:05,600 --> 00:17:08,002 +Kong Батин. + +106 +00:17:10,505 --> 00:17:13,408 +- Он не в буфете +- Черт.! + +107 +00:17:13,508 --> 00:17:15,410 +- Хочешь Кебаб +- Нет, спасибо. + +108 +00:17:15,510 --> 00:17:19,614 +Правительства и private +organisationer, как наша - + +109 +00:17:19,814 --> 00:17:25,820 +- могут совместно содействовать at +historie, которые были потеряны по пути - + +110 +00:17:26,120 --> 00:17:31,592 +- снова вернулся в свою благодарность folk. +Mange. Хорошего вечера. + +111 +00:17:37,798 --> 00:17:40,689 +- Если у вас есть +компакт-дисков на вашем + +112 +00:17:40,701 --> 00:17:43,604 +корабле - Да, я купил у +вас в прошлом месяце? + +113 +00:17:44,405 --> 00:17:46,795 +У меня есть чудесное +произведение искусства + +114 +00:17:46,807 --> 00:17:49,209 +her. 
Direkte из Иракского +национального музея. + +115 +00:17:49,409 --> 00:17:52,814 +Не показывать мне эти ting. Så +хорошие друзья, это не так. + +116 +00:17:54,315 --> 00:17:57,218 +Вот курса. + +117 +00:17:57,318 --> 00:18:02,022 +Особый stykke. Det +я вам говорил. + +118 +00:18:03,323 --> 00:18:07,194 +Это давит мое hjerte bare, +чтобы показать вам. + +119 +00:18:14,202 --> 00:18:18,606 +- Где ты это +- Не касаясь? + +120 +00:18:19,006 --> 00:18:22,509 +Таким образом, хорошие +друзья, мы бы и нет? + +121 +00:18:37,992 --> 00:18:40,494 +Это большая партия. + +122 +00:18:42,096 --> 00:18:45,299 +- Спасибо за приглашение, admiral. +- удовольствие на моей стороне. + +123 +00:18:45,399 --> 00:18:49,404 +Ив, это женщина jeg fortalte вы, доктор. +Ева Рохас. + +124 +00:18:49,604 --> 00:18:52,707 +Мне очень приятно встретиться Dem. +Mit зовут Ив Massarde. + +125 +00:18:52,807 --> 00:18:55,510 +Это доктор. Фрэнк Хоппер. + +126 +00:18:55,610 --> 00:18:58,512 +Ив делает masse +forretninger в Африке. + +127 +00:18:58,612 --> 00:19:01,115 +Даже некоторые в Мали. + +128 +00:19:01,715 --> 00:19:03,717 +Вы извините меня? + +129 +00:19:03,817 --> 00:19:08,623 +Я понимаю, что я считаю, - дер-это +эпидемия на пути из Мали? + +130 +00:19:08,823 --> 00:19:11,592 +- Мы не будем называть его epidemi. +- Что вы это называете? + +131 +00:19:11,692 --> 00:19:14,595 +Epidemi. Så вы делаете +бизнес в Мали? + +132 +00:19:14,695 --> 00:19:19,299 +Знаете кого-то, кто может помочь нам med at +нажмите ВОЗ направить группу там, внизу? + +133 +00:19:19,499 --> 00:19:23,203 +- Мали находится под контролем +полевых командиров... генерал-Казим? + +134 +00:19:23,403 --> 00:19:25,304 +Ты его знаешь? + +135 +00:19:25,305 --> 00:19:27,333 +Он был лейтенантом в +армии, но дал себя selv en + +136 +00:19:27,345 --> 00:19:29,385 +продвижение по службе, когда +он выстрелил в президента. + +137 +00:19:29,510 --> 00:19:33,313 +- Он дает слово «военачальник» betydning. +И он управляет страной? 
+ +138 +00:19:33,513 --> 00:19:37,117 +Половина. Другие kontrollerer ingen +половины. Я не знаю, что хуже. + +139 +00:19:37,217 --> 00:19:41,221 +Я предупреждаю вас. Это meget farligt +для иностранцев прямо сейчас. + +140 +00:19:41,421 --> 00:19:44,046 +Но, вероятно, более +опасными для indfødte. + +141 +00:19:44,058 --> 00:19:46,694 +Но ваши мертвые будут плохо +выглядеть в газетах. + +142 +00:19:46,894 --> 00:19:49,697 +Это делает эпидемию тоже. + +143 +00:19:51,498 --> 00:19:57,604 +Хорошо. Я пытаюсь позвонить вокруг +немного, мужчин я просто бизнесмен - + +144 +00:19:57,904 --> 00:20:01,708 +- так было tålmodighed. +- Да, это ее сильная сторона. + +145 +00:20:01,808 --> 00:20:05,011 +Он не поможет os. Det была +пустая трата времени. + +146 +00:20:05,211 --> 00:20:11,418 +Вы можете не только баржи в en borgerkrig. +Вы знаете, это слишком опасно. + +147 +00:20:11,718 --> 00:20:13,620 +Я думаю, мы должны вернуться... + +148 +00:20:14,221 --> 00:20:17,391 +Адмирал, вы когда-нибудь видели +en gulddollar от Конфедерации? + +149 +00:20:17,591 --> 00:20:21,294 +- Не начинайте снова - Нет, потому +что они никогда не делали один! + +150 +00:20:21,394 --> 00:20:23,797 +Импринтинг машина +ødelagt ved войны. + +151 +00:20:23,997 --> 00:20:28,602 +- Я молюсь dig., но не раньше, +чем Джефферсон получил пять лет. + +152 +00:20:28,802 --> 00:20:31,905 +Четыре из них он дал +til sine генералов. + +153 +00:20:32,106 --> 00:20:34,496 +Ли Джексон... + +154 +00:20:34,508 --> 00:20:36,910 +Каждый раз, когда мы находимся +в Африке, есть корабль. + +155 +00:20:37,110 --> 00:20:40,714 +А старые havnejournaler. Vi +едет в Австралию завтра. + +156 +00:20:40,814 --> 00:20:45,119 +Четыре из них fundet. +Men пятый не является. + +157 +00:20:45,319 --> 00:20:50,790 +Это был дан друг familien. En умелым +капитаном имени Мейсон гробниц. + +158 +00:20:50,990 --> 00:20:55,094 +Капитан броненосец, CSS Техас. + +159 +00:20:56,796 --> 00:21:00,800 +- Кто это у тебя - Oshodi, +и у него от Endigue? 
+ +160 +00:21:00,900 --> 00:21:04,704 +Важно то, что в Endigue +fandt Labbezanga в Мали. + +161 +00:21:04,904 --> 00:21:09,909 +- Мой отец собирает mønter. +- монета отплыл в Нигере с Техас. + +162 +00:21:10,109 --> 00:21:14,914 +- Невозможно. Она не может с strøm. +- отпусти меня к Labbezanga и нюхать мало. + +163 +00:21:15,114 --> 00:21:18,617 +- Вы не получите моей båd. +- три дня. Всего за три дня! + +164 +00:21:19,418 --> 00:21:21,620 +Представьте себе, что. + +165 +00:21:24,591 --> 00:21:29,796 +Хорошо, если это не удается, snakker Я +никогда не говорить об этом больше! + +166 +00:21:31,698 --> 00:21:34,100 +У вас есть 72 часа. + +167 +00:21:34,200 --> 00:21:38,204 +Не наносекундных længere. I мальчика +только что купили вы на лодке. + +168 +00:21:38,404 --> 00:21:41,408 +Вы джентльмен, uanset, что +говорят другие о тебе! + +169 +00:21:41,508 --> 00:21:44,711 +Бьюсь об заклад, бутылку на +, мы никогда не найти его. + +170 +00:21:44,811 --> 00:21:47,414 +Скажем, целый ящик. + +171 +00:22:17,910 --> 00:22:20,213 +Привет. + +172 +00:22:20,314 --> 00:22:24,718 +Я пришел sent. Jeg сказали, я +хотел бы получить в. Восьмой + +173 +00:22:24,918 --> 00:22:28,421 +- Сказал я в. 9. +- это более 10! + +174 +00:22:30,489 --> 00:22:34,493 +Правда Экер сказал, что мы должны +называть его hvis возникли проблемы. + +175 +00:22:34,693 --> 00:22:36,595 +С чем? + +176 +00:22:36,795 --> 00:22:40,500 +- Я должен взять нас вверх по реке к Mali. +- Что? + +177 +00:22:40,600 --> 00:22:45,704 +- Нет, нет. Мы вас не Mali. +- Подожди! + +178 +00:22:45,905 --> 00:22:49,608 +Существует вспышки в Mali. +Det может начаться эпидемия. + +179 +00:22:49,808 --> 00:22:52,311 +И вы хотите, лифт, доктор? + +180 +00:22:52,411 --> 00:22:56,415 +- ВОЗ сократит свой бюджет +- Это очень важно. + +181 +00:22:56,615 --> 00:23:00,720 +- Иметь достаточно оборудования +- Наверное, нет? + +182 +00:23:05,891 --> 00:23:08,332 +Это небольшой лодке. Нет +конфиденциальность! 
+ +183 +00:23:08,344 --> 00:23:10,796 +Я не стесняюсь. + diff --git a/src/Test/Logic/Forms/RemoveTextForHearImpairedTest.cs b/src/Test/Logic/Forms/RemoveTextForHearImpairedTest.cs index 5c109737d..d39a19c2f 100644 --- a/src/Test/Logic/Forms/RemoveTextForHearImpairedTest.cs +++ b/src/Test/Logic/Forms/RemoveTextForHearImpairedTest.cs @@ -1056,7 +1056,6 @@ namespace Test.Logic.Forms } [TestMethod] - [DeploymentItem("SubtitleEdit.exe")] public void RemoveTextKeepMusicSymbolsButRemoveHI() { RemoveTextForHI target = GetRemoveTextForHiLib(); @@ -1070,7 +1069,6 @@ namespace Test.Logic.Forms } [TestMethod] - [DeploymentItem("SubtitleEdit.exe")] public void RemoveTextRemoveEmdash() { RemoveTextForHI target = GetRemoveTextForHiLib(); @@ -1085,7 +1083,6 @@ namespace Test.Logic.Forms } [TestMethod] - [DeploymentItem("SubtitleEdit.exe")] public void RemoveTextIfUppercaseEmdashRemoveInDialogue() { RemoveTextForHI target = GetRemoveTextForHiLib(); @@ -1098,7 +1095,6 @@ namespace Test.Logic.Forms } [TestMethod] - [DeploymentItem("SubtitleEdit.exe")] public void RemoveTextIfUppercaseEmdashRemoveInDialogueWithSpaces() { RemoveTextForHI target = GetRemoveTextForHiLib(); diff --git a/src/Test/Test.csproj b/src/Test/Test.csproj index 57208b9a9..71cb774a2 100644 --- a/src/Test/Test.csproj +++ b/src/Test/Test.csproj @@ -43,6 +43,7 @@ + @@ -114,6 +115,16 @@ PreserveNewest + + + PreserveNewest + + + + + PreserveNewest + +