mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-28 06:52:35 +01:00
Minor refactor: more efficient LanguageAutoDetect.GetCount()
This commit is contained in:
parent
be1e4f0d49
commit
c4beb2bac5
@ -11,12 +11,9 @@ namespace Nikse.SubtitleEdit.Core
|
||||
|
||||
/// <summary>
/// Counts the whole-word matches of any of the supplied words in a text,
/// using a single regular-expression pass over the text.
/// </summary>
/// <param name="text">The text to search.</param>
/// <param name="words">
/// The alternatives to look for. Entries are inserted into the pattern
/// unescaped, so regular-expression syntax inside an entry is honoured.
/// </param>
/// <returns>
/// The number of non-overlapping matches, or 0 when no words are supplied.
/// </returns>
private static int GetCount(string text, params string[] words)
{
    // With no alternatives the pattern would collapse to "\b()\b", which
    // matches at every word boundary and reports a bogus non-zero count.
    if (words == null || words.Length == 0)
    {
        return 0;
    }

    var options = RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture;

    // ExplicitCapture turns the unnamed group into a non-capturing one, so
    // the parentheses only scope the alternation between the two \b anchors.
    var pattern = "\\b(" + string.Join("|", words) + ")\\b";
    return Regex.Matches(text, pattern, options).Count;
}
|
||||
|
||||
private static int GetCountContains(string text, params string[] words)
|
||||
@ -103,7 +100,7 @@ namespace Nikse.SubtitleEdit.Core
|
||||
private static readonly string[] AutoDetectWordsHebrew = { "אתה", "אולי", "הוא", "בסדר", "יודע", "טוב" };
|
||||
private static readonly string[] AutoDetectWordsVietnamese = { "không", "[Tt]ôi", "anh", "đó", "ông" };
|
||||
private static readonly string[] AutoDetectWordsHungarian = { "hogy", "lesz", "tudom", "vagy", "mondtam", "még" };
|
||||
private static readonly string[] AutoDetectWordsTurkish = { "için", "Tamam", "Hayır", "benim", "daha", "deðil", "önce", "lazým", "benim", "çalýþýyor", "burada", "efendim" };
|
||||
private static readonly string[] AutoDetectWordsTurkish = { "için", "Tamam", "Hayır", "benim", "daha", "deðil", "önce", "lazým", "çalýþýyor", "burada", "efendim" };
|
||||
private static readonly string[] AutoDetectWordsCroatianAndSerbian = { "sam", "ali", "nije", "samo", "ovo", "kako", "dobro", "sve", "tako", "će", "mogu", "ću", "zašto", "nešto", "za" };
|
||||
private static readonly string[] AutoDetectWordsCroatian = { "što", "ovdje", "gdje", "kamo", "tko", "prije", "uvijek", "vrijeme", "vidjeti", "netko",
|
||||
"vidio", "nitko", "bok", "lijepo", "oprosti", "htio", "mjesto", "oprostite", "čovjek", "dolje",
|
||||
@ -548,14 +545,14 @@ namespace Nikse.SubtitleEdit.Core
|
||||
return russianEncoding;
|
||||
|
||||
Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai
|
||||
if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
|
||||
if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล", "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
|
||||
return thaiEncoding;
|
||||
|
||||
Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic
|
||||
Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew
|
||||
if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5)
|
||||
if (GetCount(arabicEncoding.GetString(buffer), AutoDetectWordsArabic) > 5)
|
||||
{
|
||||
if (GetCount(hewbrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10)
|
||||
if (GetCount(hewbrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 10)
|
||||
return hewbrewEncoding;
|
||||
return arabicEncoding;
|
||||
}
|
||||
@ -643,14 +640,14 @@ namespace Nikse.SubtitleEdit.Core
|
||||
return russianEncoding;
|
||||
|
||||
Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai
|
||||
if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
|
||||
if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล", "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
|
||||
return thaiEncoding;
|
||||
|
||||
Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic
|
||||
Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew
|
||||
if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5)
|
||||
if (GetCount(arabicEncoding.GetString(buffer), AutoDetectWordsArabic) > 5)
|
||||
{
|
||||
if (GetCount(hewbrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10)
|
||||
if (GetCount(hewbrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 10)
|
||||
return hewbrewEncoding;
|
||||
return arabicEncoding;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user