mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-28 06:52:35 +01:00
Minor refactor: more efficient LanguageAutoDetect.GetCount()
This commit is contained in:
parent
be1e4f0d49
commit
c4beb2bac5
@ -11,12 +11,9 @@ namespace Nikse.SubtitleEdit.Core
|
|||||||
|
|
||||||
private static int GetCount(string text, params string[] words)
|
private static int GetCount(string text, params string[] words)
|
||||||
{
|
{
|
||||||
int count = 0;
|
var options = RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture;
|
||||||
for (int i = 0; i < words.Length; i++)
|
var pattern = "\\b(" + string.Join("|", words) + ")\\b";
|
||||||
{
|
return Regex.Matches(text, pattern, options).Count;
|
||||||
count += Regex.Matches(text, "\\b" + words[i] + "\\b", (RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture)).Count;
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int GetCountContains(string text, params string[] words)
|
private static int GetCountContains(string text, params string[] words)
|
||||||
@ -103,7 +100,7 @@ namespace Nikse.SubtitleEdit.Core
|
|||||||
private static readonly string[] AutoDetectWordsHebrew = { "אתה", "אולי", "הוא", "בסדר", "יודע", "טוב" };
|
private static readonly string[] AutoDetectWordsHebrew = { "אתה", "אולי", "הוא", "בסדר", "יודע", "טוב" };
|
||||||
private static readonly string[] AutoDetectWordsVietnamese = { "không", "[Tt]ôi", "anh", "đó", "ông" };
|
private static readonly string[] AutoDetectWordsVietnamese = { "không", "[Tt]ôi", "anh", "đó", "ông" };
|
||||||
private static readonly string[] AutoDetectWordsHungarian = { "hogy", "lesz", "tudom", "vagy", "mondtam", "még" };
|
private static readonly string[] AutoDetectWordsHungarian = { "hogy", "lesz", "tudom", "vagy", "mondtam", "még" };
|
||||||
private static readonly string[] AutoDetectWordsTurkish = { "için", "Tamam", "Hayır", "benim", "daha", "deðil", "önce", "lazým", "benim", "çalýþýyor", "burada", "efendim" };
|
private static readonly string[] AutoDetectWordsTurkish = { "için", "Tamam", "Hayır", "benim", "daha", "deðil", "önce", "lazým", "çalýþýyor", "burada", "efendim" };
|
||||||
private static readonly string[] AutoDetectWordsCroatianAndSerbian = { "sam", "ali", "nije", "samo", "ovo", "kako", "dobro", "sve", "tako", "će", "mogu", "ću", "zašto", "nešto", "za" };
|
private static readonly string[] AutoDetectWordsCroatianAndSerbian = { "sam", "ali", "nije", "samo", "ovo", "kako", "dobro", "sve", "tako", "će", "mogu", "ću", "zašto", "nešto", "za" };
|
||||||
private static readonly string[] AutoDetectWordsCroatian = { "što", "ovdje", "gdje", "kamo", "tko", "prije", "uvijek", "vrijeme", "vidjeti", "netko",
|
private static readonly string[] AutoDetectWordsCroatian = { "što", "ovdje", "gdje", "kamo", "tko", "prije", "uvijek", "vrijeme", "vidjeti", "netko",
|
||||||
"vidio", "nitko", "bok", "lijepo", "oprosti", "htio", "mjesto", "oprostite", "čovjek", "dolje",
|
"vidio", "nitko", "bok", "lijepo", "oprosti", "htio", "mjesto", "oprostite", "čovjek", "dolje",
|
||||||
@ -548,14 +545,14 @@ namespace Nikse.SubtitleEdit.Core
|
|||||||
return russianEncoding;
|
return russianEncoding;
|
||||||
|
|
||||||
Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai
|
Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai
|
||||||
if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
|
if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล", "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
|
||||||
return thaiEncoding;
|
return thaiEncoding;
|
||||||
|
|
||||||
Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic
|
Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic
|
||||||
Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew
|
Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew
|
||||||
if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5)
|
if (GetCount(arabicEncoding.GetString(buffer), AutoDetectWordsArabic) > 5)
|
||||||
{
|
{
|
||||||
if (GetCount(hewbrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10)
|
if (GetCount(hewbrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 10)
|
||||||
return hewbrewEncoding;
|
return hewbrewEncoding;
|
||||||
return arabicEncoding;
|
return arabicEncoding;
|
||||||
}
|
}
|
||||||
@ -643,14 +640,14 @@ namespace Nikse.SubtitleEdit.Core
|
|||||||
return russianEncoding;
|
return russianEncoding;
|
||||||
|
|
||||||
Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai
|
Encoding thaiEncoding = Encoding.GetEncoding(874); // Thai
|
||||||
if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล") + GetCount(thaiEncoding.GetString(buffer), "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
|
if (GetCount(thaiEncoding.GetString(buffer), "โอ", "โรเบิร์ต", "วิตตอเรีย", "ดร", "คุณตำรวจ", "ราเชล", "ไม่", "เลดดิส", "พระเจ้า", "เท็ดดี้", "หัวหน้า", "แอนดรูว์") > 5)
|
||||||
return thaiEncoding;
|
return thaiEncoding;
|
||||||
|
|
||||||
Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic
|
Encoding arabicEncoding = Encoding.GetEncoding(28596); // Arabic
|
||||||
Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew
|
Encoding hewbrewEncoding = Encoding.GetEncoding(28598); // Hebrew
|
||||||
if (GetCount(arabicEncoding.GetString(buffer), "من", "هل", "لا", "فى", "لقد", "ما") > 5)
|
if (GetCount(arabicEncoding.GetString(buffer), AutoDetectWordsArabic) > 5)
|
||||||
{
|
{
|
||||||
if (GetCount(hewbrewEncoding.GetString(buffer), "אולי", "אולי", "אולי", "אולי", "טוב", "טוב") > 10)
|
if (GetCount(hewbrewEncoding.GetString(buffer), AutoDetectWordsHebrew) > 10)
|
||||||
return hewbrewEncoding;
|
return hewbrewEncoding;
|
||||||
return arabicEncoding;
|
return arabicEncoding;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user