Minor fixes to detected words for Czech and Slovak languages.

This commit is contained in:
mm6502 2016-06-25 18:45:58 +02:00
parent f566c4243e
commit a14b844caf

View File

@ -128,11 +128,11 @@ namespace Nikse.SubtitleEdit.Core
"[Dd]obr[ýáé]", "[Vv]šak", "[Cc]el[ýáé]", "[Nn]ov[ýáé]", "[Dd]ruh[ýáé]" };
// differences between Czech and Slovak languages / Czech words / please keep the words aligned between these languages for better comparison
private static readonly string[] AutoDetectWordsCzechOnly = { ".*[Řř].*", ".*[ů].*", "[Bb]ýt", "[Jj]sem", "[Jj]si", "[Jj]á", "[Mm]ít", "[Aa]no", "[Nn]e", "[Nn]ic", "[Dd]en", "[Jj]en", "[Cc]o", "[Jj]ak[o]?",
"[Nn]ebo", "[Pp]ři", "[Pp]ro", "[Jj](ít|du|de|deme|dou)", "[Pp]řed.*", "[Mm]ezi", "[Jj]eště", "[Čč]lověk", "[Pp]odle", "[Dd]alší" };
private static readonly string[] AutoDetectWordsCzechOnly = { ".*[Řř].*", ".*[ůě].*", "[Bb]ýt", "[Jj]sem", "[Jj]si", "[Jj]á", "[Mm]ít", "[Aa]no", "[Nn]e", "[Nn]ic", "[Dd]en", "[Jj]en", "[Cc]o", "[Jj]ak[o]?",
"[Nn]ebo", "[Pp]ři", "[Pp]ro", "[Jj](ít|du|de|deme|dou)", "[Pp]řed.*", "[Mm]ezi", "[Jj]eště", "[Čč]lověk", "[Pp]odle", "[Dd]alší" };
// differences between Czech and Slovak languages / Slovak words / please keep the words aligned between these languages for better comparison
private static readonly string[] AutoDetectWordsSlovakOnly = { ".*[Ôô].*", ".*[ä].*", "[Bb]yť", "[Ss]om", "[Ss]i", "[Jj]a", "[Mm]ať", "[Áá]no", "[Nn]ie", "[Nn]ič", "[Dd]eň", "[Ll]en", "[Čč]o", "[Aa]ko",
"[Aa]?[Ll]ebo", "[Pp]ri", "[Pp]re", "[Íí](sť|(dem|de|deme|dú))", "[Pp]red.*", "[Mm]edzi", "[Ee]šte", "[Čč]lovek", "[Pp]odľa", "[Ďď]alš(í|ia|ie)" };
private static readonly string[] AutoDetectWordsSlovakOnly = { ".*[Ôô].*", ".*[ä].*", "[Bb]yť", "[Ss]om", "[Ss]i", "[Jj]a", "[Mm]ať", "[Áá]no", "[Nn]ie", "[Nn]ič", "[Dd]eň", "[Ll]en", "[Čč]o", "[Aa]ko",
"[Aa]?[Ll]ebo", "[Pp]ri", "[Pp]re", "([Íí]sť|[Ii](?:dem|de|deme|dú))", "[Pp]red.*", "[Mm]edzi", "[Ee]šte", "[Čč]lovek", "[Pp]odľa", "[Ďď]alš(í|ia|ie)" };
private static string AutoDetectGoogleLanguage(string text, int bestCount)
{
@ -297,9 +297,9 @@ namespace Nikse.SubtitleEdit.Core
count = GetCount(text, AutoDetectWordsCzechAndSlovak);
if (count > bestCount)
{
int czech = GetCount(text, AutoDetectWordsCzechOnly);
int slovak = GetCount(text, AutoDetectWordsSlovakOnly);
if (czech >= slovak)
int czechWordsCount = GetCount(text, AutoDetectWordsCzechOnly);
int slovakWordsCount = GetCount(text, AutoDetectWordsSlovakOnly);
if (czechWordsCount >= slovakWordsCount)
return "cs"; // Czech
else
return "sk"; // Slovak