More CJK detect

This commit is contained in:
niksedk 2022-01-25 13:54:47 +01:00
parent 8e3aebe4ab
commit 122ba7c1a6

View File

@ -64,7 +64,7 @@ namespace Nikse.SubtitleEdit.Core.Common.TextLengthCalculator
else if (ChineseFullWidthPunctuations.Contains(ch) || else if (ChineseFullWidthPunctuations.Contains(ch) ||
LanguageAutoDetect.JapaneseLetters.Contains(ch) || LanguageAutoDetect.JapaneseLetters.Contains(ch) ||
LanguageAutoDetect.KoreanLetters.Contains(ch) || LanguageAutoDetect.KoreanLetters.Contains(ch) ||
IsCjk(ch)) IsCjk(ch))
{ {
count++; count++;
} }
@ -82,14 +82,17 @@ namespace Nikse.SubtitleEdit.Core.Common.TextLengthCalculator
public const string JapaneseHalfWidthCharacters = "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚"; public const string JapaneseHalfWidthCharacters = "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚";
public const string ChineseFullWidthPunctuations = ",。、:;?!…“”—‘’()【】「」『』〔〕《》〈〉"; public const string ChineseFullWidthPunctuations = ",。、:;?!…“”—‘’()【】「」『』〔〕《》〈〉";
public static readonly Regex CjkCharRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}", RegexOptions.Compiled); public static readonly Regex CjkCharRegex = new Regex(@"\p{IsHangulJamo}|" +
@"\p{IsCJKRadicalsSupplement}|" +
@"\p{IsCJKSymbolsandPunctuation}|" +
@"\p{IsEnclosedCJKLettersandMonths}|" +
@"\p{IsCJKCompatibility}|" +
@"\p{IsCJKUnifiedIdeographsExtensionA}|" +
@"\p{IsCJKUnifiedIdeographs}|" +
@"\p{IsHangulSyllables}|" +
@"\p{IsCJKCompatibilityForms}", RegexOptions.Compiled);
public static bool IsCjk(char c) public static bool IsCjk(char c)
{ {
if (c == '。' || c == '')
{
return true;
}
return CjkCharRegex.IsMatch(c.ToString()); return CjkCharRegex.IsMatch(c.ToString());
} }
} }