From 122ba7c1a6313f326390a6fd4d0a915e7993dcfe Mon Sep 17 00:00:00 2001 From: niksedk Date: Tue, 25 Jan 2022 13:54:47 +0100 Subject: [PATCH] More CJK detect --- .../Common/TextLengthCalculator/CalcCJK.cs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/libse/Common/TextLengthCalculator/CalcCJK.cs b/src/libse/Common/TextLengthCalculator/CalcCJK.cs index 153f4f916..f377a9274 100644 --- a/src/libse/Common/TextLengthCalculator/CalcCJK.cs +++ b/src/libse/Common/TextLengthCalculator/CalcCJK.cs @@ -64,7 +64,7 @@ namespace Nikse.SubtitleEdit.Core.Common.TextLengthCalculator else if (ChineseFullWidthPunctuations.Contains(ch) || LanguageAutoDetect.JapaneseLetters.Contains(ch) || LanguageAutoDetect.KoreanLetters.Contains(ch) || - IsCjk(ch)) + IsCjk(ch)) { count++; } @@ -82,14 +82,17 @@ namespace Nikse.SubtitleEdit.Core.Common.TextLengthCalculator public const string JapaneseHalfWidthCharacters = "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚"; public const string ChineseFullWidthPunctuations = ",。、:;?!…“”—‘’()【】「」『』〔〕《》〈〉"; - public static readonly Regex CjkCharRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}", RegexOptions.Compiled); + public static readonly Regex CjkCharRegex = new Regex(@"\p{IsHangulJamo}|" + + @"\p{IsCJKRadicalsSupplement}|" + + @"\p{IsCJKSymbolsandPunctuation}|" + + @"\p{IsEnclosedCJKLettersandMonths}|" + + @"\p{IsCJKCompatibility}|" + + @"\p{IsCJKUnifiedIdeographsExtensionA}|" + + @"\p{IsCJKUnifiedIdeographs}|" + + @"\p{IsHangulSyllables}|" + + @"\p{IsCJKCompatibilityForms}", RegexOptions.Compiled); public static bool IsCjk(char c) { - if (c == '。' || c == ',') - { - return true; - } - return CjkCharRegex.IsMatch(c.ToString()); } }