Merge pull request #4152 from Flitskikker/feature/continuation-style-arabic-inserts

Detect and uncheck inserts in quotes for Arabic
This commit is contained in:
Nikolaj Olsson 2020-04-26 20:51:47 +02:00 committed by GitHub
commit d4a8c63f0e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 63 additions and 22 deletions

View File

@ -15,6 +15,12 @@ namespace Nikse.SubtitleEdit.Core
private static readonly string MusicSymbols = "♪♫#*¶";
private static readonly string ExplanationQuotes = "'\"“”‘’«»‹›";
// Language codes that are treated as having no upper/lower case distinction.
// Consulted by IsLanguageWithoutCaseDistinction via a plain Contains lookup, so a
// code only matches when it equals one of these entries exactly.
private static readonly List<string> LanguagesWithoutCaseDistinction = new List<string>
{
"am", "ar", "as", "az", "bn", "my", "zh", "ka", "gu", "he", "hi", "ja", "kn", "ks", "km", "ko", "ku", "lo",
"ml", "ps", "fa", "pa", "sd", "si", "su", "ta", "te", "th", "bo", "ti", "ur", "ug", "yi"
};
private static readonly Dictionary<char, char> QuotePairs = new Dictionary<char, char>
{
{'\'', '\''},
@ -846,14 +852,14 @@ namespace Nikse.SubtitleEdit.Core
public static bool IsFullLineTag(string input, int position)
{
input = ExtractParagraphOnly(input);
// Return if empty string
if (string.IsNullOrEmpty(input))
{
return false;
}
input = ExtractParagraphOnly(input);
var lineStartIndex = (position > 0 && position < input.Length) ? input.LastIndexOf("\n", position, StringComparison.Ordinal) : 0;
if (lineStartIndex == -1)
{
@ -906,14 +912,14 @@ namespace Nikse.SubtitleEdit.Core
public static bool IsFullLineQuote(string originalInput, int position, char quoteStart, char quoteEnd)
{
string input = ExtractParagraphOnly(originalInput);
// Return if empty string
if (string.IsNullOrEmpty(originalInput))
{
return false;
}
string input = ExtractParagraphOnly(originalInput);
// Shift index if needed after deleting { } tags
position -= Math.Max(0, originalInput.IndexOf(input, StringComparison.Ordinal));
@ -1195,6 +1201,27 @@ namespace Nikse.SubtitleEdit.Core
return input.Replace(",", "،").Replace("?", "؟");
}
// Reports whether a line looks like an Arabic insert.
//
// originalInput is passed through ExtractParagraphOnly and then has everything
// matching <.*?> removed; the remaining text must be at least two characters long
// and must both start and end with a character found in Quotes.
// sanitizedInput must additionally neither end with "," nor satisfy IsEndOfSentence.
// Returns true only when all of these conditions hold.
public static bool IsArabicInsert(string originalInput, string sanitizedInput)
{
    var stripped = Regex.Replace(ExtractParagraphOnly(originalInput), "<.*?>", string.Empty);

    // An opening and a closing character are both required
    if (stripped.Length < 2)
    {
        return false;
    }

    // Both outermost characters have to be quote characters
    var lastIndex = stripped.Length - 1;
    if (!Quotes.Contains(stripped[0]) || !Quotes.Contains(stripped[lastIndex]))
    {
        return false;
    }

    // A trailing comma rules out an insert...
    if (sanitizedInput.EndsWith(","))
    {
        return false;
    }

    // ...and so does an end-of-sentence ending
    return !IsEndOfSentence(sanitizedInput);
}
// Returns true when the given language code is one of the entries in
// LanguagesWithoutCaseDistinction, false otherwise.
public static bool IsLanguageWithoutCaseDistinction(string language)
{
    var index = LanguagesWithoutCaseDistinction.IndexOf(language);
    return index >= 0;
}
public static int GetMinimumGapMs()
{
return Math.Max(Configuration.Settings.General.MinimumMillisecondsBetweenLines + 5, 300);

View File

@ -17,6 +17,8 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
string fixAction = string.Format(language.FixContinuationStyleX, ContinuationUtilities.GetContinuationStyleName(Configuration.Settings.General.ContinuationStyle));
int fixCount = 0;
var isLanguageWithoutCaseDistinction = ContinuationUtilities.IsLanguageWithoutCaseDistinction(callbacks.Language);
// Check continuation profile
if (_continuationProfile == null)
{
@ -55,28 +57,40 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
// If ends with nothing...
if (!ContinuationUtilities.IsEndOfSentence(text))
{
// ...ignore inserts
if (Configuration.Settings.General.FixContinuationStyleUncheckInsertsAllCaps)
if (!isLanguageWithoutCaseDistinction)
{
if (ContinuationUtilities.IsAllCaps(text) || ContinuationUtilities.IsAllCaps(textNext))
// ...ignore inserts
if (Configuration.Settings.General.FixContinuationStyleUncheckInsertsAllCaps)
{
isChecked = false;
if (ContinuationUtilities.IsAllCaps(text) || ContinuationUtilities.IsAllCaps(textNext))
{
isChecked = false;
}
}
// ...and italic lyrics
if (Configuration.Settings.General.FixContinuationStyleUncheckInsertsItalic)
{
if (ContinuationUtilities.IsItalic(oldText) && !ContinuationUtilities.IsNewSentence(text, true) && inItalicSentence == false)
{
isChecked = false;
}
}
// ...and smallcaps inserts or non-italic lyrics
if (Configuration.Settings.General.FixContinuationStyleUncheckInsertsLowercase)
{
if (!ContinuationUtilities.IsNewSentence(text, true) && !inSentence)
{
isChecked = false;
}
}
}
// ...and italic lyrics
if (Configuration.Settings.General.FixContinuationStyleUncheckInsertsItalic)
// ...ignore Arabic inserts
if (callbacks.Language == "ar")
{
if (ContinuationUtilities.IsItalic(oldText) && !ContinuationUtilities.IsNewSentence(text, true) && inItalicSentence == false)
{
isChecked = false;
}
}
// ...and smallcaps inserts or non-italic lyrics
if (Configuration.Settings.General.FixContinuationStyleUncheckInsertsLowercase)
{
if (!ContinuationUtilities.IsNewSentence(text, true) && !inSentence)
if (ContinuationUtilities.IsArabicInsert(oldText, text) || ContinuationUtilities.IsArabicInsert(oldTextNext, textNext))
{
isChecked = false;
}
@ -99,7 +113,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
// If ends with dots (possible interruptions), or nothing, check if next sentence is new sentence, otherwise don't check by default
if (text.EndsWith("..") || text.EndsWith("…") || ContinuationUtilities.EndsWithNothing(text, _continuationProfile))
{
if (!HasPrefix(textNext) && (ContinuationUtilities.IsNewSentence(textNext, true) || string.IsNullOrEmpty(textNext)))
if (!HasPrefix(textNext) && ((!isLanguageWithoutCaseDistinction && ContinuationUtilities.IsNewSentence(textNext, true)) || string.IsNullOrEmpty(textNext)))
{
isChecked = false;