Fix #8184 - thx Codling/ivandrofly :)

This commit is contained in:
Nikolaj Olsson 2024-07-02 06:52:30 +02:00
parent 9dad06922b
commit dc37f04338
2 changed files with 51 additions and 4 deletions

View File

@ -563,6 +563,28 @@ namespace Test.FixCommonErrors
}
}
/// <summary>
/// Runs the "fra" OCR replace list over two lines of French text containing
/// the lower-case elision "l'esprit" and expects the text to come back unchanged.
/// </summary>
[TestMethod]
public void FixCommonOcrErrorsFrenchHardCodedRuleNoChange2()
{
    // Input and expected output are the same string: the fixer must not touch it.
    var unchangedText = string.Join(Environment.NewLine, "Je connaîtrai la peur.", "La peur tue l'esprit.");

    using (var target = GetFixCommonErrorsLib())
    {
        InitializeFixCommonErrorsLine(target, unchangedText);
        target.FixOcrErrorsViaReplaceList("fra");

        var actualText = target.Subtitle.Paragraphs[0].Text;
        Assert.AreEqual(unchangedText, actualText);
    }
}
/// <summary>
/// Runs the "fra" OCR replace list over two lines of French text containing
/// "l'Esprit" (lower-case article, capitalized mixed-case word) and expects
/// the text to come back unchanged.
/// </summary>
[TestMethod]
public void FixCommonOcrErrorsFrenchHardCodedRuleNoChange3()
{
    // Input and expected output are the same string: the fixer must not touch it.
    var unchangedText = string.Join(Environment.NewLine, "Je connaîtrai la peur.", "La peur tue l'Esprit tue et.");

    using (var target = GetFixCommonErrorsLib())
    {
        InitializeFixCommonErrorsLine(target, unchangedText);
        target.FixOcrErrorsViaReplaceList("fra");

        var actualText = target.Subtitle.Paragraphs[0].Text;
        Assert.AreEqual(unchangedText, actualText);
    }
}
#endregion Fix OCR errors
#region Fix missing spaces

View File

@ -666,7 +666,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
private static bool IsToKeepCasing(string sentence)
{
// related to https://github.com/SubtitleEdit/subtitleedit/issues/8052
if (sentence.Length > 2)
if (sentence.Length > 2 && IsCurrentWordUpperCase(sentence))
{
// do not change 'L' to lowercase in text like "L'ASSASSIN"
return char.IsUpper(sentence[2]) && (sentence[1] == '\'' || sentence[1] == '');
@ -675,6 +675,31 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
return false;
}
/// <summary>
/// Checks whether the word that starts at index 2 of <paramref name="sentence"/>
/// (i.e. after a two-character prefix such as <c>L'</c>) is written entirely in
/// upper-case letters.
/// </summary>
/// <param name="sentence">Text to inspect; the first two characters are not examined.</param>
/// <returns>
/// <c>true</c> when every letter from index 2 up to the first non-letter character
/// (or the end of the string) is upper case - this includes the case where the
/// character at index 2 is itself not a letter. <c>false</c> when a non-upper-case
/// letter is met first, or when the string has fewer than three characters.
/// </returns>
private static bool IsCurrentWordUpperCase(string sentence)
{
    var lastIndex = sentence.Length - 1;
    if (lastIndex < 2)
    {
        // Nothing follows the two-character prefix.
        return false;
    }

    var index = 2;
    while (index <= lastIndex)
    {
        var current = sentence[index];

        if (!char.IsLetter(current))
        {
            // The word ended and only upper-case letters (if any) were seen.
            return true;
        }

        if (!char.IsUpper(current))
        {
            return false;
        }

        index++;
    }

    // Reached the end of the string without meeting a non-upper-case letter.
    return true;
}
public static string FixFrenchLApostrophe(string input, string affix, string prevLine)
{
var text = input;