Merge pull request #8087 from ivandrofly/issues/ocr-engine-8052

Fixes #8052
This commit is contained in:
Nikolaj Olsson 2024-03-28 18:06:26 +01:00 committed by GitHub
commit 21a6f5bebb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 35 additions and 1 deletions

View File

@ -540,6 +540,28 @@ namespace Test.FixCommonErrors
Assert.AreEqual("…but never could.", target.Subtitle.Paragraphs[0].Text);
}
}
/// <summary>
/// Runs the French ("fra") OCR replace list over an all-uppercase line that
/// contains "L'ASSASSIN" and expects the text to come back unchanged.
/// </summary>
[TestMethod]
public void FixCommonOcrErrorsFrenchHardCodedRuleNoChange()
{
    // Input and expected output are the same text: nothing may be altered.
    const string allCapsLine = "ENCORE UNE VICTIME\r\nDE L'ASSASSIN MYSTERIEUX.";

    using (var target = GetFixCommonErrorsLib())
    {
        InitializeFixCommonErrorsLine(target, allCapsLine);

        target.FixOcrErrorsViaReplaceList("fra");

        var actual = target.Subtitle.Paragraphs[0].Text;
        Assert.AreEqual(allCapsLine, actual);
    }
}
/// <summary>
/// Runs the French ("fra") OCR replace list over a mixed-case line where a
/// mid-sentence "L'" precedes a lowercase word and expects it to become "l'".
/// </summary>
[TestMethod]
public void FixCommonOcrErrorsFrenchHardCodedRuleChange()
{
    const string ocrText = "Encore une victime\r\nde L'assassin mysterieux.";
    const string expected = "Encore une victime\r\nde l'assassin mysterieux.";

    using (var target = GetFixCommonErrorsLib())
    {
        InitializeFixCommonErrorsLine(target, ocrText);

        target.FixOcrErrorsViaReplaceList("fra");

        var actual = target.Subtitle.Paragraphs[0].Text;
        Assert.AreEqual(expected, actual);
    }
}
#endregion Fix OCR errors

View File

@ -665,6 +665,18 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
return word;
}
/// <summary>
/// Tells whether a fragment has an apostrophe at index 1 followed by an
/// uppercase character at index 2, as in "L'ASSASSIN".
/// </summary>
/// <param name="sentence">
/// The fragment to inspect. Index 0 is not examined; index 1 is compared with
/// the apostrophe characters and index 2 is tested for uppercase.
/// </param>
/// <returns>
/// <c>true</c> when the fragment is longer than two characters, its second
/// character is <c>'</c> or U+2019, and its third character is uppercase;
/// otherwise <c>false</c>.
/// </returns>
private static bool IsToKeepCasing(string sentence)
{
    // related to https://github.com/SubtitleEdit/subtitleedit/issues/8052
    if (sentence.Length <= 2)
    {
        // Too short to hold a letter, an apostrophe and a following character.
        return false;
    }

    // Accept both the ASCII apostrophe and the typographic one (U+2019).
    // The latter is written as an escape so the literal cannot be lost to
    // an encoding round-trip and end up as an empty (non-compiling) char literal.
    var hasApostrophe = sentence[1] == '\'' || sentence[1] == '\u2019';

    // do not change 'L' to lowercase in text like "L'ASSASSIN"
    return hasApostrophe && char.IsUpper(sentence[2]);
}
public static string FixFrenchLApostrophe(string input, string tag, string lastLine)
{
var text = input;
@ -711,7 +723,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
isPreviousLineClose = beforeThis.EndsWith('.') || beforeThis.EndsWith('!') || beforeThis.EndsWith('?');
}
if (isPreviousLineClose)
if (isPreviousLineClose || IsToKeepCasing(text.Substring(start + 1)))
{
text = text.Remove(start + 1, 1).Insert(start + 1, "L");
}