From 7c55a71a2a51c9f351030b02fdad2b5820c024b7 Mon Sep 17 00:00:00 2001 From: Nikolaj Olsson Date: Tue, 24 Sep 2024 17:17:23 +0200 Subject: [PATCH] Fix for wrong OCR italic detection - thx Boulder08 :) Fix #8851 --- .../Ocr/MatchesToItalicStringConverterTest.cs | 67 ++++++++++++++++++- src/ui/Forms/Ocr/VobSubOcr.cs | 2 +- .../Ocr/MatchesToItalicStringConverter.cs | 16 +++-- 3 files changed, 78 insertions(+), 7 deletions(-) diff --git a/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs b/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs index 03318d72d..c620df1eb 100644 --- a/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs +++ b/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs @@ -71,10 +71,75 @@ namespace Test.Logic.Ocr new VobSubOcr.CompareMatch("'", true, 0, Guid.NewGuid().ToString()), }; - string result = MatchesToItalicStringConverter.GetStringWithItalicTags(matches); + var result = MatchesToItalicStringConverter.GetStringWithItalicTags(matches); Assert.AreEqual("He said: ''Go now!''", result); } + [TestMethod] + public void TestItalicAndColon() + { + var matches = new List<VobSubOcr.CompareMatch> + { + new VobSubOcr.CompareMatch("L", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("o", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("n", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("a", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("r", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("d", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(":", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("T", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("h", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("y", true, 0, Guid.NewGuid().ToString()), + new 
VobSubOcr.CompareMatch("'", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("r", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(" ", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("h", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("r", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(".", true, 0, Guid.NewGuid().ToString()), + }; + + var result = MatchesToItalicStringConverter.GetStringWithItalicTags(matches); + Assert.AreEqual("Leonard:<i>They're here.</i>", result); + } + + [TestMethod] + public void TestItalicAndBrackets() + { + var matches = new List<VobSubOcr.CompareMatch> + { + new VobSubOcr.CompareMatch("[", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("L", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("o", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("n", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("a", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("r", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("d", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("]", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("T", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("h", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("y", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("'", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("r", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, 
Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(" ", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("h", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("r", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(".", true, 0, Guid.NewGuid().ToString()), + }; + + var result = MatchesToItalicStringConverter.GetStringWithItalicTags(matches); + Assert.AreEqual("[Leonard]<i>They're here.</i>", result); + } + [TestMethod] public void TestWordInItalic() { diff --git a/src/ui/Forms/Ocr/VobSubOcr.cs b/src/ui/Forms/Ocr/VobSubOcr.cs index 0cf4d2f1e..408a400d1 100644 --- a/src/ui/Forms/Ocr/VobSubOcr.cs +++ b/src/ui/Forms/Ocr/VobSubOcr.cs @@ -1080,7 +1080,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr for (int i = 0; i < 20; i++) { - System.Threading.Thread.Sleep(25); + Thread.Sleep(25); Application.DoEvents(); } } diff --git a/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs b/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs index 428758275..aadce8515 100644 --- a/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs +++ b/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs @@ -72,7 +72,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr for (var i = 0; i < lineMatches.Count; i++) { var m = lineMatches[i]; - if (m.Text == " " || m.Text == "-" || m.Text == "'") // chars that allow change of italic + if (m.Text == " " || m.Text == "-" || m.Text == "'" || m.Text == ":" || m.Text == "[" || m.Text == "]") // chars that allow change of italic { if (sbWord.Length > 0) { @@ -139,6 +139,12 @@ namespace Nikse.SubtitleEdit.Logic.Ocr private static bool AddWord(StringBuilder sb, int italicCount, ref bool italicOn, StringBuilder sbWord, string prevSpace) { var w = sbWord.ToString(); + if (prevSpace.Length == 1 && w.StartsWith(prevSpace)) + { + w = prevSpace + w; + prevSpace = string.Empty; + } + var 
wordIsItalic = italicCount > w.Length / 2.0; if (!wordIsItalic && Math.Abs(italicCount - w.Length / 2.0) < 0.3 && italicOn) { @@ -147,21 +153,21 @@ namespace Nikse.SubtitleEdit.Logic.Ocr if (wordIsItalic && italicOn) { - sb.Append(prevSpace + sbWord); + sb.Append(prevSpace + w); } else if (wordIsItalic) { - sb.Append(prevSpace + "<i>" + sbWord); + sb.Append(prevSpace + "<i>" + w); italicOn = true; } else if (italicOn) { - sb.Append("</i>" + prevSpace + sbWord); + sb.Append("</i>" + prevSpace + w); italicOn = false; } else { - sb.Append(prevSpace + sbWord); + sb.Append(prevSpace + w); } return italicOn;