Fix for wrong OCR italic detection - thx Boulder08 :)

Fix #8851
This commit is contained in:
Nikolaj Olsson 2024-09-24 17:17:23 +02:00
parent f667fef5c7
commit 7c55a71a2a
3 changed files with 78 additions and 7 deletions

View File

@ -71,10 +71,75 @@ namespace Test.Logic.Ocr
new VobSubOcr.CompareMatch("'", true, 0, Guid.NewGuid().ToString()), new VobSubOcr.CompareMatch("'", true, 0, Guid.NewGuid().ToString()),
}; };
string result = MatchesToItalicStringConverter.GetStringWithItalicTags(matches); var result = MatchesToItalicStringConverter.GetStringWithItalicTags(matches);
Assert.AreEqual("He said: <i>''Go now!''</i>", result); Assert.AreEqual("He said: <i>''Go now!''</i>", result);
} }
[TestMethod]
public void TestItalicAndColon()
{
    // A speaker label ending in a colon, immediately followed by a sentence whose
    // first letter ("T") is flagged non-italic while every later character is italic.
    const string plainChars = "Leonard:T";
    const string italicChars = "hey're here.";

    var matches = new List<VobSubOcr.CompareMatch>();
    foreach (var ch in plainChars)
    {
        matches.Add(new VobSubOcr.CompareMatch(ch.ToString(), false, 0, Guid.NewGuid().ToString()));
    }

    foreach (var ch in italicChars)
    {
        matches.Add(new VobSubOcr.CompareMatch(ch.ToString(), true, 0, Guid.NewGuid().ToString()));
    }

    var actual = MatchesToItalicStringConverter.GetStringWithItalicTags(matches);

    // The whole sentence after the colon is expected inside the italic tags, "T" included.
    Assert.AreEqual("Leonard:<i>They're here.</i>", actual);
}
[TestMethod]
public void TestItalicAndBrackets()
{
    // A bracketed, non-italic speaker label immediately followed (no space)
    // by a sentence in which every character is flagged italic.
    const string plainChars = "[Leonard]";
    const string italicChars = "They're here.";

    var matches = new List<VobSubOcr.CompareMatch>();
    foreach (var ch in plainChars)
    {
        matches.Add(new VobSubOcr.CompareMatch(ch.ToString(), false, 0, Guid.NewGuid().ToString()));
    }

    foreach (var ch in italicChars)
    {
        matches.Add(new VobSubOcr.CompareMatch(ch.ToString(), true, 0, Guid.NewGuid().ToString()));
    }

    var actual = MatchesToItalicStringConverter.GetStringWithItalicTags(matches);

    // The italic tags are expected to open right after the closing bracket.
    Assert.AreEqual("[Leonard]<i>They're here.</i>", actual);
}
[TestMethod] [TestMethod]
public void TestWordInItalic() public void TestWordInItalic()
{ {

View File

@ -1080,7 +1080,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
for (int i = 0; i < 20; i++) for (int i = 0; i < 20; i++)
{ {
System.Threading.Thread.Sleep(25); Thread.Sleep(25);
Application.DoEvents(); Application.DoEvents();
} }
} }

View File

@ -72,7 +72,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
for (var i = 0; i < lineMatches.Count; i++) for (var i = 0; i < lineMatches.Count; i++)
{ {
var m = lineMatches[i]; var m = lineMatches[i];
if (m.Text == " " || m.Text == "-" || m.Text == "'") // chars that allow change of italic if (m.Text == " " || m.Text == "-" || m.Text == "'" || m.Text == ":" || m.Text == "[" || m.Text == "]") // chars that allow change of italic
{ {
if (sbWord.Length > 0) if (sbWord.Length > 0)
{ {
@ -139,6 +139,12 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
private static bool AddWord(StringBuilder sb, int italicCount, ref bool italicOn, StringBuilder sbWord, string prevSpace) private static bool AddWord(StringBuilder sb, int italicCount, ref bool italicOn, StringBuilder sbWord, string prevSpace)
{ {
var w = sbWord.ToString(); var w = sbWord.ToString();
if (prevSpace.Length == 1 && w.StartsWith(prevSpace))
{
w = prevSpace + w;
prevSpace = string.Empty;
}
var wordIsItalic = italicCount > w.Length / 2.0; var wordIsItalic = italicCount > w.Length / 2.0;
if (!wordIsItalic && Math.Abs(italicCount - w.Length / 2.0) < 0.3 && italicOn) if (!wordIsItalic && Math.Abs(italicCount - w.Length / 2.0) < 0.3 && italicOn)
{ {
@ -147,21 +153,21 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if (wordIsItalic && italicOn) if (wordIsItalic && italicOn)
{ {
sb.Append(prevSpace + sbWord); sb.Append(prevSpace + w);
} }
else if (wordIsItalic) else if (wordIsItalic)
{ {
sb.Append(prevSpace + "<i>" + sbWord); sb.Append(prevSpace + "<i>" + w);
italicOn = true; italicOn = true;
} }
else if (italicOn) else if (italicOn)
{ {
sb.Append("</i>" + prevSpace + sbWord); sb.Append("</i>" + prevSpace + w);
italicOn = false; italicOn = false;
} }
else else
{ {
sb.Append(prevSpace + sbWord); sb.Append(prevSpace + w);
} }
return italicOn; return italicOn;