diff --git a/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs b/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs index c620df1eb..489fe2ef4 100644 --- a/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs +++ b/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs @@ -107,6 +107,37 @@ namespace Test.Logic.Ocr Assert.AreEqual("Leonard:They're here.", result); } + [TestMethod] + public void TestItalicAndColon2() + { + var matches = new List + { + new VobSubOcr.CompareMatch("C", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("A", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("E", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("S", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("A", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("R", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(":", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(" ", false, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("I", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(" ", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("l", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("i", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("v", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(" ", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("h", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("r", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()), + new VobSubOcr.CompareMatch(".", true, 0, Guid.NewGuid().ToString()), + }; + + var result = MatchesToItalicStringConverter.GetStringWithItalicTags(matches); + Assert.AreEqual("CAESAR: I live here.", result); + } + [TestMethod] public void TestItalicAndBrackets() { diff --git a/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs b/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs index aadce8515..c1ff4dda1 100644 --- a/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs +++ b/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs @@ -6,9 +6,15 @@ using System.Text; namespace Nikse.SubtitleEdit.Logic.Ocr { + public class SplitItem + { + public List Matches { get; set; } + public string Separator { get; set; } + } + public static class MatchesToItalicStringConverter { - private static readonly string[] Separators = { "-", "—", ".", "'", "\"", " ", "!", "\r", "\n", "\r\n" }; + private static readonly string[] Separators = { "-", "—", ".", "'", "\"", " ", "\r", "\n", "\r\n" }; public static string GetStringWithItalicTags(List matches) { @@ -18,24 +24,34 @@ namespace Nikse.SubtitleEdit.Logic.Ocr } var sb = new StringBuilder(); - foreach (var lineMatches in SplitMatchesToLines(matches)) + foreach (var lineMatches in SplitMatchesToLineParts(matches)) { - var numberOfLetters = GetNumberOfLetters(lineMatches); - var numberOfItalicLetters = GetNumberOfItalicLetters(lineMatches); + var numberOfLetters = GetNumberOfLetters(lineMatches.Matches); + var numberOfItalicLetters = GetNumberOfItalicLetters(lineMatches.Matches); if (numberOfItalicLetters == numberOfLetters || numberOfItalicLetters > 3 && numberOfLetters - numberOfItalicLetters < 2 && ItalicIsInsideWord(matches)) { - sb.AppendLine("" + GetRawString(lineMatches) + ""); + sb.AppendLine("" + GetRawString(lineMatches.Matches) + ""); } else if (numberOfItalicLetters == 0 || numberOfLetters > 2 && numberOfItalicLetters < 2) { - sb.AppendLine(GetRawString(lineMatches)); + sb.Append(GetRawString(lineMatches.Matches)); + sb.Append(lineMatches.Separator); } else { - sb.AppendLine(GetStringWithItalicTagsMixed(lineMatches)); + sb.Append(GetStringWithItalicTagsMixed(lineMatches.Matches)); + sb.Append(lineMatches.Separator); } } - return sb.ToString().TrimEnd().Replace("" + Environment.NewLine + "", Environment.NewLine); + + var text = sb.ToString().TrimEnd().Replace("" + Environment.NewLine + "", Environment.NewLine); + + text = text.Replace(" ", " "); + text = text.Replace(" ", " "); + text = text.Replace(" ", " "); + text = text.Replace(" ", " "); + + return text.Trim(); } private static bool ItalicIsInsideWord(List matches) @@ -72,7 +88,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr for (var i = 0; i < lineMatches.Count; i++) { var m = lineMatches[i]; - if (m.Text == " " || m.Text == "-" || m.Text == "'" || m.Text == ":" || m.Text == "[" || m.Text == "]") // chars that allow change of italic + if (m.Text == " " || m.Text == "-" || m.Text == "'") // chars that allow change of italic { if (sbWord.Length > 0) { @@ -133,6 +149,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr .Replace(".", ".") .Replace("...", "...") .Replace("...", "..."); + return text; } @@ -173,17 +190,28 @@ namespace Nikse.SubtitleEdit.Logic.Ocr return italicOn; } - private static List> SplitMatchesToLines(List matches) + private static List SplitMatchesToLineParts(List matches) { - var result = new List>(); + var result = new List(); var line = new List(); + foreach (var t in matches) { if (t.Text == Environment.NewLine) { if (line.Count > 0) { - result.Add(line); + result.Add(new SplitItem { Matches = line, Separator = Environment.NewLine }); + line = new List(); + } + } + else if (t.Text == ":" || t.Text == ")" || t.Text == "]") + { + if (line.Count > 0) + { + line.Add(t); + + result.Add(new SplitItem { Matches = line, Separator = string.Empty }); line = new List(); } } @@ -192,10 +220,12 @@ namespace Nikse.SubtitleEdit.Logic.Ocr line.Add(t); } } + if (line.Count > 0) { - result.Add(line); + result.Add(new SplitItem { Matches = line, Separator = string.Empty }); } + return result; } @@ -211,7 +241,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr } } - return sb.ToString().Trim(); + return sb.ToString().Replace(" ", " ").Replace(" ", " "); } private static int GetNumberOfLetters(List matches)