diff --git a/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs b/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs
index c620df1eb..489fe2ef4 100644
--- a/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs
+++ b/src/Test/Logic/Ocr/MatchesToItalicStringConverterTest.cs
@@ -107,6 +107,37 @@ namespace Test.Logic.Ocr
Assert.AreEqual("Leonard:They're here.", result);
}
+ [TestMethod]
+ public void TestItalicAndColon2() // Feeds "CAESAR: I live here." as single-character matches, with the boolean flag flipping right after "CAESAR: ", and checks the converter output.
+ {
+ var matches = new List // NOTE(review): the generic type argument appears to have been stripped from this view - confirm against the repository.
+ {
+ new VobSubOcr.CompareMatch("C", false, 0, Guid.NewGuid().ToString()), // First eight matches ("CAESAR: ") pass false as the second argument - presumably the italic flag; confirm against the CompareMatch constructor.
+ new VobSubOcr.CompareMatch("A", false, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("E", false, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("S", false, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("A", false, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("R", false, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch(":", false, 0, Guid.NewGuid().ToString()), // ":" is one of the characters SplitMatchesToLineParts closes a part on.
+ new VobSubOcr.CompareMatch(" ", false, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("I", true, 0, Guid.NewGuid().ToString()), // Remaining matches ("I live here.") all pass true as the second argument.
+ new VobSubOcr.CompareMatch(" ", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("l", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("i", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("v", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch(" ", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("h", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("r", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch("e", true, 0, Guid.NewGuid().ToString()),
+ new VobSubOcr.CompareMatch(".", true, 0, Guid.NewGuid().ToString()),
+ };
+
+ var result = MatchesToItalicStringConverter.GetStringWithItalicTags(matches);
+ Assert.AreEqual("CAESAR: I live here.", result); // NOTE(review): as shown, the expected text carries no italic markup; tag literals elsewhere in this diff appear empty (e.g. "" + GetRawString(...) + ""), so markup may have been stripped from this view - confirm the expected value against the repository.
+ }
+
[TestMethod]
public void TestItalicAndBrackets()
{
diff --git a/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs b/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs
index aadce8515..c1ff4dda1 100644
--- a/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs
+++ b/src/ui/Logic/Ocr/MatchesToItalicStringConverter.cs
@@ -6,9 +6,15 @@ using System.Text;
namespace Nikse.SubtitleEdit.Logic.Ocr
{
+ public class SplitItem // One part of the OCR match list as produced by SplitMatchesToLineParts: the matches in the part plus the text to emit after it.
+ {
+ public List Matches { get; set; } // Matches belonging to this part; a part closed by ":", ")" or "]" includes that closing match. NOTE(review): the generic type argument appears stripped in this view.
+ public string Separator { get; set; } // Environment.NewLine when the part ended at a line-break match, otherwise string.Empty; GetStringWithItalicTags appends it after the part's text in its two non-AppendLine branches.
+ }
+
public static class MatchesToItalicStringConverter
{
- private static readonly string[] Separators = { "-", "—", ".", "'", "\"", " ", "!", "\r", "\n", "\r\n" };
+ private static readonly string[] Separators = { "-", "—", ".", "'", "\"", " ", "\r", "\n", "\r\n" };
public static string GetStringWithItalicTags(List matches)
{
@@ -18,24 +24,34 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
}
var sb = new StringBuilder();
- foreach (var lineMatches in SplitMatchesToLines(matches))
+ foreach (var lineMatches in SplitMatchesToLineParts(matches))
{
- var numberOfLetters = GetNumberOfLetters(lineMatches);
- var numberOfItalicLetters = GetNumberOfItalicLetters(lineMatches);
+ var numberOfLetters = GetNumberOfLetters(lineMatches.Matches);
+ var numberOfItalicLetters = GetNumberOfItalicLetters(lineMatches.Matches);
if (numberOfItalicLetters == numberOfLetters || numberOfItalicLetters > 3 && numberOfLetters - numberOfItalicLetters < 2 && ItalicIsInsideWord(matches))
{
- sb.AppendLine("" + GetRawString(lineMatches) + "");
+ sb.AppendLine("" + GetRawString(lineMatches.Matches) + "");
}
else if (numberOfItalicLetters == 0 || numberOfLetters > 2 && numberOfItalicLetters < 2)
{
- sb.AppendLine(GetRawString(lineMatches));
+ sb.Append(GetRawString(lineMatches.Matches));
+ sb.Append(lineMatches.Separator);
}
else
{
- sb.AppendLine(GetStringWithItalicTagsMixed(lineMatches));
+ sb.Append(GetStringWithItalicTagsMixed(lineMatches.Matches));
+ sb.Append(lineMatches.Separator);
}
}
- return sb.ToString().TrimEnd().Replace("" + Environment.NewLine + "", Environment.NewLine);
+
+ var text = sb.ToString().TrimEnd().Replace("" + Environment.NewLine + "", Environment.NewLine);
+
+ text = text.Replace(" ", " ");
+ text = text.Replace(" ", " ");
+ text = text.Replace(" ", " ");
+ text = text.Replace(" ", " ");
+
+ return text.Trim();
}
private static bool ItalicIsInsideWord(List matches)
@@ -72,7 +88,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
for (var i = 0; i < lineMatches.Count; i++)
{
var m = lineMatches[i];
- if (m.Text == " " || m.Text == "-" || m.Text == "'" || m.Text == ":" || m.Text == "[" || m.Text == "]") // chars that allow change of italic
+ if (m.Text == " " || m.Text == "-" || m.Text == "'") // chars that allow change of italic
{
if (sbWord.Length > 0)
{
@@ -133,6 +149,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
.Replace(".", ".")
.Replace("...", "...")
.Replace("...", "...");
+
return text;
}
@@ -173,17 +190,28 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
return italicOn;
}
- private static List> SplitMatchesToLines(List matches)
+ private static List SplitMatchesToLineParts(List matches)
{
- var result = new List>();
+ var result = new List();
var line = new List();
+
foreach (var t in matches)
{
if (t.Text == Environment.NewLine)
{
if (line.Count > 0)
{
- result.Add(line);
+ result.Add(new SplitItem { Matches = line, Separator = Environment.NewLine });
+ line = new List();
+ }
+ }
+ else if (t.Text == ":" || t.Text == ")" || t.Text == "]")
+ {
+ if (line.Count > 0)
+ {
+ line.Add(t);
+
+ result.Add(new SplitItem { Matches = line, Separator = string.Empty });
line = new List();
}
}
@@ -192,10 +220,12 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
line.Add(t);
}
}
+
if (line.Count > 0)
{
- result.Add(line);
+ result.Add(new SplitItem { Matches = line, Separator = string.Empty });
}
+
return result;
}
@@ -211,7 +241,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
}
}
- return sb.ToString().Trim();
+ return sb.ToString().Replace(" ", " ").Replace(" ", " ");
}
private static int GetNumberOfLetters(List matches)