mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-21 18:52:36 +01:00
More italic detection - thx Zoltan :)
This commit is contained in:
parent
95e4c2450b
commit
e11977d438
@ -107,6 +107,37 @@ namespace Test.Logic.Ocr
|
||||
Assert.AreEqual("Leonard:<i>They're here.</i>", result);
|
||||
}
|
||||
|
||||
[TestMethod]
public void TestItalicAndColon2()
{
    // Leading part: each character becomes one match whose second constructor argument is false.
    const string plainPart = "CAESAR: ";

    // Trailing part: each character becomes one match whose second constructor argument is true.
    const string flaggedPart = "I live here.";

    var matches = new List<VobSubOcr.CompareMatch>();

    foreach (var ch in plainPart)
    {
        matches.Add(new VobSubOcr.CompareMatch(ch.ToString(), false, 0, Guid.NewGuid().ToString()));
    }

    foreach (var ch in flaggedPart)
    {
        matches.Add(new VobSubOcr.CompareMatch(ch.ToString(), true, 0, Guid.NewGuid().ToString()));
    }

    var actual = MatchesToItalicStringConverter.GetStringWithItalicTags(matches);

    // Only the text after the colon and space is expected inside the italic tags.
    Assert.AreEqual("CAESAR: <i>I live here.</i>", actual);
}
|
||||
|
||||
[TestMethod]
|
||||
public void TestItalicAndBrackets()
|
||||
{
|
||||
|
@ -6,9 +6,15 @@ using System.Text;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
{
|
||||
/// <summary>
/// One part of a split match list: the matches belonging to the part
/// together with the separator string stored alongside them.
/// </summary>
public class SplitItem
{
    /// <summary>
    /// The compare matches that make up this part.
    /// </summary>
    public List<VobSubOcr.CompareMatch> Matches { get; set; }

    /// <summary>
    /// Separator text associated with <see cref="Matches"/>
    /// (the splitter in this file assigns either a line break or an empty string).
    /// </summary>
    public string Separator { get; set; }
}
|
||||
|
||||
public static class MatchesToItalicStringConverter
|
||||
{
|
||||
private static readonly string[] Separators = { "-", "—", ".", "'", "\"", " ", "!", "\r", "\n", "\r\n" };
|
||||
private static readonly string[] Separators = { "-", "—", ".", "'", "\"", " ", "\r", "\n", "\r\n" };
|
||||
|
||||
public static string GetStringWithItalicTags(List<VobSubOcr.CompareMatch> matches)
|
||||
{
|
||||
@ -18,24 +24,34 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
}
|
||||
|
||||
var sb = new StringBuilder();
|
||||
foreach (var lineMatches in SplitMatchesToLines(matches))
|
||||
foreach (var lineMatches in SplitMatchesToLineParts(matches))
|
||||
{
|
||||
var numberOfLetters = GetNumberOfLetters(lineMatches);
|
||||
var numberOfItalicLetters = GetNumberOfItalicLetters(lineMatches);
|
||||
var numberOfLetters = GetNumberOfLetters(lineMatches.Matches);
|
||||
var numberOfItalicLetters = GetNumberOfItalicLetters(lineMatches.Matches);
|
||||
if (numberOfItalicLetters == numberOfLetters || numberOfItalicLetters > 3 && numberOfLetters - numberOfItalicLetters < 2 && ItalicIsInsideWord(matches))
|
||||
{
|
||||
sb.AppendLine("<i>" + GetRawString(lineMatches) + "</i>");
|
||||
sb.AppendLine("<i>" + GetRawString(lineMatches.Matches) + "</i>");
|
||||
}
|
||||
else if (numberOfItalicLetters == 0 || numberOfLetters > 2 && numberOfItalicLetters < 2)
|
||||
{
|
||||
sb.AppendLine(GetRawString(lineMatches));
|
||||
sb.Append(GetRawString(lineMatches.Matches));
|
||||
sb.Append(lineMatches.Separator);
|
||||
}
|
||||
else
|
||||
{
|
||||
sb.AppendLine(GetStringWithItalicTagsMixed(lineMatches));
|
||||
sb.Append(GetStringWithItalicTagsMixed(lineMatches.Matches));
|
||||
sb.Append(lineMatches.Separator);
|
||||
}
|
||||
}
|
||||
return sb.ToString().TrimEnd().Replace("</i>" + Environment.NewLine + "<i>", Environment.NewLine);
|
||||
|
||||
var text = sb.ToString().TrimEnd().Replace("</i>" + Environment.NewLine + "<i>", Environment.NewLine);
|
||||
|
||||
text = text.Replace(" ", " ");
|
||||
text = text.Replace("<i> ", " <i>");
|
||||
text = text.Replace(" </i>", "</i> ");
|
||||
text = text.Replace(" ", " ");
|
||||
|
||||
return text.Trim();
|
||||
}
|
||||
|
||||
private static bool ItalicIsInsideWord(List<VobSubOcr.CompareMatch> matches)
|
||||
@ -72,7 +88,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
for (var i = 0; i < lineMatches.Count; i++)
|
||||
{
|
||||
var m = lineMatches[i];
|
||||
if (m.Text == " " || m.Text == "-" || m.Text == "'" || m.Text == ":" || m.Text == "[" || m.Text == "]") // chars that allow change of italic
|
||||
if (m.Text == " " || m.Text == "-" || m.Text == "'") // chars that allow change of italic
|
||||
{
|
||||
if (sbWord.Length > 0)
|
||||
{
|
||||
@ -133,6 +149,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
.Replace("</i>.<i>", ".")
|
||||
.Replace("<i>...</i>", "...")
|
||||
.Replace("</i>...<i>", "...");
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
@ -173,17 +190,28 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
return italicOn;
|
||||
}
|
||||
|
||||
private static List<List<VobSubOcr.CompareMatch>> SplitMatchesToLines(List<VobSubOcr.CompareMatch> matches)
|
||||
private static List<SplitItem> SplitMatchesToLineParts(List<VobSubOcr.CompareMatch> matches)
|
||||
{
|
||||
var result = new List<List<VobSubOcr.CompareMatch>>();
|
||||
var result = new List<SplitItem>();
|
||||
var line = new List<VobSubOcr.CompareMatch>();
|
||||
|
||||
foreach (var t in matches)
|
||||
{
|
||||
if (t.Text == Environment.NewLine)
|
||||
{
|
||||
if (line.Count > 0)
|
||||
{
|
||||
result.Add(line);
|
||||
result.Add(new SplitItem { Matches = line, Separator = Environment.NewLine });
|
||||
line = new List<VobSubOcr.CompareMatch>();
|
||||
}
|
||||
}
|
||||
else if (t.Text == ":" || t.Text == ")" || t.Text == "]")
|
||||
{
|
||||
if (line.Count > 0)
|
||||
{
|
||||
line.Add(t);
|
||||
|
||||
result.Add(new SplitItem { Matches = line, Separator = string.Empty });
|
||||
line = new List<VobSubOcr.CompareMatch>();
|
||||
}
|
||||
}
|
||||
@ -192,10 +220,12 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
line.Add(t);
|
||||
}
|
||||
}
|
||||
|
||||
if (line.Count > 0)
|
||||
{
|
||||
result.Add(line);
|
||||
result.Add(new SplitItem { Matches = line, Separator = string.Empty });
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -211,7 +241,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
}
|
||||
}
|
||||
|
||||
return sb.ToString().Trim();
|
||||
return sb.ToString().Replace(" ", " ").Replace(" ", " ");
|
||||
}
|
||||
|
||||
private static int GetNumberOfLetters(List<VobSubOcr.CompareMatch> matches)
|
||||
|
Loading…
Reference in New Issue
Block a user