Add tests and improve 'MissingOpenBracket' fixer

The update introduces test cases for the 'MissingOpenBracket' fixer to ensure it behaves as expected. A significant re-work has also been done on the 'MissingOpenBracket' fixer, refining its handling of various scenarios to resolve potential issues more effectively. The updated fixer also checks for per line restoration and deals with 2 or more lines even if their adjacent line is not closed correctly.
This commit is contained in:
Ivandro Jao 2024-02-22 21:41:56 +00:00
parent bc1a9dc331
commit 6e715a5045
2 changed files with 154 additions and 67 deletions

View File

@ -8,6 +8,7 @@ using Nikse.SubtitleEdit.Logic;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace Test.FixCommonErrors
{
@ -3532,5 +3533,55 @@ namespace Test.FixCommonErrors
Assert.AreEqual("<i>It is I this illustrious illiteration. It's this...</i>", _subtitle.Paragraphs[0].Text);
}
}
[TestMethod]
public void FixMissingOpenBracketOneTest()
{
var engine = new FixMissingOpenBracket();
var sub = GetGenericSub();
sub.Paragraphs.First().Text = "Hey, FOO).";
engine.Fix(sub, new EmptyFixCallback());
Assert.AreEqual("(Hey, FOO).", sub.Paragraphs.First().Text);
}
[TestMethod]
public void FixMissingOpenBracketTwoTest()
{
var engine = new FixMissingOpenBracket();
var sub = GetGenericSub();
sub.Paragraphs.First().Text = "Reaper, hostiles, 100 meters\neast. Two hundred meters south).";
engine.Fix(sub, new EmptyFixCallback());
Assert.AreEqual("(Reaper, hostiles, 100 meters\neast. Two hundred meters south).", sub.Paragraphs.First().Text);
}
[TestMethod]
public void FixMissingOpenBracketThreeTest()
{
var engine = new FixMissingOpenBracket();
var sub = GetGenericSub();
sub.Paragraphs.First().Text = "- Foobar bar zzz).\n- Foo bar Zz";
engine.Fix(sub, new EmptyFixCallback());
Assert.AreEqual( "- (Foobar bar zzz).\n- Foo bar Zz", sub.Paragraphs.First().Text);
}
[TestMethod]
public void FixMissingOpenBracketFourTest()
{
var engine = new FixMissingOpenBracket();
var sub = GetGenericSub();
sub.Paragraphs.First().Text = "Foobar THIS IS A NOISE)";
engine.Fix(sub, new EmptyFixCallback());
Assert.AreEqual( "Foobar (THIS IS A NOISE)", sub.Paragraphs.First().Text);
}
private static Subtitle GetGenericSub()
{
return new Subtitle()
{
Paragraphs =
{
new Paragraph("Hello World!", 1000, 2000)
}
};
}
}
}

View File

@ -1,6 +1,7 @@
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.Interfaces;
using System;
using System.Linq;
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
{
@ -11,96 +12,131 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
public static string FixMissingOpenBracket { get; set; } = "Fix missing [ or ( in line";
}
private static string Fix(string text, string openB)
{
string pre = string.Empty;
string closeB = openB == "(" ? ")" : "]";
private static bool IsIgnorable(char ch) => char.IsWhiteSpace(ch) || ch == '-';
if (text.Contains(" " + closeB))
private static char GetOpeningPair(char tag) => tag == ')' ? '(' : '[';
private static int CalcInsertPositionFromBeginning(string input)
{
var len = input.Length;
var i = 0;
// skip asa tag
while (i < len && input[i] == '{')
{
openB = openB + " ";
i = input.IndexOf('}', i + 1) + 1;
if (i == 0) break;
}
do
{
if (text.Length > 1 && text.StartsWith('-'))
{
pre += "- ";
if (text[1] == ' ')
{
text = text.Substring(2);
}
else
{
text = text.Substring(1);
}
}
if (text.Length > 3 && text.StartsWith("<i>", StringComparison.OrdinalIgnoreCase))
{
pre += "<i>";
if (text[3] == ' ')
{
text = text.Substring(4);
}
else
{
text = text.Substring(3);
}
}
if (text.Length > 1 && (text[0] == ' ' || text[0] == '.'))
{
pre += text[0] == '.' ? '.' : ' ';
text = text.Substring(1);
while (text.Length > 0 && text[0] == '.')
{
pre += ".";
text = text.Substring(1);
}
text = text.TrimStart(' ');
}
} while (text.StartsWith("<i>", StringComparison.Ordinal) || text.StartsWith('-'));
while (i < len && IsIgnorable(input[i])) i++;
text = pre + openB + text;
return text;
// skip html tags
while (i < len && input[i] == '<')
{
i = input.IndexOf('>', i + 1) + 1;
if (i == 0) break;
}
// skip anything that is not a letter or digit
while (i < len && !char.IsLetterOrDigit(input[i])) i++;
return i;
}
private static string RestoreMissingOpenParenthesis(string input)
{
var len = input.Length;
// empty string
if (len == 0) return input;
var closeTags = new[] { ']', ')' };
// ignore line if contains opening
if (input.Any(ch => ch == '(' || ch == '[')) return input;
var ci = input.IndexOfAny(closeTags);
// invalid position
if (ci < 1) return input;
var k = ci - 1;
// jump backward if uppercase or any one of the ignorable chars
while (k > 0 && char.IsUpper(input[k]) || IsIgnorable(input[k])) k--;
// note if we have case like: "Hey, FOO)." then we want to insert the open before the "Hey"
if (k > 0 && input[k] == ',')
{
k--;
while (k > 0 && char.IsLetterOrDigit(input[k])) k--;
}
// try landing on white-space char
if (k >= 0 && k + 1 < len && input[k] != ' ' && input[k + 1] == ' ') k++;
// try finding first valid char (this is used to not insert '(' or '[') in to left of a white-space
while (k < ci && char.IsWhiteSpace(input[k])) k++;
// FO) => (FO)
if (ci - k > 1)
{
input = input.Insert(k, GetOpeningPair(input[ci]).ToString());
}
else
{
// recalculate value of k from beginning
k = CalcInsertPositionFromBeginning(input);
if (k < ci)
{
input = input.Insert(k, GetOpeningPair(input[ci]).ToString());
}
}
return input;
}
private static bool IsPerLineRestoration(string[] lines) => lines.Length == 1 || lines.All(l => l.HasSentenceEnding());
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
string fixAction = Language.FixMissingOpenBracket;
int fixCount = 0;
for (int i = 0; i < subtitle.Paragraphs.Count; i++)
var fixAction = Language.FixMissingOpenBracket;
var fixCount = 0;
for (var i = 0; i < subtitle.Paragraphs.Count; i++)
{
var p = subtitle.Paragraphs[i];
if (callbacks.AllowFix(p, fixAction))
{
var hit = false;
string oldText = p.Text;
var openIdx = p.Text.IndexOf('(');
var closeIdx = p.Text.IndexOf(')');
if (closeIdx >= 0 && (closeIdx < openIdx || openIdx < 0))
var oldText = p.Text;
var text = p.Text;
// split only if both lines are closed
var lines = p.Text.SplitToLines().ToArray();
// logic to perform for when line is/are closed.
if (IsPerLineRestoration(lines))
{
p.Text = Fix(p.Text, "(");
hit = true;
var count = lines.Length;
for (var j = 0; j < count; j++)
{
lines[j] = RestoreMissingOpenParenthesis(lines[j]);
}
// rebuild the text
text = count > 1 ? string.Join(Environment.NewLine, lines) : lines[0];
}
else
{
// handles 2+ lines even if the their adjacent is not closed
text = RestoreMissingOpenParenthesis(text);
}
openIdx = p.Text.IndexOf('[');
closeIdx = p.Text.IndexOf(']');
if (closeIdx >= 0 && (closeIdx < openIdx || openIdx < 0))
{
p.Text = Fix(p.Text, "[");
hit = true;
}
if (hit)
if (oldText.Length != text.Length)
{
fixCount++;
p.Text = text;
callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
}
}
}
callbacks.UpdateFixStatus(fixCount, Language.FixMissingOpenBracket);
}
}
}
}