Merge pull request #7967 from ivandrofly/feature/fixmissingopenbracket

Add tests and improve 'MissingOpenBracket' fixer
This commit is contained in:
Nikolaj Olsson 2024-02-26 07:06:55 +01:00 committed by GitHub
commit 994ee0f165
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 154 additions and 67 deletions

View File

@ -8,6 +8,7 @@ using Nikse.SubtitleEdit.Logic;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace Test.FixCommonErrors
{
@ -3532,5 +3533,55 @@ namespace Test.FixCommonErrors
Assert.AreEqual("<i>It is I this illustrious illiteration. It's this...</i>", _subtitle.Paragraphs[0].Text);
}
}
[TestMethod]
public void FixMissingOpenBracketOneTest()
{
var engine = new FixMissingOpenBracket();
var sub = GetGenericSub();
sub.Paragraphs.First().Text = "Hey, FOO).";
engine.Fix(sub, new EmptyFixCallback());
Assert.AreEqual("(Hey, FOO).", sub.Paragraphs.First().Text);
}
[TestMethod]
public void FixMissingOpenBracketTwoTest()
{
var engine = new FixMissingOpenBracket();
var sub = GetGenericSub();
sub.Paragraphs.First().Text = "Reaper, hostiles, 100 meters\neast. Two hundred meters south).";
engine.Fix(sub, new EmptyFixCallback());
Assert.AreEqual("(Reaper, hostiles, 100 meters\neast. Two hundred meters south).", sub.Paragraphs.First().Text);
}
[TestMethod]
public void FixMissingOpenBracketThreeTest()
{
var engine = new FixMissingOpenBracket();
var sub = GetGenericSub();
sub.Paragraphs.First().Text = "- Foobar bar zzz).\n- Foo bar Zz";
engine.Fix(sub, new EmptyFixCallback());
Assert.AreEqual( "- (Foobar bar zzz).\n- Foo bar Zz", sub.Paragraphs.First().Text);
}
[TestMethod]
public void FixMissingOpenBracketFourTest()
{
var engine = new FixMissingOpenBracket();
var sub = GetGenericSub();
sub.Paragraphs.First().Text = "Foobar THIS IS A NOISE)";
engine.Fix(sub, new EmptyFixCallback());
Assert.AreEqual( "Foobar (THIS IS A NOISE)", sub.Paragraphs.First().Text);
}
private static Subtitle GetGenericSub()
{
return new Subtitle()
{
Paragraphs =
{
new Paragraph("Hello World!", 1000, 2000)
}
};
}
}
}

View File

@ -1,6 +1,7 @@
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.Interfaces;
using System;
using System.Linq;
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
{
@ -11,96 +12,131 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
public static string FixMissingOpenBracket { get; set; } = "Fix missing [ or ( in line";
}
private static string Fix(string text, string openB)
{
string pre = string.Empty;
string closeB = openB == "(" ? ")" : "]";
private static bool IsIgnorable(char ch) => char.IsWhiteSpace(ch) || ch == '-';
if (text.Contains(" " + closeB))
private static char GetOpeningPair(char tag) => tag == ')' ? '(' : '[';
private static int CalcInsertPositionFromBeginning(string input)
{
var len = input.Length;
var i = 0;
// skip asa tag
while (i < len && input[i] == '{')
{
openB = openB + " ";
i = input.IndexOf('}', i + 1) + 1;
if (i == 0) break;
}
do
{
if (text.Length > 1 && text.StartsWith('-'))
{
pre += "- ";
if (text[1] == ' ')
{
text = text.Substring(2);
}
else
{
text = text.Substring(1);
}
}
if (text.Length > 3 && text.StartsWith("<i>", StringComparison.OrdinalIgnoreCase))
{
pre += "<i>";
if (text[3] == ' ')
{
text = text.Substring(4);
}
else
{
text = text.Substring(3);
}
}
if (text.Length > 1 && (text[0] == ' ' || text[0] == '.'))
{
pre += text[0] == '.' ? '.' : ' ';
text = text.Substring(1);
while (text.Length > 0 && text[0] == '.')
{
pre += ".";
text = text.Substring(1);
}
text = text.TrimStart(' ');
}
} while (text.StartsWith("<i>", StringComparison.Ordinal) || text.StartsWith('-'));
while (i < len && IsIgnorable(input[i])) i++;
text = pre + openB + text;
return text;
// skip html tags
while (i < len && input[i] == '<')
{
i = input.IndexOf('>', i + 1) + 1;
if (i == 0) break;
}
// skip anything that is not a letter or digit
while (i < len && !char.IsLetterOrDigit(input[i])) i++;
return i;
}
private static string RestoreMissingOpenParenthesis(string input)
{
var len = input.Length;
// empty string
if (len == 0) return input;
var closeTags = new[] { ']', ')' };
// ignore line if contains opening
if (input.Any(ch => ch == '(' || ch == '[')) return input;
var ci = input.IndexOfAny(closeTags);
// invalid position
if (ci < 1) return input;
var k = ci - 1;
// jump backward if uppercase or any one of the ignorable chars
while (k > 0 && char.IsUpper(input[k]) || IsIgnorable(input[k])) k--;
// note if we have case like: "Hey, FOO)." then we want to insert the open before the "Hey"
if (k > 0 && input[k] == ',')
{
k--;
while (k > 0 && char.IsLetterOrDigit(input[k])) k--;
}
// try landing on white-space char
if (k >= 0 && k + 1 < len && input[k] != ' ' && input[k + 1] == ' ') k++;
// try finding first valid char (this is used to not insert '(' or '[') in to left of a white-space
while (k < ci && char.IsWhiteSpace(input[k])) k++;
// FO) => (FO)
if (ci - k > 1)
{
input = input.Insert(k, GetOpeningPair(input[ci]).ToString());
}
else
{
// recalculate value of k from beginning
k = CalcInsertPositionFromBeginning(input);
if (k < ci)
{
input = input.Insert(k, GetOpeningPair(input[ci]).ToString());
}
}
return input;
}
private static bool IsPerLineRestoration(string[] lines) => lines.Length == 1 || lines.All(l => l.HasSentenceEnding());
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
string fixAction = Language.FixMissingOpenBracket;
int fixCount = 0;
for (int i = 0; i < subtitle.Paragraphs.Count; i++)
var fixAction = Language.FixMissingOpenBracket;
var fixCount = 0;
for (var i = 0; i < subtitle.Paragraphs.Count; i++)
{
var p = subtitle.Paragraphs[i];
if (callbacks.AllowFix(p, fixAction))
{
var hit = false;
string oldText = p.Text;
var openIdx = p.Text.IndexOf('(');
var closeIdx = p.Text.IndexOf(')');
if (closeIdx >= 0 && (closeIdx < openIdx || openIdx < 0))
var oldText = p.Text;
var text = p.Text;
// split only if both lines are closed
var lines = p.Text.SplitToLines().ToArray();
// logic to perform for when line is/are closed.
if (IsPerLineRestoration(lines))
{
p.Text = Fix(p.Text, "(");
hit = true;
var count = lines.Length;
for (var j = 0; j < count; j++)
{
lines[j] = RestoreMissingOpenParenthesis(lines[j]);
}
// rebuild the text
text = count > 1 ? string.Join(Environment.NewLine, lines) : lines[0];
}
else
{
// handles 2+ lines even if the their adjacent is not closed
text = RestoreMissingOpenParenthesis(text);
}
openIdx = p.Text.IndexOf('[');
closeIdx = p.Text.IndexOf(']');
if (closeIdx >= 0 && (closeIdx < openIdx || openIdx < 0))
{
p.Text = Fix(p.Text, "[");
hit = true;
}
if (hit)
if (oldText.Length != text.Length)
{
fixCount++;
p.Text = text;
callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
}
}
}
callbacks.UpdateFixStatus(fixCount, Language.FixMissingOpenBracket);
}
}
}
}