From 6e715a5045ffc440ea501225806e17ff068f2841 Mon Sep 17 00:00:00 2001 From: Ivandro Jao Date: Thu, 22 Feb 2024 21:41:56 +0000 Subject: [PATCH] Add tests and improve 'MissingOpenBracket' fixer The update introduces test cases for the 'MissingOpenBracket' fixer to ensure it behaves as expected. A significant re-work has also been done on the 'MissingOpenBracket' fixer, refining its handling of various scenarios to resolve potential issues more effectively. The updated fixer also checks for per line restoration and deals with 2 or more lines even if their adjacent line is not closed correctly. --- .../FixCommonErrors/FixCommonErrorsTest.cs | 51 ++++++ .../FixCommonErrors/FixMissingOpenBracket.cs | 170 +++++++++++------- 2 files changed, 154 insertions(+), 67 deletions(-) diff --git a/src/Test/FixCommonErrors/FixCommonErrorsTest.cs b/src/Test/FixCommonErrors/FixCommonErrorsTest.cs index 234afef17..85d4a6578 100644 --- a/src/Test/FixCommonErrors/FixCommonErrorsTest.cs +++ b/src/Test/FixCommonErrors/FixCommonErrorsTest.cs @@ -8,6 +8,7 @@ using Nikse.SubtitleEdit.Logic; using System; using System.Collections.Generic; using System.IO; +using System.Linq; namespace Test.FixCommonErrors { @@ -3532,5 +3533,55 @@ namespace Test.FixCommonErrors Assert.AreEqual("It is I this illustrious illiteration. It's this...", _subtitle.Paragraphs[0].Text); } } + + [TestMethod] + public void FixMissingOpenBracketOneTest() + { + var engine = new FixMissingOpenBracket(); + var sub = GetGenericSub(); + sub.Paragraphs.First().Text = "Hey, FOO)."; + engine.Fix(sub, new EmptyFixCallback()); + Assert.AreEqual("(Hey, FOO).", sub.Paragraphs.First().Text); + } + + [TestMethod] + public void FixMissingOpenBracketTwoTest() + { + var engine = new FixMissingOpenBracket(); + var sub = GetGenericSub(); + sub.Paragraphs.First().Text = "Reaper, hostiles, 100 meters\neast. Two hundred meters south)."; + engine.Fix(sub, new EmptyFixCallback()); + Assert.AreEqual("(Reaper, hostiles, 100 meters\neast. Two hundred meters south).", sub.Paragraphs.First().Text); + } + + [TestMethod] + public void FixMissingOpenBracketThreeTest() + { + var engine = new FixMissingOpenBracket(); + var sub = GetGenericSub(); + sub.Paragraphs.First().Text = "- Foobar bar zzz).\n- Foo bar Zz"; + engine.Fix(sub, new EmptyFixCallback()); + Assert.AreEqual( "- (Foobar bar zzz).\n- Foo bar Zz", sub.Paragraphs.First().Text); + } + [TestMethod] + public void FixMissingOpenBracketFourTest() + { + var engine = new FixMissingOpenBracket(); + var sub = GetGenericSub(); + sub.Paragraphs.First().Text = "Foobar THIS IS A NOISE)"; + engine.Fix(sub, new EmptyFixCallback()); + Assert.AreEqual( "Foobar (THIS IS A NOISE)", sub.Paragraphs.First().Text); + } + + private static Subtitle GetGenericSub() + { + return new Subtitle() + { + Paragraphs = + { + new Paragraph("Hello World!", 1000, 2000) + } + }; + } } } diff --git a/src/libse/Forms/FixCommonErrors/FixMissingOpenBracket.cs b/src/libse/Forms/FixCommonErrors/FixMissingOpenBracket.cs index fd80939ab..4603be7b3 100644 --- a/src/libse/Forms/FixCommonErrors/FixMissingOpenBracket.cs +++ b/src/libse/Forms/FixCommonErrors/FixMissingOpenBracket.cs @@ -1,6 +1,7 @@ using Nikse.SubtitleEdit.Core.Common; using Nikse.SubtitleEdit.Core.Interfaces; using System; +using System.Linq; namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors { @@ -11,96 +12,131 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors public static string FixMissingOpenBracket { get; set; } = "Fix missing [ or ( in line"; } - private static string Fix(string text, string openB) - { - string pre = string.Empty; - string closeB = openB == "(" ? ")" : "]"; + private static bool IsIgnorable(char ch) => char.IsWhiteSpace(ch) || ch == '-'; - if (text.Contains(" " + closeB)) + private static char GetOpeningPair(char tag) => tag == ')' ? '(' : '['; + + private static int CalcInsertPositionFromBeginning(string input) + { + var len = input.Length; + var i = 0; + + // skip asa tag + while (i < len && input[i] == '{') { - openB = openB + " "; + i = input.IndexOf('}', i + 1) + 1; + if (i == 0) break; } - do - { - if (text.Length > 1 && text.StartsWith('-')) - { - pre += "- "; - if (text[1] == ' ') - { - text = text.Substring(2); - } - else - { - text = text.Substring(1); - } - } - if (text.Length > 3 && text.StartsWith("", StringComparison.OrdinalIgnoreCase)) - { - pre += ""; - if (text[3] == ' ') - { - text = text.Substring(4); - } - else - { - text = text.Substring(3); - } - } - if (text.Length > 1 && (text[0] == ' ' || text[0] == '.')) - { - pre += text[0] == '.' ? '.' : ' '; - text = text.Substring(1); - while (text.Length > 0 && text[0] == '.') - { - pre += "."; - text = text.Substring(1); - } - text = text.TrimStart(' '); - } - } while (text.StartsWith("", StringComparison.Ordinal) || text.StartsWith('-')); + while (i < len && IsIgnorable(input[i])) i++; - text = pre + openB + text; - return text; + // skip html tags + while (i < len && input[i] == '<') + { + i = input.IndexOf('>', i + 1) + 1; + if (i == 0) break; + } + + // skip anything that is not a letter or digit + while (i < len && !char.IsLetterOrDigit(input[i])) i++; + + return i; } + private static string RestoreMissingOpenParenthesis(string input) + { + var len = input.Length; + + // empty string + if (len == 0) return input; + + var closeTags = new[] { ']', ')' }; + // ignore line if contains opening + if (input.Any(ch => ch == '(' || ch == '[')) return input; + + var ci = input.IndexOfAny(closeTags); + // invalid position + if (ci < 1) return input; + + var k = ci - 1; + // jump backward if uppercase or any one of the ignorable chars + while (k > 0 && char.IsUpper(input[k]) || IsIgnorable(input[k])) k--; + + // note if we have case like: "Hey, FOO)." then we want to insert the open before the "Hey" + if (k > 0 && input[k] == ',') + { + k--; + while (k > 0 && char.IsLetterOrDigit(input[k])) k--; + } + + // try landing on white-space char + if (k >= 0 && k + 1 < len && input[k] != ' ' && input[k + 1] == ' ') k++; + // try finding first valid char (this is used to not insert '(' or '[') in to left of a white-space + while (k < ci && char.IsWhiteSpace(input[k])) k++; + + // FO) => (FO) + if (ci - k > 1) + { + input = input.Insert(k, GetOpeningPair(input[ci]).ToString()); + } + else + { + // recalculate value of k from beginning + k = CalcInsertPositionFromBeginning(input); + if (k < ci) + { + input = input.Insert(k, GetOpeningPair(input[ci]).ToString()); + } + } + + return input; + } + + private static bool IsPerLineRestoration(string[] lines) => lines.Length == 1 || lines.All(l => l.HasSentenceEnding()); + public void Fix(Subtitle subtitle, IFixCallbacks callbacks) { - string fixAction = Language.FixMissingOpenBracket; - int fixCount = 0; - for (int i = 0; i < subtitle.Paragraphs.Count; i++) + var fixAction = Language.FixMissingOpenBracket; + var fixCount = 0; + for (var i = 0; i < subtitle.Paragraphs.Count; i++) { var p = subtitle.Paragraphs[i]; if (callbacks.AllowFix(p, fixAction)) { - var hit = false; - string oldText = p.Text; - var openIdx = p.Text.IndexOf('('); - var closeIdx = p.Text.IndexOf(')'); - if (closeIdx >= 0 && (closeIdx < openIdx || openIdx < 0)) + var oldText = p.Text; + var text = p.Text; + + // split only if both lines are closed + var lines = p.Text.SplitToLines().ToArray(); + // logic to perform for when line is/are closed. + if (IsPerLineRestoration(lines)) { - p.Text = Fix(p.Text, "("); - hit = true; + var count = lines.Length; + for (var j = 0; j < count; j++) + { + lines[j] = RestoreMissingOpenParenthesis(lines[j]); + } + + // rebuild the text + text = count > 1 ? string.Join(Environment.NewLine, lines) : lines[0]; + } + else + { + // handles 2+ lines even if the their adjacent is not closed + text = RestoreMissingOpenParenthesis(text); } - openIdx = p.Text.IndexOf('['); - closeIdx = p.Text.IndexOf(']'); - if (closeIdx >= 0 && (closeIdx < openIdx || openIdx < 0)) - { - p.Text = Fix(p.Text, "["); - hit = true; - } - - if (hit) + if (oldText.Length != text.Length) { fixCount++; + p.Text = text; callbacks.AddFixToListView(p, fixAction, oldText, p.Text); } } } + callbacks.UpdateFixStatus(fixCount, Language.FixMissingOpenBracket); } - } -} +} \ No newline at end of file