Refactor line break removal using new RemoveRecursiveLineBreaks method

This commit replaces the previous way of removing repeated line breaks from text with a new RemoveRecursiveLineBreaks extension method. The previous implementation was a loop that called Replace for as long as the text still contained two consecutive line breaks. That was not optimal: every iteration rescanned the whole text and allocated a new string.

The RemoveRecursiveLineBreaks method now does this in a single pass over the text, writing into one buffer instead of calling Replace repeatedly, which improves execution speed and efficiency. The call sites have been updated to use the new method, and unit tests have been added to confirm its behavior.
This commit is contained in:
Ivandro Jao 2023-07-23 01:51:42 +01:00
parent e46f4e4092
commit 48e75d2c23
4 changed files with 65 additions and 25 deletions

View File

@ -185,6 +185,22 @@ namespace Test.Core
var res = input.FixExtraSpaces();
Assert.AreEqual("a" + Environment.NewLine + "b", res);
}
[TestMethod]
public void RemoveRecursiveLineBreakTest()
{
    // Windows-style (CRLF) inputs: one, two or three consecutive line breaks
    // must all come out as exactly one CRLF between the two words.
    const string expected = "foo\r\nfoo";
    var inputs = new[]
    {
        "foo\r\n\r\nfoo",
        "foo\r\nfoo",
        "foo\r\n\r\n\r\nfoo",
    };

    foreach (var text in inputs)
    {
        Assert.AreEqual(expected, text.RemoveRecursiveLineBreaks());
    }
}
[TestMethod]
public void RemoveRecursiveLineBreakNonWindowsStyleTest()
{
    // Unix-style (LF only) inputs.
    // The method takes no parameters: a [TestMethod] without a data source
    // cannot be invoked by the test runner when it declares arguments.

    // a single LF is left untouched
    Assert.AreEqual("foo\nfoo", "foo\nfoo".RemoveRecursiveLineBreaks());

    // a run of LFs collapses to one LF
    // (the expected value previously read "foo\n\foo", where \f is a form feed)
    Assert.AreEqual("foo\nfoo", "foo\n\n\nfoo".RemoveRecursiveLineBreaks());

    // only the repeated run is collapsed; the earlier single LF stays
    Assert.AreEqual("foo\n.\nfoo", "foo\n.\n\n\nfoo".RemoveRecursiveLineBreaks());
}
[TestMethod]
public void RemoveChar1()

View File

@ -292,6 +292,46 @@ namespace Nikse.SubtitleEdit.Core.Common
return s;
}
// note: replace both input and output variable type with ReadOnlySpan<char> when in more modern .NET
// that will make it allocation free
/// <summary>
/// Collapses runs of consecutive line breaks into a single line break.
/// </summary>
/// <remarks>
/// The text is scanned from the end towards the start and copied into a buffer of the same length.
/// When the input contains no '\r', every run of '\n' is reduced to one '\n'.
/// When the input contains at least one '\r', a kept '\r' marks the start of a line break and all
/// line-break characters directly in front of it are dropped, so "\r\n\r\n" becomes "\r\n";
/// runs of bare '\n' are left as they are in that case.
/// </remarks>
/// <param name="input">Text to process; may be null or empty.</param>
/// <returns>
/// The text with repeated line breaks removed, or <paramref name="input"/> itself when it is null or empty.
/// </returns>
public static string RemoveRecursiveLineBreaks(this string input)
{
    // null or empty: nothing to collapse, and reading input.Length on null would throw
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    var len = input.Length;
    var buffer = new char[len];

    // the buffer is filled from the back because the input is read back to front
    var writeIndex = len - 1;
    var isLineBreakAdjacent = false;

    // windows line break style; IndexOf(char) is a string method on every target framework,
    // whereas Contains(char) is not available on string in .NET Framework
    var hasCarriageReturn = input.IndexOf('\r') >= 0;

    for (var i = len - 1; i >= 0; i--)
    {
        var charAtIndex = input[i];
        var isLineBreak = charAtIndex == '\n' || (hasCarriageReturn && charAtIndex == '\r');

        // line break is adjacent but we found another line break - ignore it
        if (isLineBreak && isLineBreakAdjacent)
        {
            continue;
        }

        // write current character to the buffer and decrement the write-index
        buffer[writeIndex--] = charAtIndex;

        // in CRLF text only '\r' closes a line break (the '\n' written just before belongs to it);
        // in LF-only text every '\n' does
        isLineBreakAdjacent = isLineBreak && (charAtIndex == '\r' || !hasCarriageReturn);
    }

    // the kept characters occupy the tail of the buffer, starting right after writeIndex
    return new string(buffer, writeIndex + 1, len - (writeIndex + 1));
}
public static bool ContainsLetter(this string s)
{
if (s != null)

View File

@ -745,11 +745,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
while (text.Contains(Environment.NewLine + Environment.NewLine))
{
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
}
text = text.RemoveRecursiveLineBreaks();
if (Utilities.GetNumberOfLines(text) > 2)
{
text = Utilities.AutoBreakLine(text);
@ -1042,11 +1039,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
while (text.Contains(Environment.NewLine + Environment.NewLine))
{
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
}
text = text.RemoveRecursiveLineBreaks();
if (Utilities.GetNumberOfLines(text) > 2)
{
text = Utilities.AutoBreakLine(text);
@ -4973,11 +4967,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
while (text.Contains(Environment.NewLine + Environment.NewLine))
{
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
}
text = text.RemoveRecursiveLineBreaks();
if (Utilities.GetNumberOfLines(text) > 2)
{
text = Utilities.AutoBreakLine(text);
@ -5135,10 +5127,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
while (text.Contains(Environment.NewLine + Environment.NewLine))
{
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
}
text = text.RemoveRecursiveLineBreaks();
if (Utilities.GetNumberOfLines(text) > 2)
{

View File

@ -445,13 +445,8 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
}
while (text.Contains(Environment.NewLine + Environment.NewLine, StringComparison.Ordinal))
{
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
}
text = text.Trim();
text = text.RemoveRecursiveLineBreaks().Trim();
var textNoAssa = Utilities.RemoveSsaTags(text, true);
if (textNoAssa.Length == 0)
{