mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-25 20:52:44 +01:00
Refactor line break removal using new RemoveRecursiveLineBreaks method
This commit replaces the previous method of removing repeated line breaks in the text with a new RemoveRecursiveLineBreaks method. The previous implementation consisted of a loop that executed a Replace method each time a repeated line break was found in the text. This was not an optimal solution as it resulted in unnecessary overhead due to the continuous replacement process. Now, the RemoveRecursiveLineBreaks method performs this operation more efficiently, without the need to continuously call the Replace method. The codebase has been updated to use the new method, improving execution speed and efficiency. Unit tests have also been added to confirm the functionality of this new method.
This commit is contained in:
parent
e46f4e4092
commit
48e75d2c23
@ -185,6 +185,22 @@ namespace Test.Core
|
||||
var res = input.FixExtraSpaces();
|
||||
Assert.AreEqual("a" + Environment.NewLine + "b", res);
|
||||
}
|
||||
|
||||
[TestMethod]
public void RemoveRecursiveLineBreakTest()
{
    // Windows-style (CRLF) input: two, one and three consecutive line breaks
    // must all come back as a single line break between the two words.
    const string expected = "foo\r\nfoo";
    var samples = new[]
    {
        "foo\r\n\r\nfoo",
        "foo\r\nfoo",
        "foo\r\n\r\n\r\nfoo",
    };

    foreach (var sample in samples)
    {
        Assert.AreEqual(expected, sample.RemoveRecursiveLineBreaks());
    }
}
|
||||
|
||||
// Verifies that runs of bare line feeds (Unix-style line breaks) are collapsed
// into a single line feed.
// The method takes no parameters: MSTest only runs parameterized test methods
// when a data source is attached, and the former "input" parameter was unused.
[TestMethod]
public void RemoveRecursiveLineBreakNonWindowsStyleTest()
{
    // a single line feed is left untouched
    Assert.AreEqual("foo\nfoo", "foo\nfoo".RemoveRecursiveLineBreaks());

    // three line feeds collapse into one (expected value previously contained
    // the form-feed escape "\f" by mistake)
    Assert.AreEqual("foo\nfoo", "foo\n\n\nfoo".RemoveRecursiveLineBreaks());

    // line feeds separated by other characters are independent runs
    Assert.AreEqual("foo\n.\nfoo", "foo\n.\n\n\nfoo".RemoveRecursiveLineBreaks());
}
|
||||
|
||||
[TestMethod]
|
||||
public void RemoveChar1()
|
||||
|
@ -292,6 +292,46 @@ namespace Nikse.SubtitleEdit.Core.Common
|
||||
return s;
|
||||
}
|
||||
|
||||
/// <summary>
/// Collapses every run of consecutive line breaks into a single line break.
/// </summary>
/// <remarks>
/// A run is any uninterrupted sequence of carriage-return and line-feed characters.
/// Only the last line break of a run is kept: a CR LF pair when the run ends with
/// one, otherwise the final single CR or LF. Each run is evaluated on its own, so
/// strings that mix Windows-style and Unix-style line breaks are handled as well.
/// </remarks>
/// <param name="input">Text to process; may be null or empty.</param>
/// <returns>
/// The text with repeated line breaks removed. A null or empty input, or an input
/// without repeated line breaks, is returned as-is.
/// </returns>
public static string RemoveRecursiveLineBreaks(this string input)
{
    // note: a span-based variant could reduce allocations on more modern .NET

    // nothing to collapse - also avoids a NullReferenceException on null input
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    var len = input.Length;
    var buffer = new char[len];
    var writeIndex = 0;
    var readIndex = 0;

    while (readIndex < len)
    {
        var charAtIndex = input[readIndex];
        if (charAtIndex != '\r' && charAtIndex != '\n')
        {
            // ordinary character - copy as is
            buffer[writeIndex++] = charAtIndex;
            readIndex++;
            continue;
        }

        // locate the end (exclusive) of the current run of line break characters
        var runEnd = readIndex + 1;
        while (runEnd < len && (input[runEnd] == '\r' || input[runEnd] == '\n'))
        {
            runEnd++;
        }

        // keep only the last line break of the run; when the run ends with "\r\n"
        // the pair is kept together so a Windows line break is never split
        var lastChar = input[runEnd - 1];
        if (lastChar == '\n' && runEnd - 2 >= readIndex && input[runEnd - 2] == '\r')
        {
            buffer[writeIndex++] = '\r';
        }

        buffer[writeIndex++] = lastChar;
        readIndex = runEnd;
    }

    // nothing was removed - hand back the original instance instead of a copy
    return writeIndex == len ? input : new string(buffer, 0, writeIndex);
}
|
||||
|
||||
public static bool ContainsLetter(this string s)
|
||||
{
|
||||
if (s != null)
|
||||
|
@ -745,11 +745,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
|
||||
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
|
||||
while (text.Contains(Environment.NewLine + Environment.NewLine))
|
||||
{
|
||||
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
|
||||
}
|
||||
|
||||
text = text.RemoveRecursiveLineBreaks();
|
||||
|
||||
if (Utilities.GetNumberOfLines(text) > 2)
|
||||
{
|
||||
text = Utilities.AutoBreakLine(text);
|
||||
@ -1042,11 +1039,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
|
||||
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
|
||||
while (text.Contains(Environment.NewLine + Environment.NewLine))
|
||||
{
|
||||
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
|
||||
}
|
||||
|
||||
text = text.RemoveRecursiveLineBreaks();
|
||||
|
||||
if (Utilities.GetNumberOfLines(text) > 2)
|
||||
{
|
||||
text = Utilities.AutoBreakLine(text);
|
||||
@ -4973,11 +4967,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
|
||||
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
|
||||
while (text.Contains(Environment.NewLine + Environment.NewLine))
|
||||
{
|
||||
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
|
||||
}
|
||||
|
||||
text = text.RemoveRecursiveLineBreaks();
|
||||
|
||||
if (Utilities.GetNumberOfLines(text) > 2)
|
||||
{
|
||||
text = Utilities.AutoBreakLine(text);
|
||||
@ -5135,10 +5127,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
|
||||
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
|
||||
while (text.Contains(Environment.NewLine + Environment.NewLine))
|
||||
{
|
||||
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
|
||||
}
|
||||
text = text.RemoveRecursiveLineBreaks();
|
||||
|
||||
if (Utilities.GetNumberOfLines(text) > 2)
|
||||
{
|
||||
|
@ -445,13 +445,8 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
|
||||
}
|
||||
|
||||
while (text.Contains(Environment.NewLine + Environment.NewLine, StringComparison.Ordinal))
|
||||
{
|
||||
text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine);
|
||||
}
|
||||
|
||||
text = text.Trim();
|
||||
|
||||
text = text.RemoveRecursiveLineBreaks().Trim();
|
||||
|
||||
var textNoAssa = Utilities.RemoveSsaTags(text, true);
|
||||
if (textNoAssa.Length == 0)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user