From df955a1e6237a106dac3bb0e975687f2ead375e1 Mon Sep 17 00:00:00 2001 From: Nikolaj Olsson Date: Fri, 12 Feb 2021 16:05:35 +0100 Subject: [PATCH] Improve fix invalid italics tags - thx uckthis :) Fix #4794 --- src/libse/Common/HtmlUtil.cs | 417 ++++++++++++++++++----------------- 1 file changed, 210 insertions(+), 207 deletions(-) diff --git a/src/libse/Common/HtmlUtil.cs b/src/libse/Common/HtmlUtil.cs index 59577dc66..7e501ad46 100644 --- a/src/libse/Common/HtmlUtil.cs +++ b/src/libse/Common/HtmlUtil.cs @@ -625,258 +625,261 @@ namespace Nikse.SubtitleEdit.Core.Common int italicBeginTagCount = Utilities.CountTagInText(text, beginTag); int italicEndTagCount = Utilities.CountTagInText(text, endTag); int noOfLines = Utilities.GetNumberOfLines(text); - if (italicBeginTagCount + italicEndTagCount > 0) + if (italicBeginTagCount + italicEndTagCount == 0) { - if (italicBeginTagCount == 1 && italicEndTagCount == 1 && text.IndexOf(beginTag, StringComparison.Ordinal) > text.IndexOf(endTag, StringComparison.Ordinal)) - { - const string pattern = "___________@"; - text = text.Replace(beginTag, pattern); - text = text.Replace(endTag, beginTag); - text = text.Replace(pattern, endTag); - } + return preTags + text; + } - if (italicBeginTagCount == 2 && italicEndTagCount == 0) + if (italicBeginTagCount == 1 && italicEndTagCount == 1 && text.IndexOf(beginTag, StringComparison.Ordinal) > text.IndexOf(endTag, StringComparison.Ordinal)) + { + const string pattern = "___________@"; + text = text.Replace(beginTag, pattern); + text = text.Replace(endTag, beginTag); + text = text.Replace(pattern, endTag); + } + + if (italicBeginTagCount == 2 && italicEndTagCount == 0) + { + int firstIndex = text.IndexOf(beginTag, StringComparison.Ordinal); + int lastIndex = text.LastIndexOf(beginTag, StringComparison.Ordinal); + int lastIndexWithNewLine = text.LastIndexOf(Environment.NewLine + beginTag, StringComparison.Ordinal) + Environment.NewLine.Length; + if (noOfLines == 2 && lastIndex == lastIndexWithNewLine && firstIndex < 2) + { + text = text.Replace(Environment.NewLine, endTag + Environment.NewLine) + endTag; + } + else + { + text = text.Remove(lastIndex, beginTag.Length).Insert(lastIndex, endTag); + } + } + + if (italicBeginTagCount == 1 && italicEndTagCount == 2) + { + int firstIndex = text.IndexOf(endTag, StringComparison.Ordinal); + if (text.StartsWith("--", StringComparison.Ordinal) || + text.StartsWith("- -", StringComparison.Ordinal) || + text.StartsWith("- -", StringComparison.Ordinal) || + text.StartsWith("- -", StringComparison.Ordinal)) + { + text = text.Remove(0, 5); + } + else if (firstIndex == 0) + { + text = text.Remove(0, 4); + } + else + { + text = text.Substring(0, firstIndex) + text.Substring(firstIndex + endTag.Length); + } + } + + if (italicBeginTagCount == 2 && italicEndTagCount == 1) + { + var lines = text.SplitToLines(); + if (lines.Count == 2 && lines[0].StartsWith(beginTag, StringComparison.Ordinal) && lines[0].EndsWith(endTag, StringComparison.Ordinal) && + lines[1].StartsWith(beginTag, StringComparison.Ordinal)) + { + text = text.TrimEnd() + endTag; + } + else { - int firstIndex = text.IndexOf(beginTag, StringComparison.Ordinal); int lastIndex = text.LastIndexOf(beginTag, StringComparison.Ordinal); - int lastIndexWithNewLine = text.LastIndexOf(Environment.NewLine + beginTag, StringComparison.Ordinal) + Environment.NewLine.Length; - if (noOfLines == 2 && lastIndex == lastIndexWithNewLine && firstIndex < 2) + if (text.Length > lastIndex + endTag.Length) { - text = text.Replace(Environment.NewLine, endTag + Environment.NewLine) + endTag; + text = text.Substring(0, lastIndex) + text.Substring(lastIndex - 1 + endTag.Length); } else { - text = text.Remove(lastIndex, beginTag.Length).Insert(lastIndex, endTag); + text = text.Substring(0, lastIndex - 1) + endTag; } } - - if (italicBeginTagCount == 1 && italicEndTagCount == 2) + if (text.StartsWith(beginTag, StringComparison.Ordinal) && text.EndsWith(endTag, StringComparison.Ordinal) && text.Contains(endTag + Environment.NewLine + beginTag)) { - int firstIndex = text.IndexOf(endTag, StringComparison.Ordinal); - if (text.StartsWith("--", StringComparison.Ordinal) || - text.StartsWith("- -", StringComparison.Ordinal) || - text.StartsWith("- -", StringComparison.Ordinal) || - text.StartsWith("- -", StringComparison.Ordinal)) - { - text = text.Remove(0, 5); - } - else if (firstIndex == 0) - { - text = text.Remove(0, 4); - } - else - { - text = text.Substring(0, firstIndex) + text.Substring(firstIndex + endTag.Length); - } + text = text.Replace(endTag + Environment.NewLine + beginTag, Environment.NewLine); + } + } + + if (italicBeginTagCount == 1 && italicEndTagCount == 0) + { + int lastIndexWithNewLine = text.LastIndexOf(Environment.NewLine + beginTag, StringComparison.Ordinal) + Environment.NewLine.Length; + int lastIndex = text.LastIndexOf(beginTag, StringComparison.Ordinal); + + if (text.StartsWith(beginTag, StringComparison.Ordinal)) + { + text += endTag; + } + else if (noOfLines == 2 && lastIndex == lastIndexWithNewLine) + { + text += endTag; + } + else + { + text = text.Replace(beginTag, string.Empty); + } + } + + if (italicBeginTagCount == 0 && italicEndTagCount == 1) + { + var cleanText = RemoveOpenCloseTags(text, TagItalic, TagBold, TagUnderline, TagCyrillicI); + bool isFixed = false; + + // Foo. + if (text.EndsWith(endTag, StringComparison.Ordinal) && !cleanText.StartsWith('-') && !cleanText.Contains(Environment.NewLine + "-")) + { + text = beginTag + text; + isFixed = true; } - if (italicBeginTagCount == 2 && italicEndTagCount == 1) + // - Foo | - Foo. + // - Bar. | - Foo. + if (!isFixed && Utilities.GetNumberOfLines(cleanText) == 2) { - var lines = text.SplitToLines(); - if (lines.Count == 2 && lines[0].StartsWith(beginTag, StringComparison.Ordinal) && lines[0].EndsWith(endTag, StringComparison.Ordinal) && - lines[1].StartsWith(beginTag, StringComparison.Ordinal)) + int newLineIndex = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); + if (newLineIndex > 0) { - text = text.TrimEnd() + endTag; - } - else - { - int lastIndex = text.LastIndexOf(beginTag, StringComparison.Ordinal); - if (text.Length > lastIndex + endTag.Length) + var firstLine = text.Substring(0, newLineIndex).Trim(); + var secondLine = text.Substring(newLineIndex + 2).Trim(); + if (firstLine.EndsWith(endTag, StringComparison.Ordinal)) { - text = text.Substring(0, lastIndex) + text.Substring(lastIndex - 1 + endTag.Length); + firstLine = beginTag + firstLine; + isFixed = true; } - else + if (secondLine.EndsWith(endTag, StringComparison.Ordinal)) { - text = text.Substring(0, lastIndex - 1) + endTag; + secondLine = beginTag + secondLine; + isFixed = true; } - } - if (text.StartsWith(beginTag, StringComparison.Ordinal) && text.EndsWith(endTag, StringComparison.Ordinal) && text.Contains(endTag + Environment.NewLine + beginTag)) - { - text = text.Replace(endTag + Environment.NewLine + beginTag, Environment.NewLine); + text = firstLine + Environment.NewLine + secondLine; } } - - if (italicBeginTagCount == 1 && italicEndTagCount == 0) - { - int lastIndexWithNewLine = text.LastIndexOf(Environment.NewLine + beginTag, StringComparison.Ordinal) + Environment.NewLine.Length; - int lastIndex = text.LastIndexOf(beginTag, StringComparison.Ordinal); - - if (text.StartsWith(beginTag, StringComparison.Ordinal)) - { - text += endTag; - } - else if (noOfLines == 2 && lastIndex == lastIndexWithNewLine) - { - text += endTag; - } - else - { - text = text.Replace(beginTag, string.Empty); - } - } - - if (italicBeginTagCount == 0 && italicEndTagCount == 1) - { - var cleanText = RemoveOpenCloseTags(text, TagItalic, TagBold, TagUnderline, TagCyrillicI); - bool isFixed = false; - - // Foo. - if (text.EndsWith(endTag, StringComparison.Ordinal) && !cleanText.StartsWith('-') && !cleanText.Contains(Environment.NewLine + "-")) - { - text = beginTag + text; - isFixed = true; - } - - // - Foo | - Foo. - // - Bar. | - Foo. - if (!isFixed && Utilities.GetNumberOfLines(cleanText) == 2) - { - int newLineIndex = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); - if (newLineIndex > 0) - { - var firstLine = text.Substring(0, newLineIndex).Trim(); - var secondLine = text.Substring(newLineIndex + 2).Trim(); - if (firstLine.EndsWith(endTag, StringComparison.Ordinal)) - { - firstLine = beginTag + firstLine; - isFixed = true; - } - if (secondLine.EndsWith(endTag, StringComparison.Ordinal)) - { - secondLine = beginTag + secondLine; - isFixed = true; - } - text = firstLine + Environment.NewLine + secondLine; - } - } - if (!isFixed) - { - text = text.Replace(endTag, string.Empty); - } - } - - // - foo. - // - bar. - if (italicBeginTagCount == 0 && italicEndTagCount == 2 && text.Contains(endTag + Environment.NewLine, StringComparison.Ordinal) && text.EndsWith(endTag, StringComparison.Ordinal)) + if (!isFixed) { text = text.Replace(endTag, string.Empty); - text = beginTag + text + endTag; } + } - if (italicBeginTagCount == 0 && italicEndTagCount == 2 && text.StartsWith(endTag, StringComparison.Ordinal) && text.EndsWith(endTag, StringComparison.Ordinal)) - { - int firstIndex = text.IndexOf(endTag, StringComparison.Ordinal); - text = text.Remove(firstIndex, endTag.Length).Insert(firstIndex, beginTag); - } + // - foo. + // - bar. + if (italicBeginTagCount == 0 && italicEndTagCount == 2 && text.Contains(endTag + Environment.NewLine, StringComparison.Ordinal) && text.EndsWith(endTag, StringComparison.Ordinal)) + { + text = text.Replace(endTag, string.Empty); + text = beginTag + text + endTag; + } - // Foo - // Bar - if (italicBeginTagCount == 2 && italicEndTagCount == 2 && noOfLines == 2) + if (italicBeginTagCount == 0 && italicEndTagCount == 2) + { + int firstIndex = text.IndexOf(endTag, StringComparison.Ordinal); + text = text.Remove(firstIndex, endTag.Length).Insert(firstIndex, beginTag); + } + + // Foo + // Bar + if (italicBeginTagCount == 2 && italicEndTagCount == 2 && noOfLines == 2) + { + int index = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); + if (index > 0 && text.Length > index + (beginTag.Length + endTag.Length)) { - int index = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); - if (index > 0 && text.Length > index + (beginTag.Length + endTag.Length)) + var firstLine = text.Substring(0, index).Trim(); + var secondLine = text.Substring(index + Environment.NewLine.Length).Trim(); + + if (firstLine.Length > 10 && firstLine.StartsWith("- ", StringComparison.Ordinal) && firstLine.EndsWith(endTag, StringComparison.Ordinal)) { - var firstLine = text.Substring(0, index).Trim(); - var secondLine = text.Substring(index + Environment.NewLine.Length).Trim(); - - if (firstLine.Length > 10 && firstLine.StartsWith("- ", StringComparison.Ordinal) && firstLine.EndsWith(endTag, StringComparison.Ordinal)) - { - text = "- " + firstLine.Remove(0, 5) + Environment.NewLine + secondLine; - text = text.Replace("- ", "- "); - index = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); - firstLine = text.Substring(0, index).Trim(); - secondLine = text.Substring(index + Environment.NewLine.Length).Trim(); - } - if (secondLine.Length > 10 && secondLine.StartsWith("- ", StringComparison.Ordinal) && secondLine.EndsWith(endTag, StringComparison.Ordinal)) - { - text = firstLine + Environment.NewLine + "- " + secondLine.Remove(0, 5); - text = text.Replace("- ", "- "); - index = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); - firstLine = text.Substring(0, index).Trim(); - secondLine = text.Substring(index + Environment.NewLine.Length).Trim(); - } - - if (Utilities.StartsAndEndsWithTag(firstLine, beginTag, endTag) && Utilities.StartsAndEndsWithTag(secondLine, beginTag, endTag)) - { - text = text.Replace(beginTag, string.Empty).Replace(endTag, string.Empty).Trim(); - text = beginTag + text + endTag; - } + text = "- " + firstLine.Remove(0, 5) + Environment.NewLine + secondLine; + text = text.Replace("- ", "- "); + index = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); + firstLine = text.Substring(0, index).Trim(); + secondLine = text.Substring(index + Environment.NewLine.Length).Trim(); + } + if (secondLine.Length > 10 && secondLine.StartsWith("- ", StringComparison.Ordinal) && secondLine.EndsWith(endTag, StringComparison.Ordinal)) + { + text = firstLine + Environment.NewLine + "- " + secondLine.Remove(0, 5); + text = text.Replace("- ", "- "); + index = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); + firstLine = text.Substring(0, index).Trim(); + secondLine = text.Substring(index + Environment.NewLine.Length).Trim(); } - //FALCONE: I didn't think
it was going to be you, - var colIdx = text.IndexOf(':'); - if (colIdx >= 0 && Utilities.CountTagInText(text, beginTag) + Utilities.CountTagInText(text, endTag) == 4 && text.Length > colIdx + 1 && !char.IsDigit(text[colIdx + 1])) + if (Utilities.StartsAndEndsWithTag(firstLine, beginTag, endTag) && Utilities.StartsAndEndsWithTag(secondLine, beginTag, endTag)) { - var firstLine = text.Substring(0, index); - var secondLine = text.Substring(index).TrimStart(); - - var secIdxCol = secondLine.IndexOf(':'); - if (secIdxCol < 0 || !Utilities.IsBetweenNumbers(secondLine, secIdxCol)) - { - var idx = firstLine.IndexOf(':'); - if (idx > 1) - { - var pre = text.Substring(0, idx + 1).TrimStart(); - text = text.Remove(0, idx + 1); - text = FixInvalidItalicTags(text).Trim(); - if (text.StartsWith(" ", StringComparison.OrdinalIgnoreCase)) - { - text = Utilities.RemoveSpaceBeforeAfterTag(text, beginTag); - } - - text = pre + " " + text; - } - } - } - } - - //- You think they're they gone? - //- That can't be. - if (italicBeginTagCount == 3 && italicEndTagCount == 1 && noOfLines == 2) - { - var newLineIdx = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); - var firstLine = text.Substring(0, newLineIdx).Trim(); - var secondLine = text.Substring(newLineIdx).Trim(); - - if ((Utilities.StartsAndEndsWithTag(firstLine, beginTag, beginTag) && Utilities.StartsAndEndsWithTag(secondLine, beginTag, endTag)) || - (Utilities.StartsAndEndsWithTag(secondLine, beginTag, beginTag) && Utilities.StartsAndEndsWithTag(firstLine, beginTag, endTag))) - { - text = text.Replace(beginTag, string.Empty); - text = text.Replace(endTag, string.Empty); - text = text.Replace(" ", " ").Trim(); + text = text.Replace(beginTag, string.Empty).Replace(endTag, string.Empty).Trim(); text = beginTag + text + endTag; } } - if (noOfLines == 3) + //FALCONE: I didn't think
it was going to be you, + var colIdx = text.IndexOf(':'); + if (colIdx >= 0 && Utilities.CountTagInText(text, beginTag) + Utilities.CountTagInText(text, endTag) == 4 && text.Length > colIdx + 1 && !char.IsDigit(text[colIdx + 1])) { - var lines = text.SplitToLines(); - if ((italicBeginTagCount == 3 && italicEndTagCount == 2) || (italicBeginTagCount == 2 && italicEndTagCount == 3)) + var firstLine = text.Substring(0, index); + var secondLine = text.Substring(index).TrimStart(); + + var secIdxCol = secondLine.IndexOf(':'); + if (secIdxCol < 0 || !Utilities.IsBetweenNumbers(secondLine, secIdxCol)) { - int numberOfItalics = 0; - foreach (var line in lines) + var idx = firstLine.IndexOf(':'); + if (idx > 1) { - if (line.StartsWith(beginTag, StringComparison.Ordinal)) + var pre = text.Substring(0, idx + 1).TrimStart(); + text = text.Remove(0, idx + 1); + text = FixInvalidItalicTags(text).Trim(); + if (text.StartsWith(" ", StringComparison.OrdinalIgnoreCase)) { - numberOfItalics++; + text = Utilities.RemoveSpaceBeforeAfterTag(text, beginTag); } - if (line.EndsWith(endTag, StringComparison.Ordinal)) - { - numberOfItalics++; - } - } - if (numberOfItalics == 5) - { // fix missing tag - text = "" + text.Replace("", string.Empty).Replace("", string.Empty) + ""; + text = pre + " " + text; } } } - - text = text.Replace("", string.Empty); - text = text.Replace(" ", string.Empty); - text = text.Replace(" ", string.Empty); } + + //- You think they're they gone? + //- That can't be. + if (italicBeginTagCount == 3 && italicEndTagCount == 1 && noOfLines == 2) + { + var newLineIdx = text.IndexOf(Environment.NewLine, StringComparison.Ordinal); + var firstLine = text.Substring(0, newLineIdx).Trim(); + var secondLine = text.Substring(newLineIdx).Trim(); + + if ((Utilities.StartsAndEndsWithTag(firstLine, beginTag, beginTag) && Utilities.StartsAndEndsWithTag(secondLine, beginTag, endTag)) || + (Utilities.StartsAndEndsWithTag(secondLine, beginTag, beginTag) && Utilities.StartsAndEndsWithTag(firstLine, beginTag, endTag))) + { + text = text.Replace(beginTag, string.Empty); + text = text.Replace(endTag, string.Empty); + text = text.Replace(" ", " ").Trim(); + text = beginTag + text + endTag; + } + } + + if (noOfLines == 3) + { + var lines = text.SplitToLines(); + if ((italicBeginTagCount == 3 && italicEndTagCount == 2) || (italicBeginTagCount == 2 && italicEndTagCount == 3)) + { + int numberOfItalics = 0; + foreach (var line in lines) + { + if (line.StartsWith(beginTag, StringComparison.Ordinal)) + { + numberOfItalics++; + } + + if (line.EndsWith(endTag, StringComparison.Ordinal)) + { + numberOfItalics++; + } + } + if (numberOfItalics == 5) + { // fix missing tag + text = "" + text.Replace("", string.Empty).Replace("", string.Empty) + ""; + } + } + } + + text = text.Replace("", string.Empty); + text = text.Replace(" ", string.Empty); + text = text.Replace(" ", string.Empty); + return preTags + text; }