using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text.RegularExpressions;

namespace Nikse.SubtitleEdit.Core.Forms
{
    /// <summary>
    /// Input for <see cref="RemoveInterjection.Invoke"/>: the text to clean, the interjections
    /// to look for, and whether removal is restricted to interjections on a line of their own.
    /// </summary>
    public class InterjectionRemoveContext
    {
        /// <summary>
        /// True if an interjection should be removed only when it occupies a separate line; otherwise false.
        /// </summary>
        public bool OnlySeparetedLines { get; set; }

        /// <summary>
        /// The list of interjections that the text is checked against.
        /// </summary>
        public IList<string> Interjections { get; set; }

        /// <summary>
        /// Text from which the interjections will be removed.
        /// </summary>
        public string Text { get; set; }
    }

    /// <summary>
    /// Removes interjections from subtitle text and tidies the punctuation, dialog dashes
    /// and italic tags that are left behind by the removal.
    /// </summary>
    public class RemoveInterjection
    {
        // https://github.com/SubtitleEdit/subtitleedit/issues/1421
        // NOTE(review): this list is never read or written by Invoke - confirm whether it is still needed.
        private IList<string> _ignoreList = new List<string>();

        /// <summary>
        /// Removes every whole-word occurrence of the interjections in <paramref name="context"/> from its text.
        /// </summary>
        /// <param name="context">Text, interjection list and options.</param>
        /// <returns>
        /// The cleaned text. The input text is returned unchanged when it is null/white space, when the
        /// interjection list is null, or when <see cref="InterjectionRemoveContext.OnlySeparetedLines"/> is set
        /// and the change is anything other than dropping one whole line of a two-line text.
        /// An empty string is returned when nothing but tags/punctuation would remain.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
        public string Invoke(InterjectionRemoveContext context)
        {
            if (context == null)
            {
                throw new ArgumentNullException(nameof(context));
            }

            if (string.IsNullOrWhiteSpace(context.Text) || context.Interjections == null)
            {
                return context.Text;
            }

            string text = context.Text;
            string oldText = text;

            // A removal can expose another interjection (or a new first letter), so the whole
            // list is re-scanned until a pass makes no change that requests a repeat.
            bool doRepeat = true;
            while (doRepeat)
            {
                doRepeat = false;
                foreach (string s in context.Interjections)
                {
                    // Null/empty entries cannot match a word; skipping them also keeps s[0] below safe.
                    if (string.IsNullOrEmpty(s) || !text.Contains(s))
                    {
                        continue;
                    }

                    // Whole-word match only. The pattern is an escaped literal, so the match length equals s.Length.
                    var match = Regex.Match(text, "\\b" + Regex.Escape(s) + "\\b");
                    if (!match.Success)
                    {
                        continue;
                    }

                    int index = match.Index;
                    string temp = text.Remove(index, s.Length);

                    // Drop a leading "... " that directly followed the removed word.
                    if (index == 0 && temp.StartsWith("... ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(0, 4);
                    }

                    // Same, when the word came right after an opening italic tag (index 3 == "<i>".Length).
                    if (index == 3 && temp.StartsWith("<i>... ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(3, 4);
                    }

                    if (index > 2 && " \r\n".Contains(text.Substring(index - 1, 1)) && temp.Substring(index).StartsWith("... ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(index, 4);
                    }

                    // Word was first on a new italic line: the four chars before index are "\n<i>".
                    if (index > 4 && temp.Substring(index - 4).StartsWith("\n<i>... ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(index, 4);
                    }

                    // Em dash clean-up. Removing a word that stood between two spaces leaves a
                    // double space, which is what the two-space literals below look for.
                    if (temp.Remove(0, index) == " —" && temp.EndsWith("—  —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(temp.Length - 3);
                        if (temp.EndsWith(Environment.NewLine + "—", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(temp.Length - 1).TrimEnd();
                        }
                    }
                    else if (temp.Remove(0, index) == " —" && temp.EndsWith("-  —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(temp.Length - 3);
                        if (temp.EndsWith(Environment.NewLine + "-", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(temp.Length - 1).TrimEnd();
                        }
                    }
                    else if (index == 2 && temp.StartsWith("-  —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(2, 2);
                    }
                    else if (index == 2 && temp.StartsWith("- —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(2, 1);
                    }
                    else if (index == 0 && temp.StartsWith(" —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(0, 2);
                    }
                    else if (index == 0 && temp.StartsWith('—'))
                    {
                        temp = temp.Remove(0, 1);
                    }
                    else if (index > 3 && (temp.Substring(index - 2) == ".  —" || temp.Substring(index - 2) == "!  —" || temp.Substring(index - 2) == "?  —"))
                    {
                        temp = temp.Remove(index - 2, 1).Replace("  ", " ");
                    }

                    string pre = string.Empty;
                    if (index > 0)
                    {
                        doRepeat = true;
                    }

                    // removeAfter stays true until one of the targeted punctuation fixes below has
                    // handled the gap; only then is the generic clean-up further down skipped.
                    bool removeAfter = true;
                    if (index > s.Length)
                    {
                        if (temp.Length > index - s.Length + 3)
                        {
                            int subIndex = index - s.Length + 1;
                            string subTemp = temp.Substring(subIndex, 3);
                            if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                            {
                                temp = temp.Remove(subIndex, 2);
                                removeAfter = false;
                            }
                            else if (subIndex > 3 && ".!?".Contains(temp.Substring(subIndex - 1, 1)))
                            {
                                subTemp = temp.Substring(subIndex);
                                if (subTemp == " ..." || subTemp.StartsWith(" ..." + Environment.NewLine, StringComparison.InvariantCulture))
                                {
                                    temp = temp.Remove(subIndex, 4).Trim();
                                    removeAfter = false;
                                }
                            }
                        }

                        if (removeAfter && temp.Length > index - s.Length + 2)
                        {
                            int subIndex = index - s.Length;
                            string subTemp = temp.Substring(subIndex, 3);
                            if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                            {
                                temp = temp.Remove(subIndex, 2);
                                removeAfter = false;
                            }
                            else
                            {
                                subTemp = temp.Substring(subIndex);
                                if (subTemp.StartsWith(", -—", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(subIndex, 3);
                                    removeAfter = false;
                                }
                                else if (subTemp.StartsWith(", --", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(subIndex, 2);
                                    removeAfter = false;
                                }
                                else if (index > 2 && subTemp.StartsWith("-  —", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(subIndex + 2, 2).Replace("  ", " ");
                                    removeAfter = false;
                                }
                            }
                        }

                        if (removeAfter && temp.Length > index - s.Length + 2)
                        {
                            int subIndex = index - s.Length + 1;
                            string subTemp = temp.Substring(subIndex, 2);
                            if (subTemp == "-!" || subTemp == "-?" || subTemp == "-.")
                            {
                                temp = temp.Remove(subIndex, 1);
                                removeAfter = false;
                            }

                            subTemp = temp.Substring(subIndex);
                            if (subTemp == " !" || subTemp == " ?" || subTemp == " .")
                            {
                                temp = temp.Remove(subIndex, 1);
                                removeAfter = false;
                            }
                        }
                    }

                    if (index > 3 && index - 2 < temp.Length)
                    {
                        string subTemp = temp.Substring(index - 2);
                        if (subTemp.StartsWith(",  —", StringComparison.Ordinal) || subTemp.StartsWith(", —", StringComparison.Ordinal))
                        {
                            // Drop the comma in front of the dash and keep index aligned with the shortened text.
                            temp = temp.Remove(index - 2, 1);
                            index--;
                        }

                        if (subTemp.StartsWith("- ...", StringComparison.Ordinal))
                        {
                            removeAfter = false;
                        }
                    }

                    if (removeAfter)
                    {
                        if (index == 0)
                        {
                            if (temp.StartsWith('-'))
                            {
                                temp = temp.Remove(0, 1).Trim();
                            }
                        }
                        else if (index == 3 && temp.StartsWith("<i>-", StringComparison.Ordinal))
                        {
                            // Remove the dash that now directly follows the opening italic tag.
                            temp = temp.Remove(3, 1);
                        }
                        else if (index > 0 && temp.Length > index)
                        {
                            // Split into the part before the removed word (pre) and the part after it (temp).
                            pre = text.Substring(0, index);
                            temp = temp.Remove(0, index);
                            if (temp.StartsWith('-') && pre.EndsWith('-'))
                            {
                                temp = temp.Remove(0, 1);
                            }

                            if (temp.StartsWith('-') && pre.EndsWith("- ", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(0, 1);
                            }
                        }

                        if (temp.StartsWith("...", StringComparison.Ordinal))
                        {
                            pre = pre.Trim();
                        }
                        else
                        {
                            // Strip the punctuation/space that trailed the removed word.
                            while (temp.Length > 0 && " ,.?!".Contains(temp[0]))
                            {
                                temp = temp.Remove(0, 1);
                                doRepeat = true;
                            }
                        }

                        // If the interjection's first character changes when lower-cased (i.e. it was
                        // written with a capital), capitalize the text that takes its place.
                        if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLowerInvariant())
                        {
                            temp = char.ToUpper(temp[0]) + temp.Substring(1);
                            doRepeat = true;
                        }

                        if (temp.StartsWith('-') && pre.EndsWith(' '))
                        {
                            temp = temp.Remove(0, 1);
                        }

                        if (temp.StartsWith('—') && pre.EndsWith(','))
                        {
                            pre = pre.TrimEnd(',') + " ";
                        }

                        temp = pre + temp;
                    }

                    if (temp.EndsWith(Environment.NewLine + "- ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(temp.Length - 2).TrimEnd();
                    }

                    var st = new StrippableText(temp);
                    if (st.StrippedText.Length == 0)
                    {
                        return string.Empty;
                    }

                    // The text went from several lines to one: a leading dialog dash is no longer wanted.
                    if (temp.StartsWith('-') && !temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine))
                    {
                        temp = temp.Remove(0, 1).Trim();
                    }

                    text = temp;
                }
            }

            var lines = text.SplitToLines();
            if (lines.Count == 2 && text != oldText)
            {
                if (lines[0] == "-" && lines[1] == "-")
                {
                    return string.Empty;
                }

                if (lines[0].Length > 1 && lines[0][0] == '-' && lines[1].Trim() == "-")
                {
                    return lines[0].Remove(0, 1).Trim();
                }

                if (lines[1].Length > 1 && lines[1][0] == '-' && lines[0].Trim() == "-")
                {
                    return lines[1].Remove(0, 1).Trim();
                }

                // 4 == "<i>-".Length: drop the dash but keep the opening italic tag.
                if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal) && lines[0].Trim() == "-")
                {
                    return "<i>" + lines[1].Remove(0, 4).Trim();
                }

                // Note: the length guard applies to the "-" case only; a second line consisting of a
                // lone '.', '!' or '?' always takes this branch.
                if ((lines[0].Length > 1 && lines[1] == "-") || lines[1] == "." || lines[1] == "!" || lines[1] == "?")
                {
                    if (lines[0].StartsWith('-') && oldText.Contains(Environment.NewLine + "-"))
                    {
                        lines[0] = lines[0].Remove(0, 1);
                    }

                    return lines[0].Trim();
                }

                var noTags0 = HtmlUtil.RemoveHtmlTags(lines[0]).Trim();
                var noTags1 = HtmlUtil.RemoveHtmlTags(lines[1]).Trim();
                if (noTags0 == "-")
                {
                    if (noTags1 == noTags0)
                    {
                        return string.Empty;
                    }

                    if (lines[1].Length > 1 && lines[1][0] == '-')
                    {
                        return lines[1].Remove(0, 1).Trim();
                    }

                    if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal))
                    {
                        return "<i>" + lines[1].Remove(0, 4).Trim();
                    }

                    return lines[1];
                }

                if (noTags1 == "-")
                {
                    if (lines[0].Length > 1 && lines[0][0] == '-')
                    {
                        return lines[0].Remove(0, 1).Trim();
                    }

                    if (lines[0].Length > 4 && lines[0].StartsWith("<i>-", StringComparison.Ordinal))
                    {
                        // The closing tag lived on the line being dropped, so re-close the italics here.
                        if (!lines[0].Contains("</i>") && lines[1].Contains("</i>"))
                        {
                            return "<i>" + lines[0].Remove(0, 4).Trim() + "</i>";
                        }

                        return "<i>" + lines[0].Remove(0, 4).Trim();
                    }

                    return lines[0];
                }
            }

            // Drop a line that holds nothing but punctuation/dashes.
            if (lines.Count == 2)
            {
                if (string.IsNullOrWhiteSpace(lines[1].RemoveChar('.').RemoveChar('?').RemoveChar('!').RemoveChar('-').RemoveChar('—')))
                {
                    text = lines[0];
                    lines = text.SplitToLines();
                }
                else if (string.IsNullOrWhiteSpace(lines[0].RemoveChar('.').RemoveChar('?').RemoveChar('!').RemoveChar('-').RemoveChar('—')))
                {
                    text = lines[1];
                    lines = text.SplitToLines();
                }
            }

            // A two-line dialog collapsed to one line: remove the now pointless leading dash.
            if (lines.Count == 1 && text != oldText && Utilities.GetNumberOfLines(oldText) == 2)
            {
                bool firstLineEndedSentence =
                    oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) ||
                    oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) ||
                    oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine);
                bool startedWithDash = oldText.StartsWith('-') || oldText.StartsWith("<i>-", StringComparison.Ordinal);
                bool secondLineStartedWithDash = oldText.Contains(Environment.NewLine + "-") || oldText.Contains(Environment.NewLine + "<i>-");

                if ((startedWithDash || secondLineStartedWithDash) && firstLineEndedSentence)
                {
                    if (text.StartsWith("<i>-", StringComparison.Ordinal))
                    {
                        text = "<i>" + text.Remove(0, 4).TrimStart();
                    }
                    else
                    {
                        text = text.TrimStart('-').TrimStart();
                    }
                }
            }

            if (oldText != text)
            {
                // Re-attach italic tags that ended up alone on a line.
                text = text.Replace(Environment.NewLine + "<i>" + Environment.NewLine, Environment.NewLine + "<i>");
                text = text.Replace(Environment.NewLine + "</i>" + Environment.NewLine, "</i>" + Environment.NewLine);
                if (text.StartsWith("<i>" + Environment.NewLine, StringComparison.Ordinal))
                {
                    // 3 == "<i>".Length
                    text = text.Remove(3, Environment.NewLine.Length);
                }

                if (text.EndsWith(Environment.NewLine + "</i>", StringComparison.Ordinal))
                {
                    // 4 == "</i>".Length
                    text = text.Remove(text.Length - (Environment.NewLine.Length + 4), Environment.NewLine.Length);
                }

                text = text.Replace(Environment.NewLine + "</i>" + Environment.NewLine, "</i>" + Environment.NewLine);

                if (context.OnlySeparetedLines)
                {
                    if (string.IsNullOrEmpty(text))
                    {
                        return text;
                    }

                    // Accept the change only when exactly one whole line of a two-line text was dropped
                    // and the surviving line is untouched; otherwise keep the original text.
                    var oldLines = oldText.SplitToLines();
                    var newLines = text.SplitToLines();
                    if (oldLines.Count == 2 && newLines.Count == 1 &&
                        (oldLines[0] == newLines[0] || oldLines[1] == newLines[0]))
                    {
                        return text;
                    }

                    return oldText;
                }
            }

            return text;
        }
    }
}