diff --git a/src/Forms/FixCommonErrors.cs b/src/Forms/FixCommonErrors.cs index 1cc829047..2a194bcdd 100644 --- a/src/Forms/FixCommonErrors.cs +++ b/src/Forms/FixCommonErrors.cs @@ -1148,9 +1148,6 @@ namespace Nikse.SubtitleEdit.Forms public void FixUnneededSpaces() { - const string zeroWhiteSpace = "\u200B"; - const string zeroWidthNoBreakSpace = "\uFEFF"; - const string noBreakSpace = "\u00A0"; const string char160 = " "; // Convert.ToChar(160).ToString() string fixAction = _language.UnneededSpace; @@ -1160,155 +1157,7 @@ namespace Nikse.SubtitleEdit.Forms Paragraph p = _subtitle.Paragraphs[i]; string oldText = p.Text; - p.Text = p.Text.Trim(); - - p.Text = p.Text.Replace(zeroWhiteSpace, string.Empty); - p.Text = p.Text.Replace(zeroWidthNoBreakSpace, string.Empty); - p.Text = p.Text.Replace(noBreakSpace, string.Empty); - p.Text = p.Text.Replace(char160, " "); - - p.Text = p.Text.Replace("", string.Empty); // some kind of hidden space!!! - while (p.Text.Contains(" ")) - p.Text = p.Text.Replace(" ", " "); - - if (p.Text.Contains(" " + Environment.NewLine)) - p.Text = p.Text.Replace(" " + Environment.NewLine, Environment.NewLine); - - if (p.Text.EndsWith(" ")) - p.Text = p.Text.TrimEnd(' '); - - p.Text = p.Text.Replace(". . ..", "..."); - p.Text = p.Text.Replace(". ...", "..."); - p.Text = p.Text.Replace(". .. .", "..."); - p.Text = p.Text.Replace(". . .", "..."); - p.Text = p.Text.Replace(". ..", "..."); - p.Text = p.Text.Replace(".. .", "..."); - p.Text = p.Text.Replace("....", "..."); - p.Text = p.Text.Replace("....", "..."); - p.Text = p.Text.Replace("....", "..."); - p.Text = p.Text.Replace(" ..." + Environment.NewLine, "..." + Environment.NewLine); - p.Text = p.Text.Replace(Environment.NewLine + "... ", Environment.NewLine + "..."); - if (p.Text.StartsWith("... ")) - p.Text = p.Text.Remove(3, 1); - if (p.Text.EndsWith(" ...")) - p.Text = p.Text.Remove(p.Text.Length - 4, 1); - if (p.Text.EndsWith(" ...")) - p.Text = p.Text.Remove(p.Text.Length - 8, 1); - - if (Language != "fr") // special rules for French - { - p.Text = p.Text.Replace("... ?", "...?"); - p.Text = p.Text.Replace("... !", "...!"); - - p.Text = p.Text.Replace(" :", ":"); - p.Text = p.Text.Replace(" :", ":"); - } - - if (!p.Text.Contains("- ...")) - p.Text = p.Text.Replace(" ... ", "... "); - - while (p.Text.Contains(" ,")) - p.Text = p.Text.Replace(" ,", ","); - - if (p.Text.EndsWith(" .")) - p.Text = p.Text.Substring(0, p.Text.Length - " .".Length) + "."; - - if (p.Text.EndsWith(" \"")) - p.Text = p.Text.Remove(p.Text.Length - 2, 1); - - if (p.Text.Contains(" \"" + Environment.NewLine)) - p.Text = p.Text.Replace(" \"" + Environment.NewLine, "\"" + Environment.NewLine); - - if (p.Text.Contains(" ." + Environment.NewLine)) - p.Text = p.Text.Replace(" ." + Environment.NewLine, "." + Environment.NewLine); - - - if (Language != "fr") // special rules for French - { - if (p.Text.Contains(" !")) - p.Text = p.Text.Replace(" !", "!"); - - if (p.Text.Contains(" ?")) - p.Text = p.Text.Replace(" ?", "?"); - } - - while (p.Text.Contains("¿ ")) - p.Text = p.Text.Replace("¿ ", "¿"); - - while (p.Text.Contains("¡ ")) - p.Text = p.Text.Replace("¡ ", "¡"); - - if (p.Text.Contains("! " + Environment.NewLine)) - p.Text = p.Text.Replace("! " + Environment.NewLine, "!" + Environment.NewLine); - - if (p.Text.Contains("? " + Environment.NewLine)) - p.Text = p.Text.Replace("? " + Environment.NewLine, "?" + Environment.NewLine); - - if (p.Text.EndsWith(" ")) - p.Text = p.Text.Substring(0, p.Text.Length - " ".Length) + ""; - - if (p.Text.Contains(" " + Environment.NewLine)) - p.Text = p.Text.Replace(" " + Environment.NewLine, "" + Environment.NewLine); - - if (p.Text.EndsWith(" ")) - p.Text = p.Text.Substring(0, p.Text.Length - " ".Length) + ""; - - if (p.Text.Contains(" " + Environment.NewLine)) - p.Text = p.Text.Replace(" " + Environment.NewLine, "" + Environment.NewLine); - - if (p.Text.StartsWith(" ")) - p.Text = "" + p.Text.Substring(" ".Length); - - if (p.Text.Contains(Environment.NewLine + " ")) - - p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine + ""); - - p.Text = p.Text.Trim(); - p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine); - if (p.Text.StartsWith(" ")) - p.Text = "" + p.Text.Substring(" ".Length); - - if (p.Text.Contains(Environment.NewLine + " ")) - p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine + ""); - - p.Text = p.Text.Trim(); - p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine); - - - if (p.Text.Contains("- ") && p.Text.Length > 5) - { - int idx = p.Text.IndexOf("- ", 2); - if (p.Text.ToLower().StartsWith("")) - idx = p.Text.IndexOf("- ", 5); - while (idx > 0) - { - if (idx > 0 && idx < p.Text.Length - 2) - { - string before = string.Empty; - int k = idx - 1; - while (k >= 0 && Utilities.AllLettersAndNumbers.Contains(p.Text[k].ToString())) - { - before = p.Text[k].ToString() + before; - k--; - } - string after = string.Empty; - k = idx + 2; - while (k < p.Text.Length && Utilities.AllLetters.Contains(p.Text[k].ToString())) - { - after = after + p.Text[k].ToString(); - k++; - } - if (after.Length > 0 && after.ToLower() == before.ToLower()) - p.Text = p.Text.Remove(idx + 1, 1); - else if (before.Length > 0) - p.Text = p.Text.Remove(idx + 1, 1); - } - if (idx + 1 < p.Text.Length && idx != -1) - idx = p.Text.IndexOf("- ", idx + 1); - else - idx = -1; - } - } + p.Text = Utilities.RemoveUnneededSpaces(p.Text, Language); if (p.Text.Length != oldText.Length && Utilities.CountTagInText(p.Text, " ") != Utilities.CountTagInText(oldText, " ") + Utilities.CountTagInText(oldText, char160)) { diff --git a/src/Logic/Utilities.cs b/src/Logic/Utilities.cs index de4b79525..515ab869a 100644 --- a/src/Logic/Utilities.cs +++ b/src/Logic/Utilities.cs @@ -4178,5 +4178,187 @@ namespace Nikse.SubtitleEdit.Logic return sb.ToString(); } + /// + /// Remove unneeded spaces + /// + /// text string to remove unneeded spaces from + /// two letter language id string + /// text with unneeded spaces removed + public static string RemoveUnneededSpaces(string text, string language) + { + const string zeroWhiteSpace = "\u200B"; + const string zeroWidthNoBreakSpace = "\uFEFF"; + const string noBreakSpace = "\u00A0"; + const string char160 = " "; // Convert.ToChar(160).ToString() + + text = text.Trim(); + + text = text.Replace(zeroWhiteSpace, string.Empty); + text = text.Replace(zeroWidthNoBreakSpace, string.Empty); + text = text.Replace(noBreakSpace, string.Empty); + text = text.Replace(char160, " "); + + text = text.Replace("", string.Empty); // some kind of hidden space!!! + while (text.Contains(" ")) + text = text.Replace(" ", " "); + + if (text.Contains(" " + Environment.NewLine)) + text = text.Replace(" " + Environment.NewLine, Environment.NewLine); + + if (text.EndsWith(" ")) + text = text.TrimEnd(' '); + + text = text.Replace(". . ..", "..."); + text = text.Replace(". ...", "..."); + text = text.Replace(". .. .", "..."); + text = text.Replace(". . .", "..."); + text = text.Replace(". ..", "..."); + text = text.Replace(".. .", "..."); + text = text.Replace("....", "..."); + text = text.Replace("....", "..."); + text = text.Replace("....", "..."); + text = text.Replace(" ..." + Environment.NewLine, "..." + Environment.NewLine); + text = text.Replace(Environment.NewLine + "... ", Environment.NewLine + "..."); + if (text.StartsWith("... ")) + text = text.Remove(3, 1); + if (text.EndsWith(" ...")) + text = text.Remove(text.Length - 4, 1); + if (text.EndsWith(" ...")) + text = text.Remove(text.Length - 8, 1); + + if (language != "fr") // special rules for French + { + text = text.Replace("... ?", "...?"); + text = text.Replace("... !", "...!"); + + text = text.Replace(" :", ":"); + text = text.Replace(" :", ":"); + } + + if (!text.Contains("- ...")) + text = text.Replace(" ... ", "... "); + + while (text.Contains(" ,")) + text = text.Replace(" ,", ","); + + if (text.EndsWith(" .")) + text = text.Substring(0, text.Length - " .".Length) + "."; + + if (text.EndsWith(" \"")) + text = text.Remove(text.Length - 2, 1); + + if (text.Contains(" \"" + Environment.NewLine)) + text = text.Replace(" \"" + Environment.NewLine, "\"" + Environment.NewLine); + + if (text.Contains(" ." + Environment.NewLine)) + text = text.Replace(" ." + Environment.NewLine, "." + Environment.NewLine); + + + if (language != "fr") // special rules for French + { + if (text.Contains(" !")) + text = text.Replace(" !", "!"); + + if (text.Contains(" ?")) + text = text.Replace(" ?", "?"); + } + + while (text.Contains("¿ ")) + text = text.Replace("¿ ", "¿"); + + while (text.Contains("¡ ")) + text = text.Replace("¡ ", "¡"); + + if (text.Contains("! " + Environment.NewLine)) + text = text.Replace("! " + Environment.NewLine, "!" + Environment.NewLine); + + if (text.Contains("? " + Environment.NewLine)) + text = text.Replace("? " + Environment.NewLine, "?" + Environment.NewLine); + + if (text.EndsWith(" ")) + text = text.Substring(0, text.Length - " ".Length) + ""; + + if (text.Contains(" " + Environment.NewLine)) + text = text.Replace(" " + Environment.NewLine, "" + Environment.NewLine); + + if (text.EndsWith(" ")) + text = text.Substring(0, text.Length - " ".Length) + ""; + + if (text.Contains(" " + Environment.NewLine)) + text = text.Replace(" " + Environment.NewLine, "" + Environment.NewLine); + + if (text.StartsWith(" ")) + text = "" + text.Substring(" ".Length); + + if (text.Contains(Environment.NewLine + " ")) + + text = text.Replace(Environment.NewLine + " ", Environment.NewLine + ""); + + text = text.Trim(); + text = text.Replace(Environment.NewLine + " ", Environment.NewLine); + if (text.StartsWith(" ")) + text = "" + text.Substring(" ".Length); + + if (text.Contains(Environment.NewLine + " ")) + text = text.Replace(Environment.NewLine + " ", Environment.NewLine + ""); + + text = text.Trim(); + text = text.Replace(Environment.NewLine + " ", Environment.NewLine); + + + if (text.Contains("- ") && text.Length > 5) + { + int idx = text.IndexOf("- ", 2); + if (text.ToLower().StartsWith("")) + idx = text.IndexOf("- ", 5); + while (idx > 0) + { + if (idx > 0 && idx < text.Length - 2) + { + string before = string.Empty; + int k = idx - 1; + while (k >= 0 && Utilities.AllLettersAndNumbers.Contains(text[k].ToString())) + { + before = text[k].ToString() + before; + k--; + } + string after = string.Empty; + k = idx + 2; + while (k < text.Length && Utilities.AllLetters.Contains(text[k].ToString())) + { + after = after + text[k].ToString(); + k++; + } + if (after.Length > 0 && after.ToLower() == before.ToLower()) + text = text.Remove(idx + 1, 1); + else if (before.Length > 0) + { + if ((language == "en" && (after.ToLower() == "and" || after.ToLower() == "or")) || + (language == "es" && (after.ToLower() == "y" || after.ToLower() == "o")) || + (language == "da" && (after.ToLower() == "og" || after.ToLower() == "eller")) || + (language == "de" && (after.ToLower() == "und" || after.ToLower() == "oder")) || + (language == "fi" && (after.ToLower() == "ja" || after.ToLower() == "tai")) || + (language == "fr" && (after.ToLower() == "et" || after.ToLower() == "ou")) || + (language == "it" && (after.ToLower() == "e" || after.ToLower() == "o")) || + (language == "nl" && (after.ToLower() == "en" || after.ToLower() == "of")) || + (language == "pl" && (after.ToLower() == "i" || after.ToLower() == "czy")) || + (language == "pt" && (after.ToLower() == "e" || after.ToLower() == "ou"))) + { + } + else + { + text = text.Remove(idx + 1, 1); + } + } + } + if (idx + 1 < text.Length && idx != -1) + idx = text.IndexOf("- ", idx + 1); + else + idx = -1; + } + } + return text; + } + } } \ No newline at end of file diff --git a/src/Test/UtilitiesTest.cs b/src/Test/UtilitiesTest.cs index 09ed98263..48d073ac4 100644 --- a/src/Test/UtilitiesTest.cs +++ b/src/Test/UtilitiesTest.cs @@ -71,7 +71,104 @@ namespace Test Assert.AreEqual(s2, "Line 1." + Environment.NewLine + "Line 2."); } + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesDoubleSpace1() + { + string s1 = "This is a test"; + string s2 = Utilities.RemoveUnneededSpaces(s1, "en"); + Assert.AreEqual(s2, "This is a test"); + } + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesDoubleSpace2() + { + string s1 = "This is a test "; + string s2 = Utilities.RemoveUnneededSpaces(s1, "en"); + Assert.AreEqual(s2, "This is a test"); + } + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesItalics1() + { + string s1 = " This is a test"; + string s2 = Utilities.RemoveUnneededSpaces(s1, "en"); + Assert.AreEqual(s2, "This is a test"); + } + + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesItalics2() + { + string s1 = "This is a test "; + string s2 = Utilities.RemoveUnneededSpaces(s1, "en"); + Assert.AreEqual(s2, "This is a test"); + } + + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesHyphen1() + { + string s1 = "This is a low- budget job"; + string s2 = Utilities.RemoveUnneededSpaces(s1, "en"); + Assert.AreEqual(s2, "This is a low-budget job"); + } + + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesHyphen2() + { + string s1 = "This is a low- budget job"; + string s2 = Utilities.RemoveUnneededSpaces(s1, "en"); + Assert.AreEqual(s2, "This is a low-budget job"); + } + + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesHyphenDoNotChange1() + { + string s1 = "This is it - and he likes it!"; + string s2 = Utilities.RemoveUnneededSpaces(s1, "en"); + Assert.AreEqual(s2, s1); + } + + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesHyphenDoNotChange2() + { + string s1 = "What are your long- and altitude stats?"; + string s2 = Utilities.RemoveUnneededSpaces(s1, "en"); + Assert.AreEqual(s2, s1); + } + + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesHyphenDoNotChange3() + { + string s1 = "Did you buy that first- or second-handed?"; + string s2 = Utilities.RemoveUnneededSpaces(s1, "en"); + Assert.AreEqual(s2, s1); + } + + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesHyphenDoNotChangeDutch1() + { + string s1 = "Wat zijn je voor- en familienaam?"; + string s2 = Utilities.RemoveUnneededSpaces(s1, "nl"); + Assert.AreEqual(s2, s1); + } + + [TestMethod] + [DeploymentItem("SubtitleEdit.exe")] + public void FixUnneededSpacesHyphenDoNotChangeDutch2() + { + string s1 = "Was het in het voor- of najaar?"; + string s2 = Utilities.RemoveUnneededSpaces(s1, "nl"); + Assert.AreEqual(s2, s1); + } + } }