Fix #217 (improvement for "Fix unneeded spaces") - thx Betsy25 :)

This commit is contained in:
niksedk 2014-08-03 20:52:07 +02:00
parent decef1a960
commit 0f28edfa80
3 changed files with 280 additions and 152 deletions

View File

@ -1148,9 +1148,6 @@ namespace Nikse.SubtitleEdit.Forms
public void FixUnneededSpaces() public void FixUnneededSpaces()
{ {
const string zeroWhiteSpace = "\u200B";
const string zeroWidthNoBreakSpace = "\uFEFF";
const string noBreakSpace = "\u00A0";
const string char160 = " "; // Convert.ToChar(160).ToString() const string char160 = " "; // Convert.ToChar(160).ToString()
string fixAction = _language.UnneededSpace; string fixAction = _language.UnneededSpace;
@ -1160,155 +1157,7 @@ namespace Nikse.SubtitleEdit.Forms
Paragraph p = _subtitle.Paragraphs[i]; Paragraph p = _subtitle.Paragraphs[i];
string oldText = p.Text; string oldText = p.Text;
p.Text = p.Text.Trim(); p.Text = Utilities.RemoveUnneededSpaces(p.Text, Language);
p.Text = p.Text.Replace(zeroWhiteSpace, string.Empty);
p.Text = p.Text.Replace(zeroWidthNoBreakSpace, string.Empty);
p.Text = p.Text.Replace(noBreakSpace, string.Empty);
p.Text = p.Text.Replace(char160, " ");
p.Text = p.Text.Replace("", string.Empty); // some kind of hidden space!!!
while (p.Text.Contains(" "))
p.Text = p.Text.Replace(" ", " ");
if (p.Text.Contains(" " + Environment.NewLine))
p.Text = p.Text.Replace(" " + Environment.NewLine, Environment.NewLine);
if (p.Text.EndsWith(" "))
p.Text = p.Text.TrimEnd(' ');
p.Text = p.Text.Replace(". . ..", "...");
p.Text = p.Text.Replace(". ...", "...");
p.Text = p.Text.Replace(". .. .", "...");
p.Text = p.Text.Replace(". . .", "...");
p.Text = p.Text.Replace(". ..", "...");
p.Text = p.Text.Replace(".. .", "...");
p.Text = p.Text.Replace("....", "...");
p.Text = p.Text.Replace("....", "...");
p.Text = p.Text.Replace("....", "...");
p.Text = p.Text.Replace(" ..." + Environment.NewLine, "..." + Environment.NewLine);
p.Text = p.Text.Replace(Environment.NewLine + "... ", Environment.NewLine + "...");
if (p.Text.StartsWith("... "))
p.Text = p.Text.Remove(3, 1);
if (p.Text.EndsWith(" ..."))
p.Text = p.Text.Remove(p.Text.Length - 4, 1);
if (p.Text.EndsWith(" ...</i>"))
p.Text = p.Text.Remove(p.Text.Length - 8, 1);
if (Language != "fr") // special rules for French
{
p.Text = p.Text.Replace("... ?", "...?");
p.Text = p.Text.Replace("... !", "...!");
p.Text = p.Text.Replace(" :", ":");
p.Text = p.Text.Replace(" :", ":");
}
if (!p.Text.Contains("- ..."))
p.Text = p.Text.Replace(" ... ", "... ");
while (p.Text.Contains(" ,"))
p.Text = p.Text.Replace(" ,", ",");
if (p.Text.EndsWith(" ."))
p.Text = p.Text.Substring(0, p.Text.Length - " .".Length) + ".";
if (p.Text.EndsWith(" \""))
p.Text = p.Text.Remove(p.Text.Length - 2, 1);
if (p.Text.Contains(" \"" + Environment.NewLine))
p.Text = p.Text.Replace(" \"" + Environment.NewLine, "\"" + Environment.NewLine);
if (p.Text.Contains(" ." + Environment.NewLine))
p.Text = p.Text.Replace(" ." + Environment.NewLine, "." + Environment.NewLine);
if (Language != "fr") // special rules for French
{
if (p.Text.Contains(" !"))
p.Text = p.Text.Replace(" !", "!");
if (p.Text.Contains(" ?"))
p.Text = p.Text.Replace(" ?", "?");
}
while (p.Text.Contains("¿ "))
p.Text = p.Text.Replace("¿ ", "¿");
while (p.Text.Contains("¡ "))
p.Text = p.Text.Replace("¡ ", "¡");
if (p.Text.Contains("! </i>" + Environment.NewLine))
p.Text = p.Text.Replace("! </i>" + Environment.NewLine, "!</i>" + Environment.NewLine);
if (p.Text.Contains("? </i>" + Environment.NewLine))
p.Text = p.Text.Replace("? </i>" + Environment.NewLine, "?</i>" + Environment.NewLine);
if (p.Text.EndsWith(" </i>"))
p.Text = p.Text.Substring(0, p.Text.Length - " </i>".Length) + "</i>";
if (p.Text.Contains(" </i>" + Environment.NewLine))
p.Text = p.Text.Replace(" </i>" + Environment.NewLine, "</i>" + Environment.NewLine);
if (p.Text.EndsWith(" </I>"))
p.Text = p.Text.Substring(0, p.Text.Length - " </I>".Length) + "</I>";
if (p.Text.Contains(" </I>" + Environment.NewLine))
p.Text = p.Text.Replace(" </I>" + Environment.NewLine, "</I>" + Environment.NewLine);
if (p.Text.StartsWith("<i> "))
p.Text = "<i>" + p.Text.Substring("<i> ".Length);
if (p.Text.Contains(Environment.NewLine + "<i> "))
p.Text = p.Text.Replace(Environment.NewLine + "<i> ", Environment.NewLine + "<i>");
p.Text = p.Text.Trim();
p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine);
if (p.Text.StartsWith("<I> "))
p.Text = "<I>" + p.Text.Substring("<I> ".Length);
if (p.Text.Contains(Environment.NewLine + "<I> "))
p.Text = p.Text.Replace(Environment.NewLine + "<I> ", Environment.NewLine + "<I>");
p.Text = p.Text.Trim();
p.Text = p.Text.Replace(Environment.NewLine + " ", Environment.NewLine);
if (p.Text.Contains("- ") && p.Text.Length > 5)
{
int idx = p.Text.IndexOf("- ", 2);
if (p.Text.ToLower().StartsWith("<i>"))
idx = p.Text.IndexOf("- ", 5);
while (idx > 0)
{
if (idx > 0 && idx < p.Text.Length - 2)
{
string before = string.Empty;
int k = idx - 1;
while (k >= 0 && Utilities.AllLettersAndNumbers.Contains(p.Text[k].ToString()))
{
before = p.Text[k].ToString() + before;
k--;
}
string after = string.Empty;
k = idx + 2;
while (k < p.Text.Length && Utilities.AllLetters.Contains(p.Text[k].ToString()))
{
after = after + p.Text[k].ToString();
k++;
}
if (after.Length > 0 && after.ToLower() == before.ToLower())
p.Text = p.Text.Remove(idx + 1, 1);
else if (before.Length > 0)
p.Text = p.Text.Remove(idx + 1, 1);
}
if (idx + 1 < p.Text.Length && idx != -1)
idx = p.Text.IndexOf("- ", idx + 1);
else
idx = -1;
}
}
if (p.Text.Length != oldText.Length && Utilities.CountTagInText(p.Text, " ") != Utilities.CountTagInText(oldText, " ") + Utilities.CountTagInText(oldText, char160)) if (p.Text.Length != oldText.Length && Utilities.CountTagInText(p.Text, " ") != Utilities.CountTagInText(oldText, " ") + Utilities.CountTagInText(oldText, char160))
{ {

View File

@ -4178,5 +4178,187 @@ namespace Nikse.SubtitleEdit.Logic
return sb.ToString(); return sb.ToString();
} }
/// <summary>
/// Removes unneeded spaces from a subtitle text: invisible/zero-width characters,
/// repeated spaces, spaces next to line breaks, spaces inside or around ellipses and
/// punctuation, spaces just inside italic tags, and the space after a hyphen that
/// directly follows a word (e.g. "low- budget" becomes "low-budget").
/// </summary>
/// <param name="text">text string to remove unneeded spaces from (null is returned unchanged)</param>
/// <param name="language">two letter language id string</param>
/// <returns>text with unneeded spaces removed</returns>
public static string RemoveUnneededSpaces(string text, string language)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    const string zeroWidthSpace = "\u200B";
    const string zeroWidthNoBreakSpace = "\uFEFF";
    const string noBreakSpace = "\u00A0"; // same character as Convert.ToChar(160)

    // NOTE(review): the original literal here held a single invisible character; it is
    // assumed to be U+009D (OPERATING SYSTEM COMMAND) - confirm against the original bytes.
    // It is spelled as an escape because an empty literal makes String.Replace throw.
    const string hiddenSpace = "\u009D";

    string newLine = Environment.NewLine;

    // Zero-width characters carry no visible spacing, so they are dropped. A no-break space
    // is visible spacing, so it becomes a regular space (dropping it would fuse two words).
    text = text.Replace(zeroWidthSpace, string.Empty);
    text = text.Replace(zeroWidthNoBreakSpace, string.Empty);
    text = text.Replace(hiddenSpace, string.Empty);
    text = text.Replace(noBreakSpace, " ");

    // Trim after the replacements so whitespace exposed by them is removed as well.
    text = text.Trim();

    // Collapse runs of spaces into one space.
    while (text.Contains("  "))
    {
        text = text.Replace("  ", " ");
    }
    text = text.Replace(" " + newLine, newLine);

    // Normalize spaced-out ellipses, then collapse longer dot runs to exactly three dots.
    text = text.Replace(". . ..", "...");
    text = text.Replace(". ...", "...");
    text = text.Replace(". .. .", "...");
    text = text.Replace(". . .", "...");
    text = text.Replace(". ..", "...");
    text = text.Replace(".. .", "...");
    while (text.Contains("...."))
    {
        text = text.Replace("....", "...");
    }

    // No space between an ellipsis and the line edge it sits on.
    text = text.Replace(" ..." + newLine, "..." + newLine);
    text = text.Replace(newLine + "... ", newLine + "...");
    if (text.StartsWith("... ", StringComparison.Ordinal))
    {
        text = text.Remove(3, 1);
    }
    if (text.EndsWith(" ...", StringComparison.Ordinal))
    {
        text = text.Remove(text.Length - 4, 1);
    }
    if (text.EndsWith(" ...</i>", StringComparison.Ordinal))
    {
        text = text.Remove(text.Length - 8, 1);
    }

    // These rules run for every language EXCEPT French, where a space before
    // '?', '!' and ':' is kept.
    bool isFrench = language == "fr";
    if (!isFrench)
    {
        text = text.Replace("... ?", "...?");
        text = text.Replace("... !", "...!");
        text = text.Replace(" :", ":");
    }

    // " ... " is left alone when the text contains a dialog dash followed by an ellipsis.
    if (!text.Contains("- ..."))
    {
        text = text.Replace(" ... ", "... ");
    }

    while (text.Contains(" ,"))
    {
        text = text.Replace(" ,", ",");
    }

    // A space before a closing '.' or '"' is removed only at the end of a line / the text.
    if (text.EndsWith(" .", StringComparison.Ordinal))
    {
        text = text.Remove(text.Length - 2, 1);
    }
    if (text.EndsWith(" \"", StringComparison.Ordinal))
    {
        text = text.Remove(text.Length - 2, 1);
    }
    text = text.Replace(" \"" + newLine, "\"" + newLine);
    text = text.Replace(" ." + newLine, "." + newLine);

    if (!isFrench)
    {
        text = text.Replace(" !", "!");
        text = text.Replace(" ?", "?");
    }

    // Spanish inverted marks attach directly to the following word.
    while (text.Contains("¿ "))
    {
        text = text.Replace("¿ ", "¿");
    }
    while (text.Contains("¡ "))
    {
        text = text.Replace("¡ ", "¡");
    }

    // No space just inside italic tags (both tag casings), at the text edges and at line breaks.
    string[] italicTags = { "i", "I" };
    foreach (string tag in italicTags)
    {
        string closeTag = "</" + tag + ">";
        if (text.EndsWith(" " + closeTag, StringComparison.Ordinal))
        {
            text = text.Remove(text.Length - closeTag.Length - 1, 1);
        }
        text = text.Replace(" " + closeTag + newLine, closeTag + newLine);
    }
    foreach (string tag in italicTags)
    {
        string openTag = "<" + tag + ">";
        if (text.StartsWith(openTag + " ", StringComparison.Ordinal))
        {
            text = text.Remove(openTag.Length, 1);
        }
        text = text.Replace(newLine + openTag + " ", newLine + openTag);
        text = text.Trim();
        text = text.Replace(newLine + " ", newLine);
    }

    // Remove the space after a hyphen that is glued to the preceding word ("low- budget").
    if (text.Contains("- ") && text.Length > 5)
    {
        // Skip a leading dialog dash, which may sit right after an opening italic tag.
        int searchStart = text.StartsWith("<i>", StringComparison.OrdinalIgnoreCase) ? 5 : 2;
        int idx = text.IndexOf("- ", searchStart, StringComparison.Ordinal);
        while (idx > 0)
        {
            if (idx < text.Length - 2)
            {
                // Word (letters/digits) immediately before the hyphen.
                int wordStart = idx;
                while (wordStart > 0 && Utilities.AllLettersAndNumbers.Contains(text[wordStart - 1].ToString()))
                {
                    wordStart--;
                }
                string before = text.Substring(wordStart, idx - wordStart);

                // Word (letters only) immediately after "- ".
                int wordEnd = idx + 2;
                while (wordEnd < text.Length && Utilities.AllLetters.Contains(text[wordEnd].ToString()))
                {
                    wordEnd++;
                }
                string after = text.Substring(idx + 2, wordEnd - (idx + 2));

                bool sameWord = after.Length > 0 && string.Equals(after, before, StringComparison.OrdinalIgnoreCase);

                // Keep the space for suspended hyphens such as "long- and altitude".
                if (sameWord || (before.Length > 0 && !IsSuspendedHyphenConjunction(after, language)))
                {
                    text = text.Remove(idx + 1, 1);
                }
            }

            // The text may have shrunk, so guard the start index before searching on.
            idx = idx + 1 < text.Length ? text.IndexOf("- ", idx + 1, StringComparison.Ordinal) : -1;
        }
    }
    return text;
}

/// <summary>
/// Tells whether <paramref name="word"/> is one of the "and"/"or" words listed for
/// <paramref name="language"/>; after such a word a "word- " hyphen keeps its space.
/// </summary>
/// <param name="word">word following the hyphen and space (compared case-insensitively)</param>
/// <param name="language">two letter language id string</param>
/// <returns>true if the word is a listed conjunction for the language, otherwise false</returns>
private static bool IsSuspendedHyphenConjunction(string word, string language)
{
    string lower = word.ToLowerInvariant();
    switch (language)
    {
        case "en": return lower == "and" || lower == "or";
        case "es": return lower == "y" || lower == "o";
        case "da": return lower == "og" || lower == "eller";
        case "de": return lower == "und" || lower == "oder";
        case "fi": return lower == "ja" || lower == "tai";
        case "fr": return lower == "et" || lower == "ou";
        case "it": return lower == "e" || lower == "o";
        case "nl": return lower == "en" || lower == "of";
        case "pl": return lower == "i" || lower == "czy";
        case "pt": return lower == "e" || lower == "ou";
        default: return false;
    }
}
} }
} }

View File

@ -71,7 +71,104 @@ namespace Test
Assert.AreEqual(s2, "<i>Line 1." + Environment.NewLine + "Line 2.</i>"); Assert.AreEqual(s2, "<i>Line 1." + Environment.NewLine + "Line 2.</i>");
} }
/// <summary>
/// A double space between two words is collapsed into a single space.
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesDoubleSpace1()
{
    // The input must really contain two consecutive spaces, otherwise the test proves nothing.
    string input = "This is  a test";

    string actual = Utilities.RemoveUnneededSpaces(input, "en");

    // Assert.AreEqual takes the expected value first, then the actual value.
    Assert.AreEqual("This is a test", actual);
}
/// <summary>
/// Trailing spaces (here two of them) are removed from the end of the text.
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesDoubleSpace2()
{
    string input = "This is a test  ";

    string actual = Utilities.RemoveUnneededSpaces(input, "en");

    // Assert.AreEqual takes the expected value first, then the actual value.
    Assert.AreEqual("This is a test", actual);
}
/// <summary>
/// A space directly after an opening italic tag is removed.
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesItalics1()
{
    string input = "<i> This is a test</i>";

    string actual = Utilities.RemoveUnneededSpaces(input, "en");

    // Assert.AreEqual takes the expected value first, then the actual value.
    Assert.AreEqual("<i>This is a test</i>", actual);
}
/// <summary>
/// A space directly before a closing italic tag at the end of the text is removed.
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesItalics2()
{
    string input = "<i>This is a test </i>";

    string actual = Utilities.RemoveUnneededSpaces(input, "en");

    // Assert.AreEqual takes the expected value first, then the actual value.
    Assert.AreEqual("<i>This is a test</i>", actual);
}
/// <summary>
/// The space after a hyphen that is attached to the preceding word is removed
/// when the next word is not a conjunction.
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesHyphen1()
{
    string input = "This is a low- budget job";

    string actual = Utilities.RemoveUnneededSpaces(input, "en");

    // Assert.AreEqual takes the expected value first, then the actual value.
    Assert.AreEqual("This is a low-budget job", actual);
}
/// <summary>
/// Two spaces after a hyphen that is attached to the preceding word are both removed:
/// the double space is collapsed first, then the remaining space after the hyphen goes.
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesHyphen2()
{
    // Uses a double space so this case differs from FixUnneededSpacesHyphen1.
    string input = "This is a low-  budget job";

    string actual = Utilities.RemoveUnneededSpaces(input, "en");

    // Assert.AreEqual takes the expected value first, then the actual value.
    Assert.AreEqual("This is a low-budget job", actual);
}
/// <summary>
/// A free-standing dash (space on both sides) is left untouched.
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesHyphenDoNotChange1()
{
    string input = "This is it - and he likes it!";

    string actual = Utilities.RemoveUnneededSpaces(input, "en");

    // The input is the expected value; Assert.AreEqual takes expected first, then actual.
    Assert.AreEqual(input, actual);
}
/// <summary>
/// English suspended hyphen followed by "and" keeps its space ("long- and altitude").
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesHyphenDoNotChange2()
{
    string input = "What are your long- and altitude stats?";

    string actual = Utilities.RemoveUnneededSpaces(input, "en");

    // The input is the expected value; Assert.AreEqual takes expected first, then actual.
    Assert.AreEqual(input, actual);
}
/// <summary>
/// English suspended hyphen followed by "or" keeps its space ("first- or second-handed").
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesHyphenDoNotChange3()
{
    string input = "Did you buy that first- or second-handed?";

    string actual = Utilities.RemoveUnneededSpaces(input, "en");

    // The input is the expected value; Assert.AreEqual takes expected first, then actual.
    Assert.AreEqual(input, actual);
}
/// <summary>
/// Dutch suspended hyphen followed by "en" keeps its space ("voor- en familienaam").
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesHyphenDoNotChangeDutch1()
{
    string input = "Wat zijn je voor- en familienaam?";

    string actual = Utilities.RemoveUnneededSpaces(input, "nl");

    // The input is the expected value; Assert.AreEqual takes expected first, then actual.
    Assert.AreEqual(input, actual);
}
/// <summary>
/// Dutch suspended hyphen followed by "of" keeps its space ("voor- of najaar").
/// </summary>
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixUnneededSpacesHyphenDoNotChangeDutch2()
{
    string input = "Was het in het voor- of najaar?";

    string actual = Utilities.RemoveUnneededSpaces(input, "nl");

    // The input is the expected value; Assert.AreEqual takes expected first, then actual.
    Assert.AreEqual(input, actual);
}
} }
} }