From 8a2f260fc5fe278f591936718c0521304e2285dc Mon Sep 17 00:00:00 2001 From: Nikolaj Olsson Date: Wed, 15 Apr 2020 18:44:54 +0200 Subject: [PATCH] Add FCE rule about commas - thx Jamakmake :) --- LanguageMaster.xml | 1 + libse/Forms/FixCommonErrors/FixCommas.cs | 56 +++++++++++++++++ libse/Language.cs | 1 + libse/LanguageDeserializer.cs | 3 + libse/LanguageStructure.cs | 1 + libse/Settings.cs | 9 +++ src/Forms/FixCommonErrors.cs | 57 +++++++++-------- .../FixCommonErrors/FixCommonErrorsTest.cs | 62 +++++++++++++++++-- 8 files changed, 157 insertions(+), 33 deletions(-) create mode 100644 libse/Forms/FixCommonErrors/FixCommas.cs diff --git a/LanguageMaster.xml b/LanguageMaster.xml index aed2af150..13890c7a3 100644 --- a/LanguageMaster.xml +++ b/LanguageMaster.xml @@ -598,6 +598,7 @@ Note: Do check free disk space. Fix invalid italic tags Remove unneeded spaces Remove unneeded periods + Fix commas Fix missing spaces Break long lines Remove line breaks in short texts with only one sentence diff --git a/libse/Forms/FixCommonErrors/FixCommas.cs b/libse/Forms/FixCommonErrors/FixCommas.cs new file mode 100644 index 000000000..01e6ef428 --- /dev/null +++ b/libse/Forms/FixCommonErrors/FixCommas.cs @@ -0,0 +1,56 @@ +using Nikse.SubtitleEdit.Core.Interfaces; +using System.Text.RegularExpressions; + +namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors +{ + public class FixCommas : IFixCommonError + { + public void Fix(Subtitle subtitle, IFixCallbacks callbacks) + { + var commaDouble = new Regex(@"([\p{L}\d\s])(,,)([\p{L}\d\s])"); + var commaTriple = new Regex(@"([\p{L}\d\s])(, *, *,)([\p{L}\d\s])"); + var commaTripleEndOfLine = new Regex(@"([\p{L}\d\s])(, *, *,)$"); + var commaWhiteSpaceBetween = new Regex(@"([\p{L}\d\s])(,\s+,)([\p{L}\d\s])"); + + string fixAction = Configuration.Settings.Language.FixCommonErrors.FixCommas; + int fixCount = 0; + for (int i = 0; i < subtitle.Paragraphs.Count; i++) + { + var p = subtitle.Paragraphs[i]; + if (p.Text.IndexOf(',') >= 0 && callbacks.AllowFix(p, fixAction)) + { + var s = p.Text; + var oldText = s; + + s = commaDouble.Replace(s, "$1,$3"); + s = commaTriple.Replace(s, "$1...$3"); + s = commaTripleEndOfLine.Replace(s, "$1..."); + s = commaWhiteSpaceBetween.Replace(s, "$1,$3"); + + while (s.Contains(",.")) + { + s = s.Replace(",.", "."); + } + + while (s.Contains(",!")) + { + s = s.Replace(",!", "!"); + } + + while (s.Contains(",?")) + { + s = s.Replace(",?", "?"); + } + + if (oldText != s) + { + fixCount++; + callbacks.AddFixToListView(p, fixAction, oldText, s); + p.Text = s; + } + } + } + callbacks.UpdateFixStatus(fixCount, Configuration.Settings.Language.FixCommonErrors.FixCommas, fixCount.ToString()); + } + } +} diff --git a/libse/Language.cs b/libse/Language.cs index edc66892c..2cf342dcc 100644 --- a/libse/Language.cs +++ b/libse/Language.cs @@ -795,6 +795,7 @@ namespace Nikse.SubtitleEdit.Core FixInvalidItalicTags = "Fix invalid italic tags", RemoveUnneededSpaces = "Remove unneeded spaces", RemoveUnneededPeriods = "Remove unneeded periods", + FixCommas = "Fix commas", FixMissingSpaces = "Fix missing spaces", BreakLongLines = "Break long lines", RemoveLineBreaks = "Remove line breaks in short texts with only one sentence", diff --git a/libse/LanguageDeserializer.cs b/libse/LanguageDeserializer.cs index 7d3831cab..aab378d36 100644 --- a/libse/LanguageDeserializer.cs +++ b/libse/LanguageDeserializer.cs @@ -1561,6 +1561,9 @@ namespace Nikse.SubtitleEdit.Core case "FixCommonErrors/RemoveUnneededPeriods": language.FixCommonErrors.RemoveUnneededPeriods = reader.Value; break; + case "FixCommonErrors/FixCommas": + language.FixCommonErrors.FixCommas = reader.Value; + break; case "FixCommonErrors/FixMissingSpaces": language.FixCommonErrors.FixMissingSpaces = reader.Value; break; diff --git a/libse/LanguageStructure.cs b/libse/LanguageStructure.cs index e6181321c..c7943ea7c 100644 --- a/libse/LanguageStructure.cs +++ b/libse/LanguageStructure.cs @@ -666,6 +666,7 @@ public string FixInvalidItalicTags { get; set; } public string RemoveUnneededSpaces { get; set; } public string RemoveUnneededPeriods { get; set; } + public string FixCommas { get; set; } public string FixMissingSpaces { get; set; } public string BreakLongLines { get; set; } public string RemoveLineBreaks { get; set; } diff --git a/libse/Settings.cs b/libse/Settings.cs index 9e2acdfd1..56143b484 100644 --- a/libse/Settings.cs +++ b/libse/Settings.cs @@ -643,6 +643,7 @@ $HorzAlign = Center public bool MergeShortLinesAllTicked { get; set; } public bool UnneededSpacesTicked { get; set; } public bool UnneededPeriodsTicked { get; set; } + public bool FixCommasTicked { get; set; } public bool MissingSpacesTicked { get; set; } public bool AddMissingQuotesTicked { get; set; } public bool Fix3PlusLinesTicked { get; set; } @@ -685,6 +686,7 @@ $HorzAlign = Center BreakLongLinesTicked = true; MergeShortLinesTicked = true; UnneededPeriodsTicked = true; + FixCommasTicked = true; UnneededSpacesTicked = true; MissingSpacesTicked = true; UppercaseIInsideLowercaseWordTicked = true; @@ -4568,6 +4570,12 @@ $HorzAlign = Center settings.CommonErrors.UnneededPeriodsTicked = Convert.ToBoolean(subNode.InnerText); } + subNode = node.SelectSingleNode("FixCommasTicked"); + if (subNode != null) + { + settings.CommonErrors.FixCommasTicked = Convert.ToBoolean(subNode.InnerText); + } + subNode = node.SelectSingleNode("MissingSpacesTicked"); if (subNode != null) { @@ -7150,6 +7158,7 @@ $HorzAlign = Center textWriter.WriteElementString("MergeShortLinesAllTicked", settings.CommonErrors.MergeShortLinesAllTicked.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("UnneededSpacesTicked", settings.CommonErrors.UnneededSpacesTicked.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("UnneededPeriodsTicked", settings.CommonErrors.UnneededPeriodsTicked.ToString(CultureInfo.InvariantCulture)); + textWriter.WriteElementString("FixCommasTicked", settings.CommonErrors.FixCommasTicked.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("MissingSpacesTicked", settings.CommonErrors.MissingSpacesTicked.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("AddMissingQuotesTicked", settings.CommonErrors.AddMissingQuotesTicked.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("Fix3PlusLinesTicked", settings.CommonErrors.Fix3PlusLinesTicked.ToString(CultureInfo.InvariantCulture)); diff --git a/src/Forms/FixCommonErrors.cs b/src/Forms/FixCommonErrors.cs index 31d7ca780..7674f6d6e 100644 --- a/src/Forms/FixCommonErrors.cs +++ b/src/Forms/FixCommonErrors.cs @@ -27,30 +27,31 @@ namespace Nikse.SubtitleEdit.Forms private const int IndexTooShortGap = 4; private const int IndexInvalidItalicTags = 5; private const int IndexUnneededSpaces = 6; - private const int IndexUnneededPeriods = 7; - private const int IndexMissingSpaces = 8; - private const int IndexBreakLongLines = 9; - private const int IndexMergeShortLines = 10; - private const int IndexMergeShortLinesAll = 11; - private const int IndexDoubleApostropheToQuote = 12; - private const int IndexFixMusicNotation = 13; - private const int IndexAddPeriodAfterParagraph = 14; - private const int IndexStartWithUppercaseLetterAfterParagraph = 15; - private const int IndexStartWithUppercaseLetterAfterPeriodInsideParagraph = 16; - private const int IndexStartWithUppercaseLetterAfterColon = 17; - private const int IndexAddMissingQuotes = 18; - private const int IndexFixHyphens = 19; - private const int IndexRemoveHyphensSingleLine = 20; - private const int IndexFix3PlusLines = 21; - private const int IndexFixDoubleDash = 22; - private const int IndexFixDoubleGreaterThan = 23; - private const int IndexFixContinuationStyle = 24; - private const int IndexFixMissingOpenBracket = 25; - private const int IndexFixOcrErrorsViaReplaceList = 26; - private const int IndexUppercaseIInsideLowercaseWord = 27; - private const int IndexRemoveSpaceBetweenNumbers = 28; - private const int IndexDialogsOnOneLine = 29; - private const int IndexFixEllipsesStart = 30; + private const int IndexMissingSpaces = 7; + private const int IndexUnneededPeriods = 8; + private const int IndexFixCommas = 9; + private const int IndexBreakLongLines = 10; + private const int IndexMergeShortLines = 11; + private const int IndexMergeShortLinesAll = 12; + private const int IndexDoubleApostropheToQuote = 13; + private const int IndexFixMusicNotation = 14; + private const int IndexAddPeriodAfterParagraph = 15; + private const int IndexStartWithUppercaseLetterAfterParagraph = 16; + private const int IndexStartWithUppercaseLetterAfterPeriodInsideParagraph = 17; + private const int IndexStartWithUppercaseLetterAfterColon = 18; + private const int IndexAddMissingQuotes = 19; + private const int IndexFixHyphens = 20; + private const int IndexRemoveHyphensSingleLine = 21; + private const int IndexFix3PlusLines = 22; + private const int IndexFixDoubleDash = 23; + private const int IndexFixDoubleGreaterThan = 24; + private const int IndexFixContinuationStyle = 25; + private const int IndexFixMissingOpenBracket = 26; + private const int IndexFixOcrErrorsViaReplaceList = 27; + private const int IndexUppercaseIInsideLowercaseWord = 28; + private const int IndexRemoveSpaceBetweenNumbers = 29; + private const int IndexDialogsOnOneLine = 30; + private const int IndexFixEllipsesStart = 31; private int _indexAloneLowercaseIToUppercaseIEnglish = -1; private int _turkishAnsiIndex = -1; private int _danishLetterIIndex = -1; @@ -379,8 +380,9 @@ namespace Nikse.SubtitleEdit.Forms new FixItem(_language.FixShortGaps, string.Empty, () => new FixShortGaps().Fix(Subtitle, this), ce.TooShortGapTicked), new FixItem(_language.FixInvalidItalicTags, _language.FixInvalidItalicTagsExample, () => new FixInvalidItalicTags().Fix(Subtitle, this), ce.InvalidItalicTagsTicked), new FixItem(_language.RemoveUnneededSpaces, _language.RemoveUnneededSpacesExample, () => new FixUnneededSpaces().Fix(Subtitle, this), ce.UnneededSpacesTicked), - new FixItem(_language.RemoveUnneededPeriods, _language.RemoveUnneededPeriodsExample, () => new FixUnneededPeriods().Fix(Subtitle, this), ce.UnneededPeriodsTicked), new FixItem(_language.FixMissingSpaces, _language.FixMissingSpacesExample, () => new FixMissingSpaces().Fix(Subtitle, this), ce.MissingSpacesTicked), + new FixItem(_language.RemoveUnneededPeriods, _language.RemoveUnneededPeriodsExample, () => new FixUnneededPeriods().Fix(Subtitle, this), ce.UnneededPeriodsTicked), + new FixItem(_language.FixCommas, ",, -> ,", () => new FixCommas().Fix(Subtitle, this), ce.FixCommasTicked), new FixItem(_language.BreakLongLines, string.Empty, () => new FixLongLines().Fix(Subtitle, this), ce.BreakLongLinesTicked), new FixItem(_language.RemoveLineBreaks, string.Empty, () => new FixShortLines().Fix(Subtitle, this), ce.MergeShortLinesTicked), new FixItem(_language.RemoveLineBreaksAll, string.Empty, () => new FixShortLinesAll().Fix(Subtitle, this), ce.MergeShortLinesAllTicked), @@ -454,7 +456,7 @@ namespace Nikse.SubtitleEdit.Forms } return Configuration.Settings.Language.Settings.DialogStyleDashBothLinesWithSpace; } - + public FixCommonErrors() { UiUtil.PreInitialize(this); @@ -1057,6 +1059,7 @@ namespace Nikse.SubtitleEdit.Forms ce.InvalidItalicTagsTicked = listView1.Items[IndexInvalidItalicTags].Checked; ce.UnneededSpacesTicked = listView1.Items[IndexUnneededSpaces].Checked; ce.UnneededPeriodsTicked = listView1.Items[IndexUnneededPeriods].Checked; + ce.FixCommasTicked = listView1.Items[IndexFixCommas].Checked; ce.MissingSpacesTicked = listView1.Items[IndexMissingSpaces].Checked; ce.BreakLongLinesTicked = listView1.Items[IndexBreakLongLines].Checked; ce.MergeShortLinesTicked = listView1.Items[IndexMergeShortLines].Checked; @@ -1085,7 +1088,7 @@ namespace Nikse.SubtitleEdit.Forms { ce.FixEllipsesStartTicked = listView1.Items[IndexFixEllipsesStart].Checked; } - + ce.FixMissingOpenBracketTicked = listView1.Items[IndexFixMissingOpenBracket].Checked; if (_indexAloneLowercaseIToUppercaseIEnglish >= 0) { diff --git a/src/Test/FixCommonErrors/FixCommonErrorsTest.cs b/src/Test/FixCommonErrors/FixCommonErrorsTest.cs index a95435236..908048aa6 100644 --- a/src/Test/FixCommonErrors/FixCommonErrorsTest.cs +++ b/src/Test/FixCommonErrors/FixCommonErrorsTest.cs @@ -2266,6 +2266,56 @@ namespace Test.FixCommonErrors Assert.AreEqual("안녕하세요...", sub.Paragraphs[0].Text); } + [TestMethod] + public void FixCommas1() + { + var sub = new Subtitle(); + sub.Paragraphs.Add(new Paragraph("Hi,, how are you?", 0, 1000)); + var fup = new FixCommas(); + fup.Fix(sub, new EmptyFixCallback()); + Assert.AreEqual("Hi, how are you?", sub.Paragraphs[0].Text); + } + + [TestMethod] + public void FixCommas2() + { + var sub = new Subtitle(); + sub.Paragraphs.Add(new Paragraph("Hi, , how are you?", 0, 1000)); + var fup = new FixCommas(); + fup.Fix(sub, new EmptyFixCallback()); + Assert.AreEqual("Hi, how are you?", sub.Paragraphs[0].Text); + } + + [TestMethod] + public void FixCommas3() + { + var sub = new Subtitle(); + sub.Paragraphs.Add(new Paragraph("Hi,,,", 0, 1000)); + var fup = new FixCommas(); + fup.Fix(sub, new EmptyFixCallback()); + Assert.AreEqual("Hi...", sub.Paragraphs[0].Text); + } + + [TestMethod] + public void FixCommas4() + { + var sub = new Subtitle(); + sub.Paragraphs.Add(new Paragraph("Hi,!", 0, 1000)); + var fup = new FixCommas(); + fup.Fix(sub, new EmptyFixCallback()); + Assert.AreEqual("Hi!", sub.Paragraphs[0].Text); + } + + [TestMethod] + public void FixCommas5() + { + var sub = new Subtitle(); + sub.Paragraphs.Add(new Paragraph("Hi,,, are you okay?", 0, 1000)); + var fup = new FixCommas(); + fup.Fix(sub, new EmptyFixCallback()); + Assert.AreEqual("Hi... are you okay?", sub.Paragraphs[0].Text); + } + #endregion #region Fix Danish letter "i" @@ -2465,7 +2515,7 @@ namespace Test.FixCommonErrors Assert.AreEqual("but we need to do it.", _subtitle.Paragraphs[1].Text); } } - + [TestMethod] public void FixContinuationStyle2() { @@ -2673,7 +2723,7 @@ namespace Test.FixCommonErrors Assert.AreEqual("- this right now.", _subtitle.Paragraphs[1].Text); } } - + [TestMethod] public void FixContinuationStyle15() { @@ -2699,7 +2749,7 @@ namespace Test.FixCommonErrors Assert.AreEqual("- ...this right now." + Environment.NewLine + "- You kidding me?", _subtitle.Paragraphs[1].Text); } } - + [TestMethod] public void FixContinuationStyle17() { @@ -2738,7 +2788,7 @@ namespace Test.FixCommonErrors Assert.AreEqual("To see if it works.", _subtitle.Paragraphs[1].Text); } } - + [TestMethod] public void FixContinuationStyle20() { @@ -2764,7 +2814,7 @@ namespace Test.FixCommonErrors Assert.AreEqual("...to see if it works.", _subtitle.Paragraphs[1].Text); } } - + [TestMethod] public void FixContinuationStyle22() { @@ -2908,7 +2958,7 @@ namespace Test.FixCommonErrors Assert.AreEqual("...test...", _subtitle.Paragraphs[1].Text); } } - + /*[TestMethod] public void FixContinuationStyle32() {