/// <summary>
/// Returns the run of characters surrounding <paramref name="index"/> that is
/// delimited by word separators (carriage return, line feed, space and the
/// punctuation characters , . ! ? " ' = ( ) / -).
/// </summary>
/// <param name="text">Text to search in.</param>
/// <param name="index">Position of a character that belongs to the wanted word.</param>
/// <returns>
/// The word containing the character at <paramref name="index"/>, without the
/// surrounding separators. An empty string is returned when
/// <paramref name="text"/> is null or empty, or when <paramref name="index"/>
/// lies outside the text.
/// </returns>
/// <remarks>
/// Only the neighbours of the current position are tested against the separator
/// set; the character at <paramref name="index"/> itself is always part of the
/// returned value.
/// </remarks>
private static string GetWholeWord(string text, int index)
{
    // Both line-break characters are listed explicitly instead of relying on
    // Environment.NewLine, so CRLF text is split the same way on every platform.
    const string separators = "\r\n ,.!?\"'=()/-";

    // Nothing to extract: avoid indexing/Substring exceptions on bad input.
    if (string.IsNullOrEmpty(text) || index < 0 || index >= text.Length)
        return string.Empty;

    // Walk left until the preceding character is a separator (or the text starts).
    int start = index;
    while (start > 0 && separators.IndexOf(text[start - 1]) < 0)
        start--;

    // Walk right until the following character is a separator (or the text ends).
    int end = index;
    while (end + 1 < text.Length && separators.IndexOf(text[end + 1]) < 0)
        end++;

    return text.Substring(start, end - start + 1);
}
oldText.Length) p.Text += oldText.Substring(match.Index + 2); @@ -1241,6 +1255,80 @@ namespace Nikse.SubtitleEdit.Forms match = match.NextMatch(); } } + + StripableText st = new StripableText(p.Text); + match = reBeforeLowercaseLetter.Match(st.StrippedText); + if (match.Success) + { + while (match.Success) + { + string word = GetWholeWord(st.StrippedText, match.Index); + if (!IsName(word)) + { + if (AllowFix(i + 1, fixAction)) + { + if (word.ToLower() == "internal" || + word.ToLower() == "island" || + word.ToLower() == "islands") + { + } + else if (match.Index == 0) + { // first letter in paragraph + + //too risky! - perhaps if periods is fixed at the same time... or too complicated!? + //if (isLineContinuation) + //{ + // st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l"); + // p.Text = st.MergedString; + // uppercaseIsInsideLowercaseWords++; + // _totalFixes++; + // AddFixToListView(p, i + 1, fixAction, oldText, p.Text); + //} + } + else + { + if (match.Index > 2 && st.StrippedText[match.Index - 1] == ' ') + { + if ((Utilities.GetLetters(true, true, true) + ",").Contains(st.StrippedText[match.Index - 2].ToString())) + { + st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l"); + p.Text = st.MergedString; + uppercaseIsInsideLowercaseWords++; + _totalFixes++; + AddFixToListView(p, i + 1, fixAction, oldText, p.Text); + } + } + else if (match.Index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(st.StrippedText[match.Index - 1].ToString())) + { + if ((Utilities.GetLetters(true, true, true) + ",").Contains(st.StrippedText[match.Index - (Environment.NewLine.Length + 1)].ToString())) + { + st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l"); + p.Text = st.MergedString; + uppercaseIsInsideLowercaseWords++; + _totalFixes++; + AddFixToListView(p, i + 1, fixAction, oldText, p.Text); + } + } + else if (match.Index > 1 && ((st.StrippedText[match.Index - 1] == '\"') || 
(st.StrippedText[match.Index - 1] == '>'))) + { + } + else + { + st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l"); + p.Text = st.MergedString; + uppercaseIsInsideLowercaseWords++; + _totalFixes++; + AddFixToListView(p, i + 1, fixAction, oldText, p.Text); + } + } + + } + } + match = match.NextMatch(); + } + } + + isLineContinuation = p.Text.Length > 0 && Utilities.GetLetters(true, true, false).Contains(p.Text[p.Text.Length - 1].ToString()); } if (uppercaseIsInsideLowercaseWords > 0) LogStatus(_language.FixUppercaseIInsindeLowercaseWords, string.Format(_language.XUppercaseIsFoundInsideLowercaseWords, uppercaseIsInsideLowercaseWords));