using System; using System.Text.RegularExpressions; namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors { public class FixUppercaseIInsideWords : IFixCommonError { private static readonly Regex ReAfterLowercaseLetter = new Regex(@"[a-zæøåäöéùáàìéóúñüéíóúñü]I", RegexOptions.Compiled); private static readonly Regex ReBeforeLowercaseLetter = new Regex(@"I[a-zæøåäöéùáàìéóúñüéíóúñü]", RegexOptions.Compiled); public void Fix(Subtitle subtitle, IFixCallbacks callbacks) { var language = Configuration.Settings.Language.FixCommonErrors; string fixAction = language.FixUppercaseIInsideLowercaseWord; int uppercaseIsInsideLowercaseWords = 0; for (int i = 0; i < subtitle.Paragraphs.Count; i++) { Paragraph p = subtitle.Paragraphs[i]; string oldText = p.Text; var st = new StrippableText(p.Text); Match match = ReAfterLowercaseLetter.Match(p.Text); while (match.Success) { if (!(match.Index > 1 && p.Text.Substring(match.Index - 1, 2) == "Mc") // irish names, McDonalds etc. && p.Text[match.Index + 1] == 'I' && callbacks.AllowFix(p, fixAction)) { string word = GetWholeWord(st.StrippedText, match.Index); if (!callbacks.IsName(word)) { p.Text = p.Text.Substring(0, match.Index + 1) + "l"; if (match.Index + 2 < oldText.Length) p.Text += oldText.Substring(match.Index + 2); uppercaseIsInsideLowercaseWords++; callbacks.AddFixToListView(p, fixAction, oldText, p.Text); } } match = match.NextMatch(); } match = ReBeforeLowercaseLetter.Match(st.StrippedText); while (match.Success) { string word = GetWholeWord(st.StrippedText, match.Index); if (!callbacks.IsName(word)) { if (callbacks.AllowFix(p, fixAction)) { if (word.Equals("internal", StringComparison.OrdinalIgnoreCase) || word.Equals("island", StringComparison.OrdinalIgnoreCase) || word.Equals("islands", StringComparison.OrdinalIgnoreCase)) { } else if (match.Index == 0) { // first letter in paragraph //too risky! - perhaps if periods is fixed at the same time... or too complicated!? //if (isLineContinuation) //{ // st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l"); // p.Text = st.MergedString; // uppercaseIsInsideLowercaseWords++; // AddFixToListView(p, fixAction, oldText, p.Text); //} } else { if (match.Index > 2 && st.StrippedText[match.Index - 1] == ' ') { if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - 2]) && match.Length >= 2 && Utilities.LowercaseVowels.Contains(char.ToLower(match.Value[1]))) { st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l"); p.Text = st.MergedString; uppercaseIsInsideLowercaseWords++; callbacks.AddFixToListView(p, fixAction, oldText, p.Text); } } else if (match.Index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(st.StrippedText[match.Index - 1])) { if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - Environment.NewLine.Length + 1]) && match.Length >= 2 && Utilities.LowercaseVowels.Contains(match.Value[1])) { st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l"); p.Text = st.MergedString; uppercaseIsInsideLowercaseWords++; callbacks.AddFixToListView(p, fixAction, oldText, p.Text); } } else if (match.Index > 1 && ((st.StrippedText[match.Index - 1] == '\"') || (st.StrippedText[match.Index - 1] == '\'') || (st.StrippedText[match.Index - 1] == '>') || (st.StrippedText[match.Index - 1] == '-'))) { } else { var before = '\0'; var after = '\0'; if (match.Index > 0) before = st.StrippedText[match.Index - 1]; if (match.Index < st.StrippedText.Length - 2) after = st.StrippedText[match.Index + 1]; if (before != '\0' && char.IsUpper(before) && after != '\0' && char.IsLower(after) && !Utilities.LowercaseVowels.Contains(char.ToLower(before)) && !Utilities.LowercaseVowels.Contains(after)) { st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "i"); p.Text = st.MergedString; uppercaseIsInsideLowercaseWords++; callbacks.AddFixToListView(p, fixAction, oldText, p.Text); } else if (@"‘’¡¿„“()[]♪'. @".Contains(before) && !Utilities.LowercaseVowels.Contains(char.ToLower(after))) { } else { st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l"); p.Text = st.MergedString; uppercaseIsInsideLowercaseWords++; callbacks.AddFixToListView(p, fixAction, oldText, p.Text); } } } } } match = match.NextMatch(); } } callbacks.UpdateFixStatus(uppercaseIsInsideLowercaseWords, language.FixUppercaseIInsindeLowercaseWords, language.XUppercaseIsFoundInsideLowercaseWords); } private static string GetWholeWord(string text, int index) { int start = index; while (start > 0 && !(Environment.NewLine + @" ,.!?""'=()[]/-¿¡«»“”>—").Contains(text[start - 1]) || start > 1 && text[start - 1] == '\'' && char.IsLetter(text[start - 2])) { start--; } int end = index; while (end + 1 < text.Length && !(Environment.NewLine + @" ,.!?:;""'=()[]/-«»“”<—").Contains(text[end + 1]) || end + 2 < text.Length && text[end + 1] == '\'' && char.IsLetter(text[end + 2])) { end++; } return text.Substring(start, end - start + 1); } } }