mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-28 06:52:35 +01:00
135 lines
6.3 KiB
C#
135 lines
6.3 KiB
C#
using System;
|
|
using System.Text.RegularExpressions;
|
|
using Nikse.SubtitleEdit.Core.Interfaces;
|
|
|
|
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
|
{
|
|
public class FixUppercaseIInsideWords : IFixCommonError
|
|
{
|
|
/// <summary>
/// Matches a lowercase letter immediately followed by an uppercase 'I'.
/// The match index points at the lowercase letter; the 'I' is at index + 1.
/// </summary>
private static readonly Regex ReAfterLowercaseLetter = new Regex(
    pattern: @"\p{Ll}I",
    options: RegexOptions.Compiled);

/// <summary>
/// Matches any letter, then an uppercase 'I', then a lowercase letter.
/// The match index points at the leading letter; the 'I' is at index + 1.
/// </summary>
private static readonly Regex ReBeforeLowercaseLetter = new Regex(
    pattern: @"\p{L}I\p{Ll}",
    options: RegexOptions.Compiled);
|
|
|
|
/// <summary>
/// Scans every paragraph of <paramref name="subtitle"/> for an uppercase 'I' that sits inside a
/// word and replaces it with a lowercase 'l' (or 'i'), reporting each change through
/// <paramref name="callbacks"/>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are inspected and updated in place.</param>
/// <param name="callbacks">
/// Supplies the language, decides whether a fix is allowed, recognises names that must be
/// left alone, and receives the list-view entries and the final status.
/// </param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var texts = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = texts.FixUppercaseIInsideLowercaseWord;
    var language = callbacks.Language;
    int fixCount = 0;

    for (int i = 0; i < subtitle.Paragraphs.Count; i++)
    {
        Paragraph p = subtitle.Paragraphs[i];
        string oldText = p.Text;
        var st = new StrippableText(p.Text);

        // Pass 1: lowercase letter followed by 'I' -> the 'I' becomes 'l'.
        Match afterLower = ReAfterLowercaseLetter.Match(st.StrippedText);
        while (afterLower.Success)
        {
            int index = afterLower.Index;

            // Irish names (McIntyre etc.). The two characters at index - 1 and index are
            // compared, so index 1 - "Mc" at the very start of the text - must be included.
            bool isMcName = index >= 1 && st.StrippedText.Substring(index - 1, 2) == "Mc";
            if (isMcName
                || !callbacks.AllowFix(p, fixAction)
                || callbacks.IsName(GetWholeWord(st.StrippedText, index)))
            {
                afterLower = afterLower.NextMatch();
                continue;
            }

            st.StrippedText = st.StrippedText.Remove(index + 1, 1).Insert(index + 1, "l");
            p.Text = st.MergedString;
            st = new StrippableText(p.Text);
            fixCount++;
            callbacks.AddFixToListView(p, fixAction, oldText, p.Text);

            // The text object was rebuilt, so restart matching on the new string from the
            // same position; the replaced character can no longer match.
            afterLower = ReAfterLowercaseLetter.Match(st.StrippedText, index);
        }

        // Pass 2: letter, 'I', lowercase letter -> the 'I' becomes 'i' or 'l'.
        // Replacements keep the length unchanged, so the indexes of later matches stay valid.
        for (Match beforeLower = ReBeforeLowercaseLetter.Match(st.StrippedText);
             beforeLower.Success;
             beforeLower = beforeLower.NextMatch())
        {
            int index = beforeLower.Index;
            string word = GetWholeWord(st.StrippedText, index);
            if (callbacks.IsName(word) || !callbacks.AllowFix(p, fixAction))
            {
                continue;
            }

            // Skip words like "MRIs" where the last 's' indicates plural.
            // NOTE(review): the length check compares a stripped-text index with the full
            // paragraph text - presumably always true; confirm before removing it.
            string singular = word.TrimEnd('s');
            if (language == "en" && word.EndsWith('s') &&
                p.Text.Length > index + 1 &&
                singular == singular.ToUpperInvariant())
            {
                continue;
            }

            // The regex matched exactly three characters, so index + 2 is always in range
            // and is always a lowercase letter.
            char before = st.StrippedText[index];
            char after = st.StrippedText[index + 2];

            string replacement;
            if (char.IsUpper(before) &&
                !Utilities.LowercaseVowels.Contains(char.ToLower(before)) &&
                !Utilities.LowercaseVowels.Contains(after))
            {
                // Uppercase non-vowel + 'I' + lowercase non-vowel, e.g. "SIr" -> "Sir".
                replacement = "i";
            }
            else if (index >= 1 && st.StrippedText.Substring(index - 1, 2) == "Mc")
            {
                // Irish names (McIntyre etc.) keep their uppercase 'I'.
                continue;
            }
            else
            {
                replacement = "l";
            }

            st.StrippedText = st.StrippedText.Remove(index + 1, 1).Insert(index + 1, replacement);
            p.Text = st.MergedString;
            fixCount++;
            callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
        }
    }

    callbacks.UpdateFixStatus(fixCount, texts.FixUppercaseIInsindeLowercaseWords, texts.XUppercaseIsFoundInsideLowercaseWords);
}
|
|
|
|
/// <summary>
/// Returns the word in <paramref name="text"/> that contains the character at
/// <paramref name="index"/>, expanding left and right until a delimiter is reached.
/// An apostrophe that is followed (scanning right) or preceded (scanning left) by a
/// letter is treated as part of the word, e.g. "don't".
/// </summary>
/// <param name="text">Text to search in.</param>
/// <param name="index">Position of a character inside the wanted word.</param>
/// <returns>
/// The surrounding word, or an empty string when <paramref name="text"/> is null or empty
/// or <paramref name="index"/> lies outside of it.
/// </returns>
private static string GetWholeWord(string text, int index)
{
    if (string.IsNullOrEmpty(text) || index < 0 || index >= text.Length)
    {
        return string.Empty;
    }

    // Both line-break characters are listed explicitly so that word boundaries do not
    // depend on the platform's Environment.NewLine value. Built once, not per iteration.
    const string lineBreaks = "\r\n";
    const string startDelimiters = lineBreaks + @" ,.!?""'=()[]/-¿¡«»“”>—";
    const string endDelimiters = lineBreaks + @" ,.!?:;""'=()[]/-«»“”<—";

    int start = index;
    while (start > 0 && startDelimiters.IndexOf(text[start - 1]) < 0 ||
           start > 1 && text[start - 1] == '\'' && char.IsLetter(text[start - 2]))
    {
        start--;
    }

    int end = index;
    while (end + 1 < text.Length && endDelimiters.IndexOf(text[end + 1]) < 0 ||
           end + 2 < text.Length && text[end + 1] == '\'' && char.IsLetter(text[end + 2]))
    {
        end++;
    }

    return text.Substring(start, end - start + 1);
}
|
|
|
|
}
|
|
}
|