2015-09-19 09:28:36 +02:00
|
|
|
|
using System.Globalization;
|
|
|
|
|
|
|
|
|
|
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
|
|
|
|
{
|
|
|
|
|
public class FixStartWithUppercaseLetterAfterPeriodInsideParagraph : IFixCommonError
|
|
|
|
|
{
|
2015-10-07 10:05:30 +02:00
|
|
|
|
/// <summary>
/// Punctuation marks that are searched for as possible sentence endings inside a paragraph.
/// </summary>
private static readonly char[] ExpectedChars = { '.', '!', '?' };
|
2015-09-19 09:28:36 +02:00
|
|
|
|
|
|
|
|
|
/// <summary>
/// Decides whether the period at <paramref name="index"/> belongs to an abbreviation
/// instead of ending a sentence.
/// </summary>
/// <param name="text">Text being examined.</param>
/// <param name="index">Position of the character to test; must be a valid index into <paramref name="text"/>.</param>
/// <param name="callbacks">Provides the abbreviation collection through <c>GetAbbreviations()</c>.</param>
/// <returns>
/// <c>false</c> if the character at <paramref name="index"/> is not a period; <c>true</c> if the period
/// closes a dotted sequence such as "O.R.", or if the run of letters directly before it, followed by a
/// period, is contained in the abbreviation collection.
/// </returns>
private bool IsAbbreviation(string text, int index, IFixCallbacks callbacks)
{
    if (text[index] != '.')
    {
        return false;
    }

    // Dotted sequence, e.g. "O.R.": a period two positions back with a letter or digit in between.
    // At least three characters must precede this period; the character at index - 3 itself is not
    // inspected. Using ">= 3" lets a sequence that starts at the very beginning of the text qualify.
    if (index >= 3 && char.IsLetterOrDigit(text[index - 1]) && text[index - 2] == '.')
    {
        return true;
    }

    // Walk backwards over the letters immediately before the period to find where the word starts.
    int wordStart = index;
    while (wordStart > 0 && char.IsLetter(text[wordStart - 1]))
    {
        wordStart--;
    }

    // Slice the word together with its trailing period in one step (this is just "." when no
    // letters precede the period) and look it up.
    string candidate = text.Substring(wordStart, index - wordStart + 1);
    return callbacks.GetAbbreviations().Contains(candidate);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Scans every paragraph for '.', '!' or '?' followed by a space and upper-cases the first letter
/// of the text that follows, unless the period is recognised as part of an abbreviation.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are examined; changed paragraphs get their <c>Text</c> replaced.</param>
/// <param name="callbacks">
/// Asked for permission per paragraph (<c>AllowFix</c>), informed of every change
/// (<c>AddFixToListView</c>) and given the total at the end (<c>UpdateFixStatus</c>).
/// </param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    int fixCount = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        Paragraph paragraph = subtitle.Paragraphs[index];
        string before = paragraph.Text;

        // Very short paragraphs and paragraphs the host vetoes are left untouched.
        if (paragraph.Text.Length <= 3 || !callbacks.AllowFix(paragraph, fixAction))
        {
            continue;
        }

        var stripped = new StrippableText(paragraph.Text);
        string body = stripped.StrippedText;
        int pos = body.IndexOfAny(ExpectedChars);

        // The scan only runs while a mark was found at a position greater than zero.
        while (pos > 0 && pos < body.Length)
        {
            char mark = body[pos];
            bool isPeriod = mark == '.';

            // For repeated "???" or "!!!" move to the last mark before looking at what follows.
            // Periods are deliberately not advanced here, so the word after an ellipsis ("...")
            // is not capitalized.
            if (!isPeriod)
            {
                pos = LastIndexOfRun(body, pos, mark);
            }

            bool spaceFollows = pos + 3 < body.Length && body[pos + 1] == ' ';
            if (spaceFollows && !IsAbbreviation(body, pos, callbacks))
            {
                // Everything after "<mark><space>" is handled as its own strippable text so the
                // first letter is located behind any leading stripped characters.
                var tail = new StrippableText(body.Substring(pos + 2));
                string head = body.Substring(0, pos + 2);
                body = head + tail.CombineWithPrePost(ToUpperFirstLetter(tail.StrippedText, callbacks));
            }

            // Now step over the remaining dots when the mark was a period.
            if (isPeriod)
            {
                pos = LastIndexOfRun(body, pos, '.');
            }

            // Resume the search three characters further on, if that is still inside the text.
            pos += 3;
            if (pos < body.Length)
            {
                pos = body.IndexOfAny(ExpectedChars, pos);
            }
        }

        string after = stripped.CombineWithPrePost(body);
        if (before == after)
        {
            continue;
        }

        paragraph.Text = after;
        fixCount++;
        callbacks.AddFixToListView(paragraph, fixAction, before, paragraph.Text);
    }

    callbacks.UpdateFixStatus(fixCount, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, fixCount.ToString(CultureInfo.InvariantCulture));
}

/// <summary>
/// Returns the index of the last character in the unbroken run of <paramref name="repeated"/>
/// that directly follows <paramref name="index"/>, or <paramref name="index"/> itself when the
/// next character differs.
/// </summary>
private static int LastIndexOfRun(string text, int index, char repeated)
{
    while (index + 1 < text.Length && text[index + 1] == repeated)
    {
        index++;
    }

    return index;
}
|
|
|
|
|
|
2016-04-28 22:43:10 +02:00
|
|
|
|
/// <summary>
/// Returns <paramref name="text"/> with its first character upper-cased when that character is a
/// lower-case letter; otherwise returns the text unchanged.
/// </summary>
/// <param name="text">Text whose first character may be upper-cased; may be null or empty.</param>
/// <param name="callbacks">Supplies the <c>Encoding</c> and <c>Language</c> passed to the Turkish-letter helpers.</param>
/// <returns>The adjusted text, or the original text when no change applies.</returns>
private static string ToUpperFirstLetter(string text, IFixCallbacks callbacks)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    char first = text[0];

    // Nothing to do for non-letters or letters that are already upper case.
    if (!char.IsLetter(first) || char.IsUpper(first))
    {
        return text;
    }

    // Leave words such as iPhone or iPad alone: a lower-case 'i' directly followed by a capital.
    bool lowerIThenCapital = first == 'i' && text.Length > 1 && char.IsUpper(text[1]);
    if (lowerIThenCapital)
    {
        return text;
    }

    if (Helper.IsTurkishLittleI(first, callbacks.Encoding, callbacks.Language))
    {
        return Helper.GetTurkishUppercaseLetter(first, callbacks.Encoding) + text.Substring(1);
    }

    // NOTE(review): char.ToUpper(char) uses the current culture - confirm that is intended here.
    return char.ToUpper(first) + text.Substring(1);
}
|
|
|
|
|
|
2015-09-19 09:28:36 +02:00
|
|
|
|
}
|
|
|
|
|
}
|