mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-23 11:42:36 +01:00
281 lines
15 KiB
C#
281 lines
15 KiB
C#
using System;
|
|
using System.Globalization;
|
|
using System.Text;
|
|
|
|
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
|
{
|
|
/// <summary>
/// Fix-common-errors rule that upper-cases the first letter of a subtitle line when the
/// preceding text is reported as an end of paragraph by <c>Helper.IsPreviousTextEndOfParagraph</c>.
/// </summary>
public class FixStartWithUppercaseLetterAfterParagraph : IFixCommonError
{
    // Italic openers that may precede the first letter; tried once each, in this order.
    private static readonly string[] ItalicOpeners = { "<i> ", "<i>", "<I> ", "<I>" };

    // Music symbols and spaces that may follow the italic opener; tried once each, in this order.
    private static readonly char[] MusicAndSpaceSymbols = { '♪', ' ', '♫', ' ' };

    // Line starts that are never capitalized.
    private static readonly string[] UrlPrefixes = { "www.", "http:", "https:" };

    // English line starts where a lowercase 'l' is replaced by an uppercase 'I'.
    // With ordinal matching "ls" also covers "ls "/"lsn't ", "ln" covers "lnterested"
    // and "l " covers "l am ", so those longer entries of the original list are not repeated.
    private static readonly string[] LowercaseLAsUppercaseIPrefixes =
    {
        "l ", "l-I", "l'm ", "ldiot", "lc", "lf ", "lm", "ln", "ls", "lt"
    };

    // Markers searched for directly after a line break, in this exact order (first hit wins).
    private static readonly string[] DashMarkersAfterLineBreak =
    {
        " -", "- <i> ♪", "-", "<i>-", "<i> -", "♪ -", "♪ <i> -", "♪ <i>-"
    };

    /// <summary>
    /// Runs the rule over every paragraph of <paramref name="subtitle"/>. A changed text is
    /// only written back when <c>callbacks.AllowFix</c> accepts it; each applied change is
    /// reported through <c>callbacks.AddFixToListView</c> and the total through
    /// <c>callbacks.UpdateFixStatus</c>.
    /// </summary>
    /// <param name="subtitle">Subtitle whose paragraphs are examined and possibly modified.</param>
    /// <param name="callbacks">Supplies encoding and language, approves fixes and receives the results.</param>
    public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
    {
        var language = Configuration.Settings.Language.FixCommonErrors;
        string fixAction = language.FixFirstLetterToUppercaseAfterParagraph;
        int fixCount = 0;

        for (int index = 0; index < subtitle.Paragraphs.Count; index++)
        {
            Paragraph current = subtitle.Paragraphs[index];
            Paragraph previous = subtitle.GetParagraphOrDefault(index - 1);
            string oldText = current.Text;

            // DoFix mutates the paragraph it is given, so it works on a copy and the real
            // paragraph is only touched after the fix has been approved.
            string fixedText = DoFix(new Paragraph(current), previous, callbacks.Encoding, callbacks.Language);

            if (oldText != fixedText && callbacks.AllowFix(current, fixAction))
            {
                current.Text = fixedText;
                fixCount++;
                callbacks.AddFixToListView(current, fixAction, oldText, current.Text);
            }
        }

        callbacks.UpdateFixStatus(fixCount, language.StartWithUppercaseLetterAfterParagraph, fixCount.ToString(CultureInfo.InvariantCulture));
    }

    /// <summary>
    /// Applies the three capitalization passes to <paramref name="p"/> (start of paragraph,
    /// second line / dialog lines, text after a dash on a new line) and returns the resulting text.
    /// </summary>
    /// <param name="p">Paragraph to fix; its <c>Text</c> is modified in place.</param>
    /// <param name="prev">Paragraph before <paramref name="p"/>, or null when there is none.</param>
    /// <param name="encoding">Passed through to the Turkish letter helpers.</param>
    /// <param name="language">Language code; "en" enables the English-only rules.</param>
    /// <returns>The text of <paramref name="p"/> after all passes.</returns>
    private static string DoFix(Paragraph p, Paragraph prev, Encoding encoding, string language)
    {
        FixStartOfParagraph(p, prev, encoding, language);
        FixSecondLineAndDialog(p, prev, encoding, language);
        FixTextAfterDashOnNewLine(p, encoding, language);
        return p.Text;
    }

    /// <summary>
    /// Capitalizes the first letter of the paragraph when the previous paragraph
    /// (or the absence of one) counts as an end of paragraph.
    /// </summary>
    private static void FixStartOfParagraph(Paragraph p, Paragraph prev, Encoding encoding, string language)
    {
        if (p.Text == null || p.Text.Length <= 1)
        {
            return;
        }

        string pre;
        string text = StripLeadingDecoration(p.Text, out pre);
        char firstLetter = text[0];

        // " ." is the stand-in for "no previous paragraph" and is always treated as an end of paragraph.
        string prevText = " .";
        if (prev != null)
        {
            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
        }

        bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
        if (prevText == " .")
        {
            isPrevEndOfLine = true;
        }

        if (!StartsWithAny(text, UrlPrefixes) &&
            (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
            !char.IsDigit(firstLetter) &&
            isPrevEndOfLine &&
            !EndsWithKnownAbbreviation(prevText, language))
        {
            p.Text = CapitalizeFirstLetter(pre, text, encoding, language);
        }
    }

    /// <summary>
    /// For paragraphs of exactly two lines: capitalizes the second line when the first line
    /// ends a paragraph, then capitalizes the letter after the dash on both lines when the
    /// paragraph is a two-speaker dialog.
    /// </summary>
    private static void FixSecondLineAndDialog(Paragraph p, Paragraph prev, Encoding encoding, string language)
    {
        if (p.Text == null || !p.Text.Contains(Environment.NewLine))
        {
            return;
        }

        var arr = p.Text.SplitToLines();
        if (arr.Length != 2 || arr[1].Length <= 1)
        {
            return;
        }

        string pre;
        string text = StripLeadingDecoration(arr[1], out pre);
        char firstLetter = text[0];
        string prevText = HtmlUtil.RemoveHtmlTags(arr[0]);
        bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);

        // Unlike the start-of-paragraph pass, a first line ending in "..." never triggers a fix here.
        if (!StartsWithAny(text, UrlPrefixes) &&
            (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
            !prevText.EndsWith("...", StringComparison.Ordinal) &&
            isPrevEndOfLine &&
            !EndsWithKnownAbbreviation(prevText, language))
        {
            p.Text = arr[0] + Environment.NewLine + CapitalizeFirstLetter(pre, text, encoding, language);
        }

        arr = p.Text.SplitToLines();
        if (!IsSingleDashLine(arr[0]) || !IsSingleDashLine(arr[1]))
        {
            return;
        }

        // Second speaker: depends on whether the first line ends a paragraph.
        string fixedSecondLine = UppercaseLetterAfterDash(arr[1]);
        if (isPrevEndOfLine && fixedSecondLine != null)
        {
            p.Text = arr[0] + Environment.NewLine + fixedSecondLine;
        }

        arr = p.Text.SplitToLines();

        // First speaker: depends on the previous paragraph, which is only consulted when it
        // ended less than 10000 ms before this paragraph starts; otherwise " ." is used.
        prevText = " .";
        if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
        {
            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
        }

        bool isPrevLineEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
        string fixedFirstLine = UppercaseLetterAfterDash(arr[0]);
        if (isPrevLineEndOfLine && fixedFirstLine != null)
        {
            p.Text = fixedFirstLine + Environment.NewLine + arr[1];
        }
    }

    /// <summary>
    /// Finds the first line break that is followed by one of the dash markers and
    /// capitalizes the first letter of the stripped text that follows it.
    /// </summary>
    private static void FixTextAfterDashOnNewLine(Paragraph p, Encoding encoding, string language)
    {
        if (p.Text == null || p.Text.Length <= 4)
        {
            return;
        }

        int len = 0;
        int indexOfNewLine = -1;
        for (int i = 0; i < DashMarkersAfterLineBreak.Length && indexOfNewLine < 0; i++)
        {
            string marker = DashMarkersAfterLineBreak[i];
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + marker, 1, StringComparison.Ordinal);

            // The first marker keeps a skip length of 0; every other marker skips its own length.
            // NOTE(review): the skip length does not include the line break itself, so the
            // remainder presumably relies on StripableText to peel off leading characters - confirm.
            len = i == 0 ? 0 : marker.Length;
        }

        if (indexOfNewLine <= 0)
        {
            return;
        }

        int splitAt = indexOfNewLine + len;
        var st = new StripableText(p.Text.Substring(splitAt));
        if (st.StrippedText.Length == 0 ||
            st.Pre.EndsWith("[", StringComparison.Ordinal) ||
            st.Pre.Contains("..."))
        {
            return;
        }

        char firstLetter = st.StrippedText[0];
        string head = p.Text.Substring(0, splitAt);
        if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
        {
            p.Text = head + st.Pre + Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + st.StrippedText.Substring(1) + st.Post;
        }
        else if (firstLetter != char.ToUpper(firstLetter))
        {
            p.Text = head + st.Pre + char.ToUpper(firstLetter) + st.StrippedText.Substring(1) + st.Post;
        }
    }

    /// <summary>
    /// Removes leading italic openers, music symbols and single spaces from <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Line to examine; must not be empty.</param>
    /// <param name="pre">Receives everything that was removed, in its original order.</param>
    /// <returns>The remainder of the line; never empty because each step requires text to be left over.</returns>
    private static string StripLeadingDecoration(string text, out string pre)
    {
        var prefix = new StringBuilder();

        foreach (string opener in ItalicOpeners)
        {
            if (text.Length > opener.Length && text.StartsWith(opener, StringComparison.Ordinal))
            {
                // Append rather than overwrite so that stacked openers such as "<i> <I>"
                // are all kept when the line is rebuilt.
                prefix.Append(opener);
                text = text.Substring(opener.Length);
            }
        }

        foreach (char symbol in MusicAndSpaceSymbols)
        {
            if (text.Length > 2 && text[0] == symbol)
            {
                prefix.Append(symbol);
                text = text.Substring(1);
            }
        }

        pre = prefix.ToString();
        return text;
    }

    /// <summary>
    /// Returns <paramref name="pre"/> followed by <paramref name="text"/> with its first letter
    /// upper-cased: the Turkish helper is used for a Turkish little i, an English lowercase 'l'
    /// at one of the listed line starts becomes 'I', anything else goes through <c>char.ToUpper</c>.
    /// </summary>
    private static string CapitalizeFirstLetter(string pre, string text, Encoding encoding, string language)
    {
        char firstLetter = text[0];
        string rest = text.Substring(1);

        if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
        {
            return pre + Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + rest;
        }

        // l > I. The whole substitution is restricted to English, including "l am ".
        if (language == "en" && StartsWithAny(text, LowercaseLAsUppercaseIPrefixes))
        {
            return pre + "I" + rest;
        }

        return pre + char.ToUpper(firstLetter) + rest;
    }

    /// <summary>
    /// Upper-cases the letter following a leading "&lt;i&gt;- " or "- ".
    /// </summary>
    /// <returns>The rebuilt line, or null when the line matches neither form or is too short.</returns>
    private static string UppercaseLetterAfterDash(string line)
    {
        if (line.StartsWith("<i>- ", StringComparison.Ordinal) && line.Length > 6)
        {
            return "<i>- " + char.ToUpper(line[5]) + line.Remove(0, 6);
        }

        if (line.StartsWith("- ", StringComparison.Ordinal) && line.Length > 3)
        {
            return "- " + char.ToUpper(line[2]) + line.Remove(0, 3);
        }

        return null;
    }

    /// <summary>
    /// True when the line starts with "-" or "&lt;i&gt;-" but not with "--" or "&lt;i&gt;--".
    /// </summary>
    private static bool IsSingleDashLine(string line)
    {
        bool startsWithDash = line.StartsWith("-", StringComparison.Ordinal) ||
                              line.StartsWith("<i>-", StringComparison.Ordinal);
        bool startsWithDoubleDash = line.StartsWith("--", StringComparison.Ordinal) ||
                                    line.StartsWith("<i>--", StringComparison.Ordinal);
        return startsWithDash && !startsWithDoubleDash;
    }

    /// <summary>
    /// True when, for English, the previous text ends with " o.r.", " a.m." or " p.m.",
    /// in which case the trailing period does not end a sentence and no fix is made.
    /// </summary>
    private static bool EndsWithKnownAbbreviation(string prevText, string language)
    {
        return language == "en" &&
               (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
                prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
                prevText.EndsWith(" p.m.", StringComparison.Ordinal));
    }

    /// <summary>
    /// True when <paramref name="text"/> starts with any of <paramref name="prefixes"/> (ordinal comparison).
    /// </summary>
    private static bool StartsWithAny(string text, string[] prefixes)
    {
        foreach (string prefix in prefixes)
        {
            if (text.StartsWith(prefix, StringComparison.Ordinal))
            {
                return true;
            }
        }

        return false;
    }
}
|
|
}
|