Refactor uppercase fix after colon in subtitles

The "start with uppercase letter after colon/semicolon" subtitle fix was refactored to improve readability and efficiency. Some statements were rearranged for better flow, and helper methods were introduced to detect the subtitle language (Turkish), to locate the start of the text that follows a colon or semicolon, and to recognize special naming conventions (e.g. Apple product names such as iPhone). This should make the code easier to understand and maintain.
This commit is contained in:
Ivandro Jao 2024-02-24 20:16:29 +00:00
parent bc1a9dc331
commit 8344295af9

View File

@ -1,7 +1,7 @@
using Nikse.SubtitleEdit.Core.Common; using System;
using Nikse.SubtitleEdit.Core.Interfaces;
using System;
using System.Globalization; using System.Globalization;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.Interfaces;
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
{ {
@ -12,105 +12,107 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
public static string StartWithUppercaseLetterAfterColon { get; set; } = "Start with uppercase letter after colon/semicolon"; public static string StartWithUppercaseLetterAfterColon { get; set; } = "Start with uppercase letter after colon/semicolon";
} }
private static readonly char[] ExpectedChars = { ':', ';' };
/// <summary>
/// Upper-cases the first letter that follows a ':' or ';' inside each paragraph of
/// <paramref name="subtitle"/>, skipping spaces and formatting tags (&lt;...&gt; / {...})
/// that sit between the punctuation mark and the text.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are inspected and updated in place.</param>
/// <param name="callbacks">Supplies language/encoding, decides whether a fix is allowed and receives the fix log.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var fixAction = Language.StartWithUppercaseLetterAfterColon;
    var noOfFixes = 0;
    var count = subtitle.Paragraphs.Count;
    var isTurkish = IsTurkish(callbacks.Language);

    // paragraph
    for (var i = 0; i < count; i++)
    {
        var p = subtitle.Paragraphs[i];
        if (!callbacks.AllowFix(p, fixAction))
        {
            continue;
        }

        var text = p.Text;
        var len = text.Length;

        // text
        for (var j = 0; j < len; j++)
        {
            var ch = text[j];
            if (ch != ':' && ch != ';')
            {
                continue;
            }

            var k = j + 1;

            // skip white space before formatting
            while (k < len && text[k] == ' ')
            {
                k++;
            }

            // skip formatting e.g: <i>, <b>, <font..>, {\an8}...
            // The parentheses matter: without them '&&' binds tighter than '||' and
            // text[k] is read with k == len whenever the text ends right after the colon.
            while (k < len && (text[k] == '<' || text[k] == '{'))
            {
                var closingPair = GetClosingPair(text[k]);
                var closeIdx = text.IndexOf(closingPair, k + 1);
                if (closeIdx < 0)
                {
                    // unterminated tag: step over the opening character only
                    k++;
                    break;
                }

                k = closeIdx + 1;
            }

            // skip whitespace after formatting
            while (k < len && text[k] == ' ')
            {
                k++;
            }

            if (k >= len)
            {
                continue;
            }

            // slice from k index
            var textFromK = text.Substring(k);
            if (CanCapitalize(textFromK, callbacks) && !isTurkish)
            {
                text = text.Substring(0, k) + textFromK.CapitalizeFirstLetter();
            }
            else if (Helper.IsTurkishLittleI(text[k], callbacks.Encoding, callbacks.Language))
            {
                // Replace the letter at k (not the colon at j) with its Turkish uppercase form.
                // NOTE(review): for Turkish text only the "little i" is handled here; other
                // lowercase letters are left unchanged by this branch structure - confirm intent.
                text = text.Remove(k, 1).Insert(k, Helper.GetTurkishUppercaseLetter(text[k], callbacks.Encoding).ToString(CultureInfo.InvariantCulture));
            }
        }

        if (text != p.Text)
        {
            noOfFixes++;
            var oldText = subtitle.Paragraphs[i].Text;
            subtitle.Paragraphs[i].Text = text;
            callbacks.AddFixToListView(subtitle.Paragraphs[i], fixAction, oldText, text);
        }
    }

    callbacks.UpdateFixStatus(noOfFixes, Language.StartWithUppercaseLetterAfterColon);

    // Maps an opening formatting character to the character that terminates it.
    char GetClosingPair(char ch) => ch == '<' ? '>' : '}';
}
/// <summary>
/// Returns true when <paramref name="lang"/> is the language code "tr" (case-insensitive).
/// A null language code yields false instead of throwing.
/// </summary>
private static bool IsTurkish(string lang) => string.Equals(lang, "tr", StringComparison.OrdinalIgnoreCase);
/// <summary>
/// Tells whether the first character of <paramref name="input"/> may be upper-cased:
/// the text must not look like an Apple-style product name and must begin with a lowercase letter.
/// </summary>
/// <param name="input">Text starting at the candidate character.</param>
/// <param name="callbacks">Not used by this method.</param>
private static bool CanCapitalize(string input, IFixCallbacks callbacks)
{
    if (IsAppleNaming(input))
    {
        return false;
    }

    return BeginsWithLetter(input);
}
/// <summary>
/// Returns true when <paramref name="input"/> is non-empty and its first character
/// is a lowercase letter (i.e. one that can be converted to uppercase); otherwise false.
/// </summary>
private static bool BeginsWithLetter(string input)
{
    if (input.Length > 0)
    {
        var first = input[0];
        if (char.IsLetter(first))
        {
            return char.IsLower(first);
        }
    }

    return false;
}
/// <summary>
/// Checks whether the text starts like an Apple product name, e.g. iPhone, iPad, iMac:
/// a lowercase 'i', followed by an uppercase character, followed by a lowercase character.
/// </summary>
private static bool IsAppleNaming(string input)
{
    // Need at least three characters to test the "iXx" shape.
    if (input == null || input.Length < 3)
    {
        return false;
    }

    if (input[0] != 'i')
    {
        return false;
    }

    if (!char.IsUpper(input[1]))
    {
        return false;
    }

    return char.IsLower(input[2]);
}
} }
} }