mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-25 12:44:46 +01:00
Refactor uppercase fix after colon in subtitles
The subtitle correction code was refactored to improve readability and efficiency. A few lines were rearranged for better flow, and we introduced helper methods to determine the language, the start of the text, and special naming conventions. This will help in maintaining and understanding the code in the future.
This commit is contained in:
parent
bc1a9dc331
commit
8344295af9
@ -1,7 +1,7 @@
|
||||
using Nikse.SubtitleEdit.Core.Common;
|
||||
using Nikse.SubtitleEdit.Core.Interfaces;
|
||||
using System;
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using Nikse.SubtitleEdit.Core.Common;
|
||||
using Nikse.SubtitleEdit.Core.Interfaces;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
{
|
||||
@ -12,105 +12,107 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
public static string StartWithUppercaseLetterAfterColon { get; set; } = "Start with uppercase letter after colon/semicolon";
|
||||
}
|
||||
|
||||
private static readonly char[] ExpectedChars = { ':', ';' };
|
||||
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
|
||||
{
|
||||
string fixAction = Language.StartWithUppercaseLetterAfterColon;
|
||||
int noOfFixes = 0;
|
||||
for (int i = 0; i < subtitle.Paragraphs.Count; i++)
|
||||
{
|
||||
var p = new Paragraph(subtitle.Paragraphs[i]);
|
||||
Paragraph last = subtitle.GetParagraphOrDefault(i - 1);
|
||||
string oldText = p.Text;
|
||||
int skipCount = 0;
|
||||
var fixAction = Language.StartWithUppercaseLetterAfterColon;
|
||||
var noOfFixes = 0;
|
||||
|
||||
if (last != null)
|
||||
var count = subtitle.Paragraphs.Count;
|
||||
var isTurkish = IsTurkish(callbacks.Language);
|
||||
// paragraph
|
||||
for (var i = 0; i < count; i++)
|
||||
{
|
||||
string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
|
||||
if (lastText.EndsWith(':') || lastText.EndsWith(';'))
|
||||
var p = subtitle.Paragraphs[i];
|
||||
if (!callbacks.AllowFix(p, fixAction))
|
||||
{
|
||||
var st = new StrippableText(p.Text);
|
||||
if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
|
||||
{
|
||||
p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (oldText.Contains(ExpectedChars))
|
||||
{
|
||||
bool lastWasColon = false;
|
||||
for (int j = 0; j < p.Text.Length; j++)
|
||||
{
|
||||
var s = p.Text[j];
|
||||
if (s == ':' || s == ';')
|
||||
{
|
||||
lastWasColon = true;
|
||||
}
|
||||
else if (lastWasColon)
|
||||
{
|
||||
// skip whitespace index
|
||||
if (j + 2 < p.Text.Length && p.Text[j] == ' ')
|
||||
{
|
||||
s = p.Text[++j];
|
||||
}
|
||||
var text = p.Text;
|
||||
var len = text.Length;
|
||||
|
||||
var startFromJ = p.Text.Substring(j);
|
||||
if (startFromJ.Length > 3 && startFromJ[0] == '<' && startFromJ[2] == '>' && (startFromJ[1] == 'i' || startFromJ[1] == 'b' || startFromJ[1] == 'u'))
|
||||
// text
|
||||
for (var j = 0; j < len; j++)
|
||||
{
|
||||
skipCount = 2;
|
||||
}
|
||||
else if (startFromJ.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && p.Text.Substring(j).Contains('>'))
|
||||
var ch = text[j];
|
||||
if (ch == ':' || ch == ';')
|
||||
{
|
||||
skipCount = (j + startFromJ.IndexOf('>', 6)) - j;
|
||||
}
|
||||
else if (Helper.IsTurkishLittleI(s, callbacks.Encoding, callbacks.Language))
|
||||
{
|
||||
p.Text = p.Text.Remove(j, 1).Insert(j, Helper.GetTurkishUppercaseLetter(s, callbacks.Encoding).ToString(CultureInfo.InvariantCulture));
|
||||
lastWasColon = false;
|
||||
}
|
||||
else if (char.IsLower(s))
|
||||
{
|
||||
// iPhone
|
||||
bool change = true;
|
||||
if (s == 'i' && p.Text.Length > j + 1)
|
||||
{
|
||||
if (p.Text[j + 1] == char.ToUpper(p.Text[j + 1]))
|
||||
{
|
||||
change = false;
|
||||
}
|
||||
}
|
||||
if (change)
|
||||
{
|
||||
p.Text = p.Text.Remove(j, 1).Insert(j, char.ToUpper(s).ToString(CultureInfo.InvariantCulture));
|
||||
}
|
||||
var k = j + 1;
|
||||
|
||||
lastWasColon = false;
|
||||
}
|
||||
else if (!(" " + Environment.NewLine).Contains(s))
|
||||
// skip white space before formatting
|
||||
while (k < len && text[k] == ' ') k++;
|
||||
// skip formatting e.g: <i>, <b>,<font..>...
|
||||
while (k < len && text[k] == '<' || text[k] == '{')
|
||||
{
|
||||
lastWasColon = false;
|
||||
var closingPair = GetClosingPair(text[k]);
|
||||
var closeIdx = text.IndexOf(closingPair, k + 1);
|
||||
if (closeIdx < 0)
|
||||
{
|
||||
k++;
|
||||
break;
|
||||
}
|
||||
k = closeIdx + 1;
|
||||
}
|
||||
// skip whitespace after formatting
|
||||
while (k < len && text[k] == ' ') k++;
|
||||
|
||||
// move the: 'j' pointer and reset skipCount to 0
|
||||
if (skipCount > 0)
|
||||
if (k < len)
|
||||
{
|
||||
j += skipCount;
|
||||
skipCount = 0;
|
||||
// slice from k index
|
||||
var textFromK = text.Substring(k);
|
||||
|
||||
if (CanCapitalize(textFromK, callbacks) && !isTurkish)
|
||||
{
|
||||
text = text.Substring(0, k) + textFromK.CapitalizeFirstLetter();
|
||||
}
|
||||
else if (Helper.IsTurkishLittleI(text[k], callbacks.Encoding, callbacks.Language))
|
||||
{
|
||||
text = text.Remove(j, 1).Insert(j, Helper.GetTurkishUppercaseLetter(text[k], callbacks.Encoding).ToString(CultureInfo.InvariantCulture));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (oldText != p.Text && callbacks.AllowFix(p, fixAction))
|
||||
if (text != p.Text)
|
||||
{
|
||||
noOfFixes++;
|
||||
subtitle.Paragraphs[i].Text = p.Text;
|
||||
var oldText = subtitle.Paragraphs[i].Text;
|
||||
subtitle.Paragraphs[i].Text = text;
|
||||
callbacks.AddFixToListView(subtitle.Paragraphs[i], fixAction, oldText, p.Text);
|
||||
}
|
||||
}
|
||||
|
||||
callbacks.UpdateFixStatus(noOfFixes, Language.StartWithUppercaseLetterAfterColon);
|
||||
|
||||
char GetClosingPair(char ch) => ch == '<' ? '>' : '}';
|
||||
}
|
||||
|
||||
private bool IsTurkish(string lang) => lang.Equals("tr", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
private static bool CanCapitalize(string input, IFixCallbacks callbacks)
|
||||
{
|
||||
return !IsAppleNaming(input) && BeginsWithLetter(input);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns true if first character is convertible to uppercase otherwise false
|
||||
/// </summary>
|
||||
private static bool BeginsWithLetter(string input)
|
||||
{
|
||||
if (input.Length == 0) return false;
|
||||
var ch = input[0];
|
||||
return char.IsLetter(ch) && char.IsLower(ch);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Check if word is one of the apple product name e.g; iPhone, iPad, iMac...
|
||||
/// </summary>
|
||||
private static bool IsAppleNaming(string input)
|
||||
{
|
||||
if (string.IsNullOrEmpty(input)) return false;
|
||||
var len = input.Length;
|
||||
if (len < 3) return false;
|
||||
return input[0] == 'i' && char.IsUpper(input[1]) && char.IsLower(input[2]);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user