mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-22 03:02:35 +01:00
Merge pull request #7974 from ivandrofly/feature/uppercaseAfterColon
Refactor uppercase fix after colon in subtitles
This commit is contained in:
commit
610460228b
@ -1,7 +1,7 @@
|
|||||||
using Nikse.SubtitleEdit.Core.Common;
|
using System;
|
||||||
using Nikse.SubtitleEdit.Core.Interfaces;
|
|
||||||
using System;
|
|
||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
|
using Nikse.SubtitleEdit.Core.Common;
|
||||||
|
using Nikse.SubtitleEdit.Core.Interfaces;
|
||||||
|
|
||||||
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||||
{
|
{
|
||||||
@ -12,105 +12,107 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
|||||||
public static string StartWithUppercaseLetterAfterColon { get; set; } = "Start with uppercase letter after colon/semicolon";
|
public static string StartWithUppercaseLetterAfterColon { get; set; } = "Start with uppercase letter after colon/semicolon";
|
||||||
}
|
}
|
||||||
|
|
||||||
private static readonly char[] ExpectedChars = { ':', ';' };
|
|
||||||
/// <summary>
/// Walks every paragraph and upper-cases the first letter that follows a
/// colon or semicolon, skipping spaces and formatting tags (<c>&lt;...&gt;</c>
/// or <c>{...}</c>) that sit between the punctuation and the letter.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are inspected and updated in place.</param>
/// <param name="callbacks">
/// Host callbacks: supplies language and encoding, is asked via <c>AllowFix</c>
/// whether a paragraph may be changed, and receives every applied fix.
/// </param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var fixAction = Language.StartWithUppercaseLetterAfterColon;
    var noOfFixes = 0;
    var count = subtitle.Paragraphs.Count;
    var isTurkish = IsTurkish(callbacks.Language);

    // paragraph
    for (var i = 0; i < count; i++)
    {
        var p = subtitle.Paragraphs[i];
        if (!callbacks.AllowFix(p, fixAction))
        {
            continue;
        }

        var text = p.Text;
        var len = text.Length;

        // text
        for (var j = 0; j < len; j++)
        {
            var ch = text[j];
            if (ch != ':' && ch != ';')
            {
                continue;
            }

            var k = j + 1;

            // skip white space before formatting
            while (k < len && text[k] == ' ')
            {
                k++;
            }

            // skip formatting e.g: <i>, <b>, <font..>, {\an8}...
            // The parentheses matter: without them "&&" binds tighter than "||"
            // and text[k] is read even when k == len.
            while (k < len && (text[k] == '<' || text[k] == '{'))
            {
                var closingPair = GetClosingPair(text[k]);
                var closeIdx = text.IndexOf(closingPair, k + 1);
                if (closeIdx < 0)
                {
                    // unterminated tag: step over the opening char only
                    k++;
                    break;
                }

                k = closeIdx + 1;
            }

            // skip whitespace after formatting
            while (k < len && text[k] == ' ')
            {
                k++;
            }

            if (k >= len)
            {
                continue;
            }

            // slice from k index
            var textFromK = text.Substring(k);

            // NOTE(review): for Turkish only the characters accepted by
            // Helper.IsTurkishLittleI are upper-cased here; other lowercase
            // letters are left as-is - confirm this is intended.
            if (CanCapitalize(textFromK, callbacks) && !isTurkish)
            {
                text = text.Substring(0, k) + textFromK.CapitalizeFirstLetter();
            }
            else if (Helper.IsTurkishLittleI(text[k], callbacks.Encoding, callbacks.Language))
            {
                // replace the letter at k (not the colon/semicolon at j)
                text = text.Remove(k, 1).Insert(k, Helper.GetTurkishUppercaseLetter(text[k], callbacks.Encoding).ToString(CultureInfo.InvariantCulture));
            }

            // keep the bound in sync in case the replacement changed the length
            len = text.Length;
        }

        if (text != p.Text)
        {
            noOfFixes++;
            var oldText = subtitle.Paragraphs[i].Text;
            subtitle.Paragraphs[i].Text = text;
            callbacks.AddFixToListView(subtitle.Paragraphs[i], fixAction, oldText, text);
        }
    }

    callbacks.UpdateFixStatus(noOfFixes, Language.StartWithUppercaseLetterAfterColon);

    // maps an opening formatting char to the char that closes it
    char GetClosingPair(char ch) => ch == '<' ? '>' : '}';
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns true when <paramref name="lang"/> is the language code "tr"
/// (compared case-insensitively). A null code yields false instead of throwing.
/// </summary>
private static bool IsTurkish(string lang) => string.Equals(lang, "tr", StringComparison.OrdinalIgnoreCase);
|
||||||
|
|
||||||
|
/// <summary>
/// Decides whether the first character of <paramref name="input"/> should be
/// upper-cased: it must begin with a lowercase letter and must not look like
/// an "iXx"-style product name.
/// </summary>
/// <param name="input">Text starting at the candidate character.</param>
/// <param name="callbacks">Not read by this method.</param>
private static bool CanCapitalize(string input, IFixCallbacks callbacks)
{
    if (IsAppleNaming(input))
    {
        return false;
    }

    return BeginsWithLetter(input);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns true if the first character is a lowercase letter (i.e. it can be
/// converted to uppercase); returns false for null, empty input, or any other
/// leading character.
/// </summary>
/// <param name="input">Text whose first character is examined; may be null or empty.</param>
private static bool BeginsWithLetter(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    // char.IsLower is only true for lowercase letters, so no separate IsLetter check is needed
    return char.IsLower(input[0]);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Detects the "iXx" naming shape used by Apple product names such as
/// iPhone, iPad or iMac: a lowercase 'i', then an uppercase character,
/// then a lowercase character.
/// </summary>
/// <param name="input">Text starting at the candidate word; may be null or empty.</param>
private static bool IsAppleNaming(string input)
{
    // need at least three characters to match the "iXx" shape
    if (input == null || input.Length < 3)
    {
        return false;
    }

    if (input[0] != 'i')
    {
        return false;
    }

    var secondIsUpper = char.IsUpper(input[1]);
    var thirdIsLower = char.IsLower(input[2]);
    return secondIsUpper && thirdIsLower;
}
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user