mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-25 04:33:04 +01:00
Add "Normalize" to FCE
Normalizes Unicode characters (Form C), replaces non-breaking spaces, removes zero-width spaces, and converts colon and dash variants to plain ':' and '-'
This commit is contained in:
parent
3a88fdc0d6
commit
34d541198b
@ -615,6 +615,7 @@ Note: Do check free disk space.</WaveFileMalformed>
|
||||
<CommonOcrErrorsFixed>Common OCR errors fixed (OcrReplaceList file used): {0}</CommonOcrErrorsFixed>
|
||||
<RemoveSpaceBetweenNumber>Remove space between numbers</RemoveSpaceBetweenNumber>
|
||||
<FixDialogsOnOneLine>Fix dialogs on one line</FixDialogsOnOneLine>
|
||||
<NormalizeStrings>Normalize strings</NormalizeStrings>
|
||||
<RemoveSpaceBetweenNumbersFixed>Remove space between numbers fixed: {0}</RemoveSpaceBetweenNumbersFixed>
|
||||
<FixTurkishAnsi>Fix Turkish ANSI (Icelandic) letters to Unicode</FixTurkishAnsi>
|
||||
<FixDanishLetterI>Fix Danish letter 'i'</FixDanishLetterI>
|
||||
|
45
libse/Forms/FixCommonErrors/NormalizeStrings.cs
Normal file
45
libse/Forms/FixCommonErrors/NormalizeStrings.cs
Normal file
@ -0,0 +1,45 @@
|
||||
using Nikse.SubtitleEdit.Core.Interfaces;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
{
|
||||
/// <summary>
/// "Fix common errors" rule that normalizes the text of every paragraph:
/// Unicode normalization (Form C), non-breaking space to normal space,
/// removal of zero-width spaces, and colon/dash look-alikes to plain
/// ':' and '-'.
/// </summary>
public class NormalizeStrings : IFixCommonError
{
    /// <summary>
    /// Walks all paragraphs of <paramref name="subtitle"/>, computes the normalized
    /// text and, when it differs from the current text and the callback allows the
    /// fix, stores it on the paragraph and reports it via <paramref name="callbacks"/>.
    /// </summary>
    /// <param name="subtitle">Subtitle whose paragraphs are normalized in place.</param>
    /// <param name="callbacks">Host callbacks used to approve, list and summarize fixes.</param>
    public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
    {
        var language = Configuration.Settings.Language.FixCommonErrors;
        string fixAction = language.NormalizeStrings;
        int noOfFixes = 0;
        for (int i = 0; i < subtitle.Paragraphs.Count; i++)
        {
            var p = subtitle.Paragraphs[i];
            var oldText = p.Text;

            // Nothing to normalize; also avoids a NullReferenceException on null text.
            if (string.IsNullOrEmpty(oldText))
            {
                continue;
            }

            var text = oldText
                .Normalize() // parameterless overload = Unicode normalization form C

                .Replace('\u00a0', ' ') // No-break space (U+00A0, 160 decimal) -> normal space
                .Replace("\u200B", string.Empty) // Zero width space (U+200B) -> removed
                .Replace("\uFEFF", string.Empty) // Zero width no-break space (U+FEFF) -> removed

                .Replace('\u02F8', ':') // Modifier letter raised colon (U+02F8)
                .Replace('\uFF1A', ':') // Fullwidth colon (U+FF1A)
                .Replace('\uFE13', ':') // Presentation form for vertical colon (U+FE13)

                .Replace('\u2043', '-') // Hyphen bullet (U+2043)
                .Replace('\u2010', '-') // Hyphen (U+2010)
                .Replace('\u2012', '-') // Figure dash (U+2012)
                .Replace('\u2013', '-') // En dash (U+2013)
                .Replace('\u2014', '-') // Em dash (U+2014)
                .Replace('\u2015', '-'); // Horizontal bar (U+2015)

            if (oldText != text && callbacks.AllowFix(p, fixAction))
            {
                p.Text = text;
                noOfFixes++;
                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
            }
        }

        // Report under this rule's own title. The previous code passed the
        // "common OCR errors" title and the "dialogs on one line" example text,
        // which belong to other rules.
        // NOTE(review): no dedicated "normalize strings fixed" text is visible in this
        // change, so the rule title is reused for the third argument - confirm against
        // IFixCallbacks.UpdateFixStatus.
        callbacks.UpdateFixStatus(noOfFixes, fixAction, fixAction);
    }
}
|
||||
}
|
@ -811,6 +811,7 @@ namespace Nikse.SubtitleEdit.Core
|
||||
RemoveSpaceBetweenNumber = "Remove space between numbers",
|
||||
FixDialogsOnOneLine = "Fix dialogs on one line",
|
||||
RemoveSpaceBetweenNumbersFixed = "Remove space between numbers fixed: {0}",
|
||||
NormalizeStrings = "Normalize strings",
|
||||
FixLowercaseIToUppercaseI = "Fix alone lowercase 'i' to 'I' (English)",
|
||||
FixTurkishAnsi = "Fix Turkish ANSI (Icelandic) letters to Unicode",
|
||||
FixDanishLetterI = "Fix Danish letter 'i'",
|
||||
|
@ -1612,6 +1612,9 @@ namespace Nikse.SubtitleEdit.Core
|
||||
case "FixCommonErrors/FixDialogsOnOneLine":
|
||||
language.FixCommonErrors.FixDialogsOnOneLine = reader.Value;
|
||||
break;
|
||||
case "FixCommonErrors/NormalizeStrings":
|
||||
language.FixCommonErrors.NormalizeStrings = reader.Value;
|
||||
break;
|
||||
case "FixCommonErrors/RemoveSpaceBetweenNumbersFixed":
|
||||
language.FixCommonErrors.RemoveSpaceBetweenNumbersFixed = reader.Value;
|
||||
break;
|
||||
|
@ -683,6 +683,7 @@
|
||||
public string CommonOcrErrorsFixed { get; set; }
|
||||
public string RemoveSpaceBetweenNumber { get; set; }
|
||||
public string FixDialogsOnOneLine { get; set; }
|
||||
public string NormalizeStrings { get; set; }
|
||||
public string RemoveSpaceBetweenNumbersFixed { get; set; }
|
||||
public string FixTurkishAnsi { get; set; }
|
||||
public string FixDanishLetterI { get; set; }
|
||||
|
@ -689,6 +689,7 @@ $HorzAlign = Center
|
||||
public bool FixMusicNotationTicked { get; set; }
|
||||
public bool FixContinuationStyleTicked { get; set; }
|
||||
public bool FixUnnecessaryLeadingDotsTicked { get; set; }
|
||||
public bool NormalizeStringsTicked { get; set; }
|
||||
|
||||
public FixCommonErrorsSettings()
|
||||
{
|
||||
@ -725,6 +726,7 @@ $HorzAlign = Center
|
||||
FixMusicNotationTicked = true;
|
||||
FixContinuationStyleTicked = false;
|
||||
FixUnnecessaryLeadingDotsTicked = true;
|
||||
NormalizeStringsTicked = false;
|
||||
}
|
||||
|
||||
}
|
||||
@ -4842,6 +4844,12 @@ $HorzAlign = Center
|
||||
settings.CommonErrors.FixUnnecessaryLeadingDotsTicked = Convert.ToBoolean(subNode.InnerText);
|
||||
}
|
||||
|
||||
subNode = node.SelectSingleNode("NormalizeStringsTicked");
|
||||
if (subNode != null)
|
||||
{
|
||||
settings.CommonErrors.NormalizeStringsTicked = Convert.ToBoolean(subNode.InnerText);
|
||||
}
|
||||
|
||||
// Video Controls
|
||||
node = doc.DocumentElement.SelectSingleNode("VideoControls");
|
||||
subNode = node.SelectSingleNode("CustomSearchText1");
|
||||
@ -7411,6 +7419,7 @@ $HorzAlign = Center
|
||||
textWriter.WriteElementString("FixMusicNotationTicked", settings.CommonErrors.FixMusicNotationTicked.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("FixContinuationStyleTicked", settings.CommonErrors.FixContinuationStyleTicked.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("FixUnnecessaryLeadingDotsTicked", settings.CommonErrors.FixUnnecessaryLeadingDotsTicked.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("NormalizeStringsTicked", settings.CommonErrors.NormalizeStringsTicked.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteEndElement();
|
||||
|
||||
textWriter.WriteStartElement("VideoControls", string.Empty);
|
||||
|
@ -51,7 +51,8 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
private const int IndexUppercaseIInsideLowercaseWord = 28;
|
||||
private const int IndexRemoveSpaceBetweenNumbers = 29;
|
||||
private const int IndexDialogsOnOneLine = 30;
|
||||
private const int IndexFixEllipsesStart = 31;
|
||||
private const int IndexNormalizeStrings = 31;
|
||||
private const int IndexFixEllipsesStart = 32;
|
||||
private int _indexAloneLowercaseIToUppercaseIEnglish = -1;
|
||||
private int _turkishAnsiIndex = -1;
|
||||
private int _danishLetterIIndex = -1;
|
||||
@ -403,7 +404,8 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
new FixItem(_language.FixCommonOcrErrors, _language.FixOcrErrorExample, () => FixOcrErrorsViaReplaceList(threeLetterIsoLanguageName), ce.FixOcrErrorsViaReplaceListTicked),
|
||||
new FixItem(_language.FixUppercaseIInsindeLowercaseWords, _language.FixUppercaseIInsindeLowercaseWordsExample, () => new FixUppercaseIInsideWords().Fix(Subtitle, this), ce.UppercaseIInsideLowercaseWordTicked),
|
||||
new FixItem(_language.RemoveSpaceBetweenNumber, _language.FixSpaceBetweenNumbersExample, () => new RemoveSpaceBetweenNumbers().Fix(Subtitle, this), ce.RemoveSpaceBetweenNumberTicked),
|
||||
new FixItem(_language.FixDialogsOnOneLine, _language.FixDialogsOneLineExample, () => new FixDialogsOnOneLine().Fix(Subtitle, this), ce.FixDialogsOnOneLineTicked)
|
||||
new FixItem(_language.FixDialogsOnOneLine, _language.FixDialogsOneLineExample, () => new FixDialogsOnOneLine().Fix(Subtitle, this), ce.FixDialogsOnOneLineTicked),
|
||||
new FixItem(_language.NormalizeStrings, string.Empty, () => new NormalizeStrings().Fix(Subtitle, this), ce.NormalizeStringsTicked),
|
||||
};
|
||||
|
||||
if (Configuration.Settings.General.ContinuationStyle == ContinuationStyle.None)
|
||||
@ -1099,6 +1101,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
ce.FixOcrErrorsViaReplaceListTicked = listView1.Items[IndexFixOcrErrorsViaReplaceList].Checked;
|
||||
ce.RemoveSpaceBetweenNumberTicked = listView1.Items[IndexRemoveSpaceBetweenNumbers].Checked;
|
||||
ce.FixDialogsOnOneLineTicked = listView1.Items[IndexDialogsOnOneLine].Checked;
|
||||
ce.NormalizeStringsTicked = listView1.Items[IndexNormalizeStrings].Checked;
|
||||
if (_danishLetterIIndex >= 0)
|
||||
{
|
||||
ce.DanishLetterITicked = listView1.Items[_danishLetterIIndex].Checked;
|
||||
|
Loading…
Reference in New Issue
Block a user