Fix Arabic "Fix commas" - thx OmrSi :)

This commit is contained in:
Nikolaj Olsson 2020-04-16 18:51:34 +02:00
parent 414d59b4c5
commit c85630c9b0
2 changed files with 38 additions and 26 deletions

View File

@ -8,7 +8,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
var commaDouble = new Regex(@"([\p{L}\d\s])(,,)([\p{L}\d\s])");
var commaTriple = new Regex(@"([\p{L}\d\s])(, *, *,)([\p{L}\d\s])");
var commaTriple = new Regex(@"([\p{L}\d\s])( *, *, *,)([\p{L}\d\s])");
var commaTripleEndOfLine = new Regex(@"([\p{L}\d\s])(, *, *,)$");
var commaWhiteSpaceBetween = new Regex(@"([\p{L}\d\s])(,\s+,)([\p{L}\d\s])");
var commaFollowedByLetter = new Regex(@",(\p{L})");
@ -18,11 +18,13 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
for (int i = 0; i < subtitle.Paragraphs.Count; i++)
{
var p = subtitle.Paragraphs[i];
if (p.Text.IndexOf(',') >= 0 && callbacks.AllowFix(p, fixAction))
if ((p.Text.IndexOf(',') >= 0 || p.Text.IndexOf('،') >= 0) && callbacks.AllowFix(p, fixAction))
{
var s = p.Text;
var oldText = s;
if (p.Text.IndexOf(',') >= 0)
{
s = commaDouble.Replace(s, "$1,$3");
s = commaTriple.Replace(s, "$1...$3");
s = commaTripleEndOfLine.Replace(s, "$1...");
@ -43,13 +45,14 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
{
s = s.Replace(",?", "?");
}
}
if (p.Text.IndexOf('،') >= 0 && callbacks.Language == "ar")
if (p.Text.IndexOf('،') >= 0)
{
var commaDoubleAr = new Regex(@"([\p{L}\d\s])(،،)([\p{L}\d\s])");
var commaTripleAr = new Regex(@"([\p{L}\d\s])(، *، *،)([\p{L}\d\s])");
var commaTripleEndOfLineAr = new Regex(@"([\p{L}\d\s])(، *، *،)$");
var commaWhiteSpaceBetweenAr = new Regex(@"([\p{L}\d\s])(،\s+،)([\p{L}\d\s])");
var commaDoubleAr = new Regex(@"([\p{L}\d\s])( *،،)([\p{L}\d\s])");
var commaTripleAr = new Regex(@"([\p{L}\d\s])( *، *، *،)([\p{L}\d\s])");
var commaTripleEndOfLineAr = new Regex(@"([\p{L}\d\s])( *، *، *،)$");
var commaWhiteSpaceBetweenAr = new Regex(@"([\p{L}\d\s])( *،\s+،)([\p{L}\d\s])");
var commaFollowedByLetterAr = new Regex(@"،(\p{L})");
s = commaDoubleAr.Replace(s, "$1،$3");
s = commaTripleAr.Replace(s, "$1...$3");
@ -71,7 +74,6 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
{
s = s.Replace("،?", "?");
}
}
if (oldText != s)

View File

@ -2316,6 +2316,16 @@ namespace Test.FixCommonErrors
Assert.AreEqual("Hi... are you okay?", sub.Paragraphs[0].Text);
}
// Checks FixCommas on Arabic text: two Arabic commas (،) separated by spaces
// must be reduced to a single Arabic comma placed directly after the first word.
[TestMethod]
public void FixCommasArabic()
{
var sub = new Subtitle();
// Input: word, space, Arabic comma, space, Arabic comma, space, word.
sub.Paragraphs.Add(new Paragraph("مرحبا ، ، مرحبا", 0, 1000));
var fup = new FixCommas();
// The callback reports the subtitle language as Arabic ("ar").
fup.Fix(sub, new EmptyFixCallback { Language = "ar" });
// Expected: one Arabic comma attached to the first word, then a single space.
Assert.AreEqual("مرحبا، مرحبا", sub.Paragraphs[0].Text);
}
#endregion
#region Fix Danish letter "i"