Fix common errors: fix "unneeded periods" problem with Chinese text - thx TokerX :)

Fix #6888
This commit is contained in:
niksedk 2023-05-03 14:38:19 +02:00
parent 98e5970ecb
commit 5a3edb0e2c
3 changed files with 87 additions and 24 deletions

View File

@ -2478,7 +2478,7 @@ can edit in same subtitle file (collaboration)</Information>
<ToggleDialogDashes>Toggle dialog dashes</ToggleDialogDashes>
<ToggleQuotes>Toggle quotes</ToggleQuotes>
<ToggleHiTags>Toggle HI tags</ToggleHiTags>
<ToggleCustomTags>Toggle custom tags</ToggleCustomTags>
<ToggleCustomTags>Toggle custom tags (surround with)</ToggleCustomTags>
<ToggleMusicSymbols>Toggle music symbols</ToggleMusicSymbols>
<Alignment>Alignment (selected lines)</Alignment>
<AlignmentN1>Alignment bottom left - {\an1}</AlignmentN1>

View File

@ -2434,6 +2434,36 @@ namespace Test.FixCommonErrors
Assert.AreEqual("안녕하세요...", sub.Paragraphs[0].Text);
}
[TestMethod]
public void FixUnneededPeriodsTestChineseDoNotChange()
{
    // The text contains a doubled ellipsis character; with Language = "zh"
    // the fix is expected to leave the paragraph exactly as it was.
    const string text = "但是……但是我們必須等待。";
    var subtitle = new Subtitle();
    subtitle.Paragraphs.Add(new Paragraph(text, 0, 1000));

    new FixUnneededPeriods().Fix(subtitle, new EmptyFixCallback { Language = "zh" });

    Assert.AreEqual(text, subtitle.Paragraphs[0].Text);
}
[TestMethod]
public void FixUnneededPeriodsTestChineseDoChange()
{
    // Seven consecutive ASCII periods go in; with Language = "zh" the fix
    // is expected to reduce the run to six periods.
    var subtitle = new Subtitle();
    var paragraph = new Paragraph("但是.......但是我們必須等待。", 0, 1000);
    subtitle.Paragraphs.Add(paragraph);

    var fixer = new FixUnneededPeriods();
    var callback = new EmptyFixCallback { Language = "zh" };
    fixer.Fix(subtitle, callback);

    Assert.AreEqual("但是......但是我們必須等待。", subtitle.Paragraphs[0].Text);
}
//[TestMethod]
//public void FixUnneededPeriodsTestChineseDoChange2()
//{
// var sub = new Subtitle();
// sub.Paragraphs.Add(new Paragraph("但是…但是我們必須等待。", 0, 1000));
// var fup = new FixUnneededPeriods();
// fup.Fix(sub, new EmptyFixCallback { Language = "zh" });
// Assert.AreEqual("但是……但是我們必須等待。", sub.Paragraphs[0].Text);
//}
[TestMethod]
public void FixCommas1()
{

View File

@ -1,6 +1,8 @@
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.Interfaces;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
@ -15,34 +17,63 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
string fixAction = Language.UnneededPeriod;
int removedCount = 0;
for (int i = 0; i < subtitle.Paragraphs.Count; i++)
var fixAction = Language.UnneededPeriod;
var removedCount = 0;
for (var i = 0; i < subtitle.Paragraphs.Count; i++)
{
var p = subtitle.Paragraphs[i];
if (callbacks.AllowFix(p, fixAction))
{
// Returns processed text.
string procText = RemoveDotAfterPunctuation(p.Text);
var procText = RemoveDotAfterPunctuation(p.Text);
while (procText.Contains("....", StringComparison.Ordinal))
if (callbacks.Language == "zh")
{
procText = procText.Replace("....", "...");
while (procText.Contains(".......", StringComparison.Ordinal))
{
procText = procText.Replace(".......", "......");
}
//var insertIndexes = new List<int>();
//for (var j = 0; j < procText.Length; j++)
//{
// if (procText[j] == '…')
// {
// var startOk = j == 0 || procText[j - 1] != '…';
// var endOk = j == procText.Length - 1 || procText[j + 1] != '…';
// if (startOk && endOk)
// {
// insertIndexes.Add(j);
// }
// }
//}
//foreach (var insertIndex in insertIndexes.OrderByDescending(idx => idx))
//{
// procText = procText.Insert(insertIndex, "…");
//}
}
while (procText.Contains("……", StringComparison.Ordinal))
else
{
procText = procText.Replace("……", "…");
}
while (procText.Contains("....", StringComparison.Ordinal))
{
procText = procText.Replace("....", "...");
}
while (procText.Contains(".…", StringComparison.Ordinal))
{
procText = procText.Replace(".…", "…");
}
while (procText.Contains("…", StringComparison.Ordinal))
{
procText = procText.Replace("…", "…");
}
while (procText.Contains("….", StringComparison.Ordinal))
{
procText = procText.Replace("….", "…");
while (procText.Contains(".…", StringComparison.Ordinal))
{
procText = procText.Replace(".…", "…");
}
while (procText.Contains("….", StringComparison.Ordinal))
{
procText = procText.Replace("….", "…");
}
}
var l = callbacks.Language;
@ -67,32 +98,34 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
procText = sb.ToString().TrimEnd();
}
int diff = p.Text.Length - procText.Length;
if (diff > 0)
var diff = p.Text.Length - procText.Length;
if (diff != 0)
{
// Calculate total removed dots.
removedCount += diff;
removedCount += Math.Abs(diff);
callbacks.AddFixToListView(p, fixAction, p.Text, procText);
p.Text = procText;
}
}
}
callbacks.UpdateFixStatus(removedCount, Language.RemoveUnneededPeriods);
}
public static string RemoveDotAfterPunctuation(string input)
{
for (int i = input.Length - 1; i > 0; i--)
for (var i = input.Length - 1; i > 0; i--)
{
// Expecting pre characters: [?!]
if (input[i] == '.' && (input[i - 1] == '?' || input[i - 1] == '!'))
{
int j = i;
var j = i;
// Fix recursive dot after ?/!
while (j + 1 < input.Length && input[j + 1] == '.')
{
j++;
}
// Expecting post characters: [\r\n ]
if (j + 1 == input.Length || input[j + 1] == ' ' || input[j + 1] == '\r' || input[j + 1] == '\n')
{
@ -100,8 +133,8 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
}
}
}
return input;
}
}
}