Testing new auto translate merge-split-helper

This commit is contained in:
Nikolaj Olsson 2023-12-03 16:02:17 +01:00
parent 13db58f377
commit c99b56fd58
2 changed files with 121 additions and 32 deletions

View File

@ -28,7 +28,10 @@ namespace Test.Logic.AutoTranslate
var splitResult = MergeAndSplitHelper.SplitMultipleLines(mergeResult, mergeResult.Text, "en"); var splitResult = MergeAndSplitHelper.SplitMultipleLines(mergeResult, mergeResult.Text, "en");
Assert.AreEqual(subtitle.Paragraphs.Count, splitResult.Count); Assert.AreEqual(subtitle.Paragraphs.Count, splitResult.Count);
Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult));
var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " ");
var splitResultText = string.Join(" ", splitResult);
Assert.AreEqual(inputText, splitResultText);
} }
[TestMethod] [TestMethod]
@ -58,6 +61,36 @@ namespace Test.Logic.AutoTranslate
Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult)); Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult));
} }
/// <summary>
/// Merges an empty paragraph, a two-line paragraph ("[Raining]" followed by "Hallo.")
/// and a plain paragraph, checks the bookkeeping recorded for the second paragraph,
/// and then verifies that splitting the merged text gives back one entry per
/// original paragraph with the same overall text.
/// </summary>
[TestMethod]
public void TestTextForHiWithTextAfter()
{
    // Arrange
    var bracketedThenText = "[Raining]" + Environment.NewLine + "Hallo.";
    var followUpText = "How are you?";

    var subtitle = new Subtitle();
    subtitle.Paragraphs.Add(new Paragraph("", 0, 1000));
    subtitle.Paragraphs.Add(new Paragraph(bracketedThenText, 1, 2000));
    subtitle.Paragraphs.Add(new Paragraph(followUpText, 3000, 4000));

    // Act: merge
    var mergeResult = MergeAndSplitHelper.MergeMultipleLines(subtitle, 0, 1500);

    // Assert: merged text and per-paragraph items
    Assert.IsNotNull(mergeResult);
    Assert.AreEqual(bracketedThenText + Environment.NewLine + followUpText, mergeResult.Text);
    Assert.AreEqual(subtitle.Paragraphs.Count, mergeResult.ParagraphCount);
    Assert.AreEqual(3, mergeResult.MergeResultItems.Count);
    Assert.AreEqual(true, mergeResult.MergeResultItems[0].IsEmpty);

    var secondItem = mergeResult.MergeResultItems[1];
    Assert.AreEqual(false, secondItem.IsEmpty);
    Assert.AreEqual(false, secondItem.Continious);
    Assert.AreEqual(1, secondItem.StartIndex);
    Assert.AreEqual(1, secondItem.EndIndex);
    Assert.AreEqual('.', secondItem.EndChar);
    Assert.AreEqual(1, secondItem.EndCharOccurences);

    // Act: split the merged text again
    var splitResult = MergeAndSplitHelper.SplitMultipleLines(mergeResult, mergeResult.Text, "en");

    // Assert: same number of entries, and the same text once the line breaks
    // inside the source paragraphs are flattened to spaces
    Assert.AreEqual(subtitle.Paragraphs.Count, splitResult.Count);
    var joinedSource = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text));
    var inputText = joinedSource.Replace(Environment.NewLine, " ");
    var splitResultText = string.Join(" ", splitResult);
    Assert.AreEqual(inputText, splitResultText);
}
[TestMethod] [TestMethod]
public void Test3() public void Test3()
{ {
@ -145,6 +178,10 @@ namespace Test.Logic.AutoTranslate
Assert.AreEqual("My name is Peter. And Jones.", splitResult[5]); Assert.AreEqual("My name is Peter. And Jones.", splitResult[5]);
Assert.AreEqual("", splitResult[6]); Assert.AreEqual("", splitResult[6]);
Assert.AreEqual("Hallo there.", splitResult[7]); Assert.AreEqual("Hallo there.", splitResult[7]);
var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " ");
var splitResultText = string.Join(" ", splitResult);
Assert.AreEqual(inputText, splitResultText);
} }
} }
} }

View File

@ -19,11 +19,6 @@ namespace Nikse.SubtitleEdit.Forms.Translate
} }
var p = sourceSubtitle.Paragraphs[index]; var p = sourceSubtitle.Paragraphs[index];
if (p.Text.Contains("{\\", StringComparison.Ordinal) || p.Text.EndsWith(')') || p.Text.StartsWith('-'))
{
return 0;
}
char? splitAtChar = null; char? splitAtChar = null;
var mergeCount = 0; var mergeCount = 0;
var allItalic = false; var allItalic = false;
@ -31,33 +26,52 @@ namespace Nikse.SubtitleEdit.Forms.Translate
var text = string.Empty; var text = string.Empty;
var linesTranslate = 0; var linesTranslate = 0;
if (MergeWithThreeNext(sourceSubtitle, index, source.Code)) MergeResult mergeResult = null;
List<Formatting> formattings = null;
if (mergeCount == 0)
{ {
mergeCount = 3; var maxChars =
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i"); autoTranslator.Name == GoogleTranslateV1.StaticName ||
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b"); autoTranslator.Name == ChatGptTranslate.StaticName ||
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold); autoTranslator.Name == MicrosoftTranslator.StaticName
} ? 1500
else if (MergeWithTwoNext(sourceSubtitle, index, source.Code)) : 250;
{
mergeCount = 2; // Try to handle (remove and save info for later restore) italics, bold, alignment where possible
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i"); var s = new Subtitle(sourceSubtitle);
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b"); formattings = HandleFormatting(s, index, target.Code);
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
} // Merge text for better translation and save info enough to split again later
else if (MergeWithNext(sourceSubtitle, index, source.Code)) mergeResult = MergeMultipleLines(s, index, maxChars);
{ mergeCount = mergeResult.ParagraphCount;
mergeCount = 1; text = mergeResult.Text;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
} }
//if (mergeCount == 0 && autoTranslator.Name == GoogleTranslateV1.StaticName || autoTranslator.Name == ChatGptTranslate.StaticName) if (mergeCount == 0)
//{ {
// var maxChars = 1500; if (MergeWithThreeNext(sourceSubtitle, index, source.Code))
// var mergeResult = MergeMultipleLines(sourceSubtitle, index, maxChars); {
//} mergeCount = 3;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
}
else if (MergeWithTwoNext(sourceSubtitle, index, source.Code))
{
mergeCount = 2;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
}
else if (MergeWithNext(sourceSubtitle, index, source.Code))
{
mergeCount = 1;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
}
}
// just take next sentence too // just take next sentence too
var next = sourceSubtitle.GetParagraphOrDefault(index + 1); var next = sourceSubtitle.GetParagraphOrDefault(index + 1);
@ -70,10 +84,32 @@ namespace Nikse.SubtitleEdit.Forms.Translate
text = Utilities.UnbreakLine(p.Text) + Environment.NewLine + Utilities.UnbreakLine(next.Text); text = Utilities.UnbreakLine(p.Text) + Environment.NewLine + Utilities.UnbreakLine(next.Text);
} }
if (mergeCount > 0 && !text.Contains("{\\", StringComparison.Ordinal)) if (mergeResult != null)
{ {
var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code); var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code);
var splitResult = SplitMultipleLines(mergeResult, mergedTranslation, target.Code);
if (splitResult.Count == mergeCount)
{
var idx = 0;
foreach (var line in splitResult)
{
var s = formattings[idx].ReAddFormatting(line);
targetSubtitle.Paragraphs[index].Text = s;
index++;
linesTranslate++;
idx++;
}
return linesTranslate;
}
}
if (mergeCount > 0)
{
var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code);
List<string> result; List<string> result;
if (splitAtChar != null && mergeCount == 1) if (splitAtChar != null && mergeCount == 1)
{ {
result = SplitResultAtSplitChar(mergedTranslation, splitAtChar.Value, target.Code); result = SplitResultAtSplitChar(mergedTranslation, splitAtChar.Value, target.Code);
@ -115,6 +151,22 @@ namespace Nikse.SubtitleEdit.Forms.Translate
return linesTranslate; return linesTranslate;
} }
/// <summary>
/// Walks every paragraph from <paramref name="index"/> to the end of
/// <paramref name="sourceSubtitle"/>, replaces each paragraph's text with the
/// result of <c>Formatting.SetTagsAndReturnTrimmed</c> applied to the
/// pre-translated text, and collects the <c>Formatting</c> instance used for
/// each paragraph.
/// </summary>
/// <param name="sourceSubtitle">Subtitle whose paragraph texts are overwritten in place.</param>
/// <param name="index">Index of the first paragraph to process.</param>
/// <param name="sourceLanguage">
/// Language code forwarded to both <c>TranslationHelper.PreTranslate</c> and
/// <c>SetTagsAndReturnTrimmed</c>.
/// NOTE(review): the visible caller passes <c>target.Code</c> here - confirm which language is intended.
/// </param>
/// <returns>
/// One <c>Formatting</c> per processed paragraph, in paragraph order; entry 0
/// belongs to the paragraph at <paramref name="index"/>.
/// </returns>
private static List<Formatting> HandleFormatting(Subtitle sourceSubtitle, int index, string sourceLanguage)
{
    var collected = new List<Formatting>();

    var position = index;
    while (position < sourceSubtitle.Paragraphs.Count)
    {
        var paragraph = sourceSubtitle.Paragraphs[position];
        var formatting = new Formatting();

        // Pre-translate first; the formatting object then processes that text
        // and returns the version stored back on the paragraph.
        var preTranslated = TranslationHelper.PreTranslate(paragraph.Text, sourceLanguage);
        paragraph.Text = formatting.SetTagsAndReturnTrimmed(preTranslated, sourceLanguage);

        collected.Add(formatting);
        position++;
    }

    return collected;
}
public class MergeResultItem public class MergeResultItem
{ {
public string Text { get; set; } public string Text { get; set; }
@ -161,7 +213,7 @@ namespace Nikse.SubtitleEdit.Forms.Translate
{ {
var p = sourceSubtitle.Paragraphs[i]; var p = sourceSubtitle.Paragraphs[i];
if (item != null && Utilities.UrlEncodeLength(item.Text + Environment.NewLine + p.Text) > maxTextSize) if (item != null && Utilities.UrlEncodeLength(result.Text + Environment.NewLine + p.Text) > maxTextSize)
{ {
break; break;
} }