Testing new auto translate merge-split-helper

This commit is contained in:
Nikolaj Olsson 2023-12-03 16:02:17 +01:00
parent 13db58f377
commit c99b56fd58
2 changed files with 121 additions and 32 deletions

View File

@ -28,7 +28,10 @@ namespace Test.Logic.AutoTranslate
var splitResult = MergeAndSplitHelper.SplitMultipleLines(mergeResult, mergeResult.Text, "en");
Assert.AreEqual(subtitle.Paragraphs.Count, splitResult.Count);
Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult));
var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " ");
var splitResultText = string.Join(" ", splitResult);
Assert.AreEqual(inputText, splitResultText);
}
[TestMethod]
@ -58,6 +61,36 @@ namespace Test.Logic.AutoTranslate
Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult));
}
[TestMethod]
public void TestTextForHiWithTextAfter()
{
    // Arrange: an empty paragraph, a bracketed line ("[Raining]") followed by text, and a plain line.
    var bracketedWithText = "[Raining]" + Environment.NewLine + "Hallo.";
    var subtitle = new Subtitle();
    subtitle.Paragraphs.Add(new Paragraph("", 0, 1000));
    subtitle.Paragraphs.Add(new Paragraph(bracketedWithText, 1, 2000));
    subtitle.Paragraphs.Add(new Paragraph("How are you?", 3000, 4000));

    // Act (merge): combine everything from index 0 with a 1500 max text size.
    var merged = MergeAndSplitHelper.MergeMultipleLines(subtitle, 0, 1500);

    // Assert (merge): the empty paragraph contributes no text, but still counts as an item.
    Assert.IsNotNull(merged);
    Assert.AreEqual(bracketedWithText + Environment.NewLine + "How are you?", merged.Text);
    Assert.AreEqual(subtitle.Paragraphs.Count, merged.ParagraphCount);

    var items = merged.MergeResultItems;
    Assert.AreEqual(3, items.Count);
    Assert.AreEqual(true, items[0].IsEmpty);

    // The second item covers only paragraph 1 and ends with a single '.'.
    var secondItem = items[1];
    Assert.AreEqual(false, secondItem.IsEmpty);
    Assert.AreEqual(false, secondItem.Continious);
    Assert.AreEqual(1, secondItem.StartIndex);
    Assert.AreEqual(1, secondItem.EndIndex);
    Assert.AreEqual('.', secondItem.EndChar);
    Assert.AreEqual(1, secondItem.EndCharOccurences);

    // Act (split): split the untranslated merged text back into lines.
    var lines = MergeAndSplitHelper.SplitMultipleLines(merged, merged.Text, "en");

    // Assert (split): same number of lines, and same text once line breaks are flattened to spaces.
    Assert.AreEqual(subtitle.Paragraphs.Count, lines.Count);

    var joinedInput = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text));
    var expectedText = joinedInput.Replace(Environment.NewLine, " ");
    var actualText = string.Join(" ", lines);
    Assert.AreEqual(expectedText, actualText);
}
[TestMethod]
public void Test3()
{
@ -145,6 +178,10 @@ namespace Test.Logic.AutoTranslate
Assert.AreEqual("My name is Peter. And Jones.", splitResult[5]);
Assert.AreEqual("", splitResult[6]);
Assert.AreEqual("Hallo there.", splitResult[7]);
var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " ");
var splitResultText = string.Join(" ", splitResult);
Assert.AreEqual(inputText, splitResultText);
}
}
}

View File

@ -19,11 +19,6 @@ namespace Nikse.SubtitleEdit.Forms.Translate
}
var p = sourceSubtitle.Paragraphs[index];
if (p.Text.Contains("{\\", StringComparison.Ordinal) || p.Text.EndsWith(')') || p.Text.StartsWith('-'))
{
return 0;
}
char? splitAtChar = null;
var mergeCount = 0;
var allItalic = false;
@ -31,33 +26,52 @@ namespace Nikse.SubtitleEdit.Forms.Translate
var text = string.Empty;
var linesTranslate = 0;
if (MergeWithThreeNext(sourceSubtitle, index, source.Code))
MergeResult mergeResult = null;
List<Formatting> formattings = null;
if (mergeCount == 0)
{
mergeCount = 3;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
}
else if (MergeWithTwoNext(sourceSubtitle, index, source.Code))
{
mergeCount = 2;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
}
else if (MergeWithNext(sourceSubtitle, index, source.Code))
{
mergeCount = 1;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
var maxChars =
autoTranslator.Name == GoogleTranslateV1.StaticName ||
autoTranslator.Name == ChatGptTranslate.StaticName ||
autoTranslator.Name == MicrosoftTranslator.StaticName
? 1500
: 250;
// Try to handle (remove and save info for later restore) italics, bold, alignment where possible
var s = new Subtitle(sourceSubtitle);
formattings = HandleFormatting(s, index, target.Code);
// Merge text for better translation and save enough info to split again later
mergeResult = MergeMultipleLines(s, index, maxChars);
mergeCount = mergeResult.ParagraphCount;
text = mergeResult.Text;
}
//if (mergeCount == 0 && autoTranslator.Name == GoogleTranslateV1.StaticName || autoTranslator.Name == ChatGptTranslate.StaticName)
//{
// var maxChars = 1500;
// var mergeResult = MergeMultipleLines(sourceSubtitle, index, maxChars);
//}
if (mergeCount == 0)
{
if (MergeWithThreeNext(sourceSubtitle, index, source.Code))
{
mergeCount = 3;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
}
else if (MergeWithTwoNext(sourceSubtitle, index, source.Code))
{
mergeCount = 2;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
}
else if (MergeWithNext(sourceSubtitle, index, source.Code))
{
mergeCount = 1;
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
}
}
// just take next sentence too
var next = sourceSubtitle.GetParagraphOrDefault(index + 1);
@ -70,10 +84,32 @@ namespace Nikse.SubtitleEdit.Forms.Translate
text = Utilities.UnbreakLine(p.Text) + Environment.NewLine + Utilities.UnbreakLine(next.Text);
}
if (mergeCount > 0 && !text.Contains("{\\", StringComparison.Ordinal))
if (mergeResult != null)
{
var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code);
var splitResult = SplitMultipleLines(mergeResult, mergedTranslation, target.Code);
if (splitResult.Count == mergeCount)
{
var idx = 0;
foreach (var line in splitResult)
{
var s = formattings[idx].ReAddFormatting(line);
targetSubtitle.Paragraphs[index].Text = s;
index++;
linesTranslate++;
idx++;
}
return linesTranslate;
}
}
if (mergeCount > 0)
{
var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code);
List<string> result;
if (splitAtChar != null && mergeCount == 1)
{
result = SplitResultAtSplitChar(mergedTranslation, splitAtChar.Value, target.Code);
@ -115,6 +151,22 @@ namespace Nikse.SubtitleEdit.Forms.Translate
return linesTranslate;
}
/// <summary>
/// Walks every paragraph from <paramref name="index"/> to the end of
/// <paramref name="sourceSubtitle"/>, replaces each paragraph's text with the result of
/// <c>TranslationHelper.PreTranslate</c> followed by <c>Formatting.SetTagsAndReturnTrimmed</c>,
/// and collects one <c>Formatting</c> object per processed paragraph.
/// </summary>
/// <param name="sourceSubtitle">Subtitle whose paragraph texts are overwritten in place.</param>
/// <param name="index">Index of the first paragraph to process.</param>
/// <param name="sourceLanguage">
/// Language code forwarded to both helper calls.
/// NOTE(review): the visible caller passes the target language code here - confirm which is intended.
/// </param>
/// <returns>
/// The <c>Formatting</c> objects in paragraph order; element 0 corresponds to paragraph <paramref name="index"/>.
/// </returns>
private static List<Formatting> HandleFormatting(Subtitle sourceSubtitle, int index, string sourceLanguage)
{
    var collected = new List<Formatting>();
    var paragraphs = sourceSubtitle.Paragraphs;

    var position = index;
    while (position < paragraphs.Count)
    {
        var paragraph = paragraphs[position];
        var formatting = new Formatting();

        // Pre-translate first, then let the Formatting instance process the text and hand back the trimmed version.
        var preTranslated = TranslationHelper.PreTranslate(paragraph.Text, sourceLanguage);
        paragraph.Text = formatting.SetTagsAndReturnTrimmed(preTranslated, sourceLanguage);

        collected.Add(formatting);
        position++;
    }

    return collected;
}
public class MergeResultItem
{
public string Text { get; set; }
@ -161,7 +213,7 @@ namespace Nikse.SubtitleEdit.Forms.Translate
{
var p = sourceSubtitle.Paragraphs[i];
if (item != null && Utilities.UrlEncodeLength(item.Text + Environment.NewLine + p.Text) > maxTextSize)
if (item != null && Utilities.UrlEncodeLength(result.Text + Environment.NewLine + p.Text) > maxTextSize)
{
break;
}