From c99b56fd5869a2937bc47ede1b0b6601ac0eeed2 Mon Sep 17 00:00:00 2001 From: Nikolaj Olsson Date: Sun, 3 Dec 2023 16:02:17 +0100 Subject: [PATCH] Testing new auto translate merge-split-helper --- .../AutoTranslate/MergeAndSplitHelperTest.cs | 39 +++++- src/ui/Forms/Translate/MergeAndSplitHelper.cs | 114 +++++++++++++----- 2 files changed, 121 insertions(+), 32 deletions(-) diff --git a/src/Test/Logic/AutoTranslate/MergeAndSplitHelperTest.cs b/src/Test/Logic/AutoTranslate/MergeAndSplitHelperTest.cs index a9dc8e1ec..5e2a6f192 100644 --- a/src/Test/Logic/AutoTranslate/MergeAndSplitHelperTest.cs +++ b/src/Test/Logic/AutoTranslate/MergeAndSplitHelperTest.cs @@ -28,7 +28,10 @@ namespace Test.Logic.AutoTranslate var splitResult = MergeAndSplitHelper.SplitMultipleLines(mergeResult, mergeResult.Text, "en"); Assert.AreEqual(subtitle.Paragraphs.Count, splitResult.Count); - Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult)); + + var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " "); + var splitResultText = string.Join(" ", splitResult); + Assert.AreEqual(inputText, splitResultText); } [TestMethod] @@ -58,6 +61,36 @@ namespace Test.Logic.AutoTranslate Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult)); } + [TestMethod] + public void TestTextForHiWithTextAfter() + { + var subtitle = new Subtitle(); + subtitle.Paragraphs.Add(new Paragraph("", 0, 1000)); + subtitle.Paragraphs.Add(new Paragraph("[Raining]" + Environment.NewLine + "Hallo.", 1, 2000)); + subtitle.Paragraphs.Add(new Paragraph("How are you?", 3000, 4000)); + + var mergeResult = MergeAndSplitHelper.MergeMultipleLines(subtitle, 0, 1500); + + Assert.IsNotNull(mergeResult); + Assert.AreEqual("[Raining]" + Environment.NewLine + "Hallo." + Environment.NewLine + "How are you?", mergeResult.Text); + Assert.AreEqual(subtitle.Paragraphs.Count, mergeResult.ParagraphCount); + Assert.AreEqual(3, mergeResult.MergeResultItems.Count); + Assert.AreEqual(true, mergeResult.MergeResultItems[0].IsEmpty); + Assert.AreEqual(false, mergeResult.MergeResultItems[1].IsEmpty); + Assert.AreEqual(false, mergeResult.MergeResultItems[1].Continious); + Assert.AreEqual(1, mergeResult.MergeResultItems[1].StartIndex); + Assert.AreEqual(1, mergeResult.MergeResultItems[1].EndIndex); + Assert.AreEqual('.', mergeResult.MergeResultItems[1].EndChar); + Assert.AreEqual(1, mergeResult.MergeResultItems[1].EndCharOccurences); + + var splitResult = MergeAndSplitHelper.SplitMultipleLines(mergeResult, mergeResult.Text, "en"); + Assert.AreEqual(subtitle.Paragraphs.Count, splitResult.Count); + + var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " "); + var splitResultText = string.Join(" ", splitResult); + Assert.AreEqual(inputText, splitResultText); + } + [TestMethod] public void Test3() { @@ -145,6 +178,10 @@ namespace Test.Logic.AutoTranslate Assert.AreEqual("My name is Peter. And Jones.", splitResult[5]); Assert.AreEqual("", splitResult[6]); Assert.AreEqual("Hallo there.", splitResult[7]); + + var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " "); + var splitResultText = string.Join(" ", splitResult); + Assert.AreEqual(inputText, splitResultText); } } } diff --git a/src/ui/Forms/Translate/MergeAndSplitHelper.cs b/src/ui/Forms/Translate/MergeAndSplitHelper.cs index 749a5ffb2..6f40a4eb7 100644 --- a/src/ui/Forms/Translate/MergeAndSplitHelper.cs +++ b/src/ui/Forms/Translate/MergeAndSplitHelper.cs @@ -19,11 +19,6 @@ namespace Nikse.SubtitleEdit.Forms.Translate } var p = sourceSubtitle.Paragraphs[index]; - if (p.Text.Contains("{\\", StringComparison.Ordinal) || p.Text.EndsWith(')') || p.Text.StartsWith('-')) - { - return 0; - } - char? splitAtChar = null; var mergeCount = 0; var allItalic = false; @@ -31,33 +26,52 @@ namespace Nikse.SubtitleEdit.Forms.Translate var text = string.Empty; var linesTranslate = 0; - if (MergeWithThreeNext(sourceSubtitle, index, source.Code)) + MergeResult mergeResult = null; + List formattings = null; + + if (mergeCount == 0) { - mergeCount = 3; - allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i"); - allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b"); - text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold); - } - else if (MergeWithTwoNext(sourceSubtitle, index, source.Code)) - { - mergeCount = 2; - allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i"); - allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b"); - text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold); - } - else if (MergeWithNext(sourceSubtitle, index, source.Code)) - { - mergeCount = 1; - allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i"); - allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b"); - text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold); + var maxChars = + autoTranslator.Name == GoogleTranslateV1.StaticName || + autoTranslator.Name == ChatGptTranslate.StaticName || + autoTranslator.Name == MicrosoftTranslator.StaticName + ? 1500 + : 250; + + // Try to handle (remove and save info for later restore) italics, bold, alignment where possible + var s = new Subtitle(sourceSubtitle); + formattings = HandleFormatting(s, index, target.Code); + + // Merge text for better translation and save info enough to split again later + mergeResult = MergeMultipleLines(s, index, maxChars); + mergeCount = mergeResult.ParagraphCount; + text = mergeResult.Text; } - //if (mergeCount == 0 && autoTranslator.Name == GoogleTranslateV1.StaticName || autoTranslator.Name == ChatGptTranslate.StaticName) - //{ - // var maxChars = 1500; - // var mergeResult = MergeMultipleLines(sourceSubtitle, index, maxChars); - //} + if (mergeCount == 0) + { + if (MergeWithThreeNext(sourceSubtitle, index, source.Code)) + { + mergeCount = 3; + allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i"); + allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b"); + text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold); + } + else if (MergeWithTwoNext(sourceSubtitle, index, source.Code)) + { + mergeCount = 2; + allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i"); + allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b"); + text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold); + } + else if (MergeWithNext(sourceSubtitle, index, source.Code)) + { + mergeCount = 1; + allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i"); + allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b"); + text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold); + } + } // just take next sentence too var next = sourceSubtitle.GetParagraphOrDefault(index + 1); @@ -70,10 +84,32 @@ namespace Nikse.SubtitleEdit.Forms.Translate text = Utilities.UnbreakLine(p.Text) + Environment.NewLine + Utilities.UnbreakLine(next.Text); } - if (mergeCount > 0 && !text.Contains("{\\", StringComparison.Ordinal)) + if (mergeResult != null) { var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code); + var splitResult = SplitMultipleLines(mergeResult, mergedTranslation, target.Code); + if (splitResult.Count == mergeCount) + { + var idx = 0; + foreach (var line in splitResult) + { + var s = formattings[idx].ReAddFormatting(line); + targetSubtitle.Paragraphs[index].Text = s; + index++; + linesTranslate++; + idx++; + } + + return linesTranslate; + } + } + + if (mergeCount > 0) + { + var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code); + List result; + if (splitAtChar != null && mergeCount == 1) { result = SplitResultAtSplitChar(mergedTranslation, splitAtChar.Value, target.Code); @@ -115,6 +151,22 @@ namespace Nikse.SubtitleEdit.Forms.Translate return linesTranslate; } + private static List HandleFormatting(Subtitle sourceSubtitle, int index, string sourceLanguage) + { + var formattings = new List(); + + for (var i = index; i < sourceSubtitle.Paragraphs.Count; i++) + { + var p = sourceSubtitle.Paragraphs[i]; + var f = new Formatting(); + var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage); + p.Text = text; + formattings.Add(f); + } + + return formattings; + } + public class MergeResultItem { public string Text { get; set; } @@ -161,7 +213,7 @@ namespace Nikse.SubtitleEdit.Forms.Translate { var p = sourceSubtitle.Paragraphs[i]; - if (item != null && Utilities.UrlEncodeLength(item.Text + Environment.NewLine + p.Text) > maxTextSize) + if (item != null && Utilities.UrlEncodeLength(result.Text + Environment.NewLine + p.Text) > maxTextSize) { break; }