mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-27 14:32:35 +01:00
Testing new auto translate merge-split-helper
This commit is contained in:
parent
13db58f377
commit
c99b56fd58
@ -28,7 +28,10 @@ namespace Test.Logic.AutoTranslate
|
|||||||
|
|
||||||
var splitResult = MergeAndSplitHelper.SplitMultipleLines(mergeResult, mergeResult.Text, "en");
|
var splitResult = MergeAndSplitHelper.SplitMultipleLines(mergeResult, mergeResult.Text, "en");
|
||||||
Assert.AreEqual(subtitle.Paragraphs.Count, splitResult.Count);
|
Assert.AreEqual(subtitle.Paragraphs.Count, splitResult.Count);
|
||||||
Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult));
|
|
||||||
|
var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " ");
|
||||||
|
var splitResultText = string.Join(" ", splitResult);
|
||||||
|
Assert.AreEqual(inputText, splitResultText);
|
||||||
}
|
}
|
||||||
|
|
||||||
[TestMethod]
|
[TestMethod]
|
||||||
@ -58,6 +61,36 @@ namespace Test.Logic.AutoTranslate
|
|||||||
Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult));
|
Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[TestMethod]
public void TestTextForHiWithTextAfter()
{
    // Arrange: an empty paragraph, a two-line paragraph whose first line is
    // the bracketed "[Raining]" text, and a plain paragraph after it.
    var hiParagraphText = "[Raining]" + Environment.NewLine + "Hallo.";
    var subtitle = new Subtitle();
    subtitle.Paragraphs.Add(new Paragraph("", 0, 1000));
    subtitle.Paragraphs.Add(new Paragraph(hiParagraphText, 1, 2000));
    subtitle.Paragraphs.Add(new Paragraph("How are you?", 3000, 4000));

    // Act: merge starting at paragraph 0 with a size limit of 1500.
    var mergeResult = MergeAndSplitHelper.MergeMultipleLines(subtitle, 0, 1500);

    // Assert: the merged text joins the non-empty paragraphs with new lines,
    // and every input paragraph is accounted for.
    Assert.IsNotNull(mergeResult);
    var expectedMergedText = hiParagraphText + Environment.NewLine + "How are you?";
    Assert.AreEqual(expectedMergedText, mergeResult.Text);
    Assert.AreEqual(subtitle.Paragraphs.Count, mergeResult.ParagraphCount);

    var items = mergeResult.MergeResultItems;
    Assert.AreEqual(3, items.Count);
    Assert.AreEqual(true, items[0].IsEmpty);

    // The second item describes the "[Raining]" / "Hallo." paragraph.
    var hiItem = items[1];
    Assert.AreEqual(false, hiItem.IsEmpty);
    Assert.AreEqual(false, hiItem.Continious);
    Assert.AreEqual(1, hiItem.StartIndex);
    Assert.AreEqual(1, hiItem.EndIndex);
    Assert.AreEqual('.', hiItem.EndChar);
    Assert.AreEqual(1, hiItem.EndCharOccurences);

    // Round trip: splitting the untranslated merged text must give back one
    // line per original paragraph.
    var splitResult = MergeAndSplitHelper.SplitMultipleLines(mergeResult, mergeResult.Text, "en");
    Assert.AreEqual(subtitle.Paragraphs.Count, splitResult.Count);

    // Compare the texts with line breaks flattened to single spaces.
    var originalTexts = subtitle.Paragraphs.Select(p => p.Text);
    var inputText = string.Join(" ", originalTexts).Replace(Environment.NewLine, " ");
    var splitResultText = string.Join(" ", splitResult);
    Assert.AreEqual(inputText, splitResultText);
}
|
||||||
|
|
||||||
[TestMethod]
|
[TestMethod]
|
||||||
public void Test3()
|
public void Test3()
|
||||||
{
|
{
|
||||||
@ -145,6 +178,10 @@ namespace Test.Logic.AutoTranslate
|
|||||||
Assert.AreEqual("My name is Peter. And Jones.", splitResult[5]);
|
Assert.AreEqual("My name is Peter. And Jones.", splitResult[5]);
|
||||||
Assert.AreEqual("", splitResult[6]);
|
Assert.AreEqual("", splitResult[6]);
|
||||||
Assert.AreEqual("Hallo there.", splitResult[7]);
|
Assert.AreEqual("Hallo there.", splitResult[7]);
|
||||||
|
|
||||||
|
var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " ");
|
||||||
|
var splitResultText = string.Join(" ", splitResult);
|
||||||
|
Assert.AreEqual(inputText, splitResultText);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,11 +19,6 @@ namespace Nikse.SubtitleEdit.Forms.Translate
|
|||||||
}
|
}
|
||||||
|
|
||||||
var p = sourceSubtitle.Paragraphs[index];
|
var p = sourceSubtitle.Paragraphs[index];
|
||||||
if (p.Text.Contains("{\\", StringComparison.Ordinal) || p.Text.EndsWith(')') || p.Text.StartsWith('-'))
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
char? splitAtChar = null;
|
char? splitAtChar = null;
|
||||||
var mergeCount = 0;
|
var mergeCount = 0;
|
||||||
var allItalic = false;
|
var allItalic = false;
|
||||||
@ -31,33 +26,52 @@ namespace Nikse.SubtitleEdit.Forms.Translate
|
|||||||
var text = string.Empty;
|
var text = string.Empty;
|
||||||
var linesTranslate = 0;
|
var linesTranslate = 0;
|
||||||
|
|
||||||
if (MergeWithThreeNext(sourceSubtitle, index, source.Code))
|
MergeResult mergeResult = null;
|
||||||
|
List<Formatting> formattings = null;
|
||||||
|
|
||||||
|
if (mergeCount == 0)
|
||||||
{
|
{
|
||||||
mergeCount = 3;
|
var maxChars =
|
||||||
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
|
autoTranslator.Name == GoogleTranslateV1.StaticName ||
|
||||||
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
|
autoTranslator.Name == ChatGptTranslate.StaticName ||
|
||||||
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
|
autoTranslator.Name == MicrosoftTranslator.StaticName
|
||||||
}
|
? 1500
|
||||||
else if (MergeWithTwoNext(sourceSubtitle, index, source.Code))
|
: 250;
|
||||||
{
|
|
||||||
mergeCount = 2;
|
// Try to handle (remove and save info for later restore) italics, bold, alignment where possible
|
||||||
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
|
var s = new Subtitle(sourceSubtitle);
|
||||||
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
|
formattings = HandleFormatting(s, index, target.Code);
|
||||||
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
|
|
||||||
}
|
// Merge text for better translation and save info enough to split again later
|
||||||
else if (MergeWithNext(sourceSubtitle, index, source.Code))
|
mergeResult = MergeMultipleLines(s, index, maxChars);
|
||||||
{
|
mergeCount = mergeResult.ParagraphCount;
|
||||||
mergeCount = 1;
|
text = mergeResult.Text;
|
||||||
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
|
|
||||||
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
|
|
||||||
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//if (mergeCount == 0 && autoTranslator.Name == GoogleTranslateV1.StaticName || autoTranslator.Name == ChatGptTranslate.StaticName)
|
if (mergeCount == 0)
|
||||||
//{
|
{
|
||||||
// var maxChars = 1500;
|
if (MergeWithThreeNext(sourceSubtitle, index, source.Code))
|
||||||
// var mergeResult = MergeMultipleLines(sourceSubtitle, index, maxChars);
|
{
|
||||||
//}
|
mergeCount = 3;
|
||||||
|
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
|
||||||
|
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
|
||||||
|
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
|
||||||
|
}
|
||||||
|
else if (MergeWithTwoNext(sourceSubtitle, index, source.Code))
|
||||||
|
{
|
||||||
|
mergeCount = 2;
|
||||||
|
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
|
||||||
|
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
|
||||||
|
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
|
||||||
|
}
|
||||||
|
else if (MergeWithNext(sourceSubtitle, index, source.Code))
|
||||||
|
{
|
||||||
|
mergeCount = 1;
|
||||||
|
allItalic = HasAllLinesTag(sourceSubtitle, index, mergeCount, "i");
|
||||||
|
allBold = HasAllLinesTag(sourceSubtitle, index, mergeCount, "b");
|
||||||
|
text = MergeLines(sourceSubtitle, index, mergeCount, allItalic, allBold);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// just take next sentence too
|
// just take next sentence too
|
||||||
var next = sourceSubtitle.GetParagraphOrDefault(index + 1);
|
var next = sourceSubtitle.GetParagraphOrDefault(index + 1);
|
||||||
@ -70,10 +84,32 @@ namespace Nikse.SubtitleEdit.Forms.Translate
|
|||||||
text = Utilities.UnbreakLine(p.Text) + Environment.NewLine + Utilities.UnbreakLine(next.Text);
|
text = Utilities.UnbreakLine(p.Text) + Environment.NewLine + Utilities.UnbreakLine(next.Text);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mergeCount > 0 && !text.Contains("{\\", StringComparison.Ordinal))
|
if (mergeResult != null)
|
||||||
{
|
{
|
||||||
var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code);
|
var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code);
|
||||||
|
var splitResult = SplitMultipleLines(mergeResult, mergedTranslation, target.Code);
|
||||||
|
if (splitResult.Count == mergeCount)
|
||||||
|
{
|
||||||
|
var idx = 0;
|
||||||
|
foreach (var line in splitResult)
|
||||||
|
{
|
||||||
|
var s = formattings[idx].ReAddFormatting(line);
|
||||||
|
targetSubtitle.Paragraphs[index].Text = s;
|
||||||
|
index++;
|
||||||
|
linesTranslate++;
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return linesTranslate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mergeCount > 0)
|
||||||
|
{
|
||||||
|
var mergedTranslation = await autoTranslator.Translate(text, source.Code, target.Code);
|
||||||
|
|
||||||
List<string> result;
|
List<string> result;
|
||||||
|
|
||||||
if (splitAtChar != null && mergeCount == 1)
|
if (splitAtChar != null && mergeCount == 1)
|
||||||
{
|
{
|
||||||
result = SplitResultAtSplitChar(mergedTranslation, splitAtChar.Value, target.Code);
|
result = SplitResultAtSplitChar(mergedTranslation, splitAtChar.Value, target.Code);
|
||||||
@ -115,6 +151,22 @@ namespace Nikse.SubtitleEdit.Forms.Translate
|
|||||||
return linesTranslate;
|
return linesTranslate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs every paragraph from <paramref name="index"/> to the end of
/// <paramref name="sourceSubtitle"/> through <c>TranslationHelper.PreTranslate</c>
/// and <c>Formatting.SetTagsAndReturnTrimmed</c>, overwrites the paragraph's
/// text with the returned text, and collects the <c>Formatting</c> objects.
/// </summary>
/// <param name="sourceSubtitle">Subtitle whose paragraph texts are rewritten in place.</param>
/// <param name="index">Index of the first paragraph to process.</param>
/// <param name="sourceLanguage">Language code forwarded to both helper calls.</param>
/// <returns>
/// One <c>Formatting</c> per processed paragraph, in paragraph order; element 0
/// belongs to the paragraph at <paramref name="index"/>.
/// </returns>
private static List<Formatting> HandleFormatting(Subtitle sourceSubtitle, int index, string sourceLanguage)
{
    var collected = new List<Formatting>();

    var paragraphIndex = index;
    while (paragraphIndex < sourceSubtitle.Paragraphs.Count)
    {
        var paragraph = sourceSubtitle.Paragraphs[paragraphIndex];
        var formatting = new Formatting();

        // Pre-translate first, then let the Formatting object record what it
        // needs and hand back the text that replaces the paragraph's text.
        var preTranslated = TranslationHelper.PreTranslate(paragraph.Text, sourceLanguage);
        paragraph.Text = formatting.SetTagsAndReturnTrimmed(preTranslated, sourceLanguage);

        collected.Add(formatting);
        paragraphIndex++;
    }

    return collected;
}
|
||||||
|
|
||||||
public class MergeResultItem
|
public class MergeResultItem
|
||||||
{
|
{
|
||||||
public string Text { get; set; }
|
public string Text { get; set; }
|
||||||
@ -161,7 +213,7 @@ namespace Nikse.SubtitleEdit.Forms.Translate
|
|||||||
{
|
{
|
||||||
var p = sourceSubtitle.Paragraphs[i];
|
var p = sourceSubtitle.Paragraphs[i];
|
||||||
|
|
||||||
if (item != null && Utilities.UrlEncodeLength(item.Text + Environment.NewLine + p.Text) > maxTextSize)
|
if (item != null && Utilities.UrlEncodeLength(result.Text + Environment.NewLine + p.Text) > maxTextSize)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user