Improve auto-translate with auto merge/split - thx xmrea :)

Fix #4238
Can be turned off with the setting "TranslateAllowSplit"
This commit is contained in:
Nikolaj Olsson 2020-06-11 19:32:51 +02:00
parent 0a6fd71d01
commit 78113946b8
6 changed files with 178 additions and 118 deletions

View File

@ -141,10 +141,9 @@ namespace Nikse.SubtitleEdit.Core
public string GoogleApiV2Key { get; set; }
public bool GoogleApiV2KeyInfoShow { get; set; }
public bool GoogleTranslateNoKeyWarningShow { get; set; }
public bool UseGooleApiPaidService { get; set; }
public int GoogleApiV1ChunkSize { get; set; }
public string GoogleTranslateLastTargetLanguage { get; set; }
public bool TranslateAutoSplit { get; set; }
public bool TranslateAllowSplit { get; set; }
public bool ListViewSyntaxColorDurationSmall { get; set; }
public bool ListViewSyntaxColorDurationBig { get; set; }
public bool ListViewSyntaxColorOverlap { get; set; }
@ -332,10 +331,9 @@ namespace Nikse.SubtitleEdit.Core
MicrosoftTranslatorTokenEndpoint = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken";
GoogleApiV2KeyInfoShow = true;
GoogleTranslateNoKeyWarningShow = true;
UseGooleApiPaidService = false;
GoogleApiV1ChunkSize = 1500;
GoogleTranslateLastTargetLanguage = "en";
TranslateAutoSplit = true;
TranslateAllowSplit = true;
CheckOneLetterWords = true;
SpellCheckEnglishAllowInQuoteAsIng = false;
SpellCheckShowCompletedMessage = true;
@ -3320,12 +3318,6 @@ $HorzAlign = Center
settings.Tools.GoogleApiV2KeyInfoShow = Convert.ToBoolean(subNode.InnerText);
}
subNode = node.SelectSingleNode("UseGooleApiPaidService");
if (subNode != null)
{
settings.Tools.UseGooleApiPaidService = Convert.ToBoolean(subNode.InnerText);
}
subNode = node.SelectSingleNode("GoogleApiV1ChunkSize");
if (subNode != null)
{
@ -3338,10 +3330,10 @@ $HorzAlign = Center
settings.Tools.GoogleTranslateLastTargetLanguage = subNode.InnerText;
}
subNode = node.SelectSingleNode("TranslateAutoSplit");
subNode = node.SelectSingleNode("TranslateAllowSplit");
if (subNode != null)
{
settings.Tools.TranslateAutoSplit = Convert.ToBoolean(subNode.InnerText);
settings.Tools.TranslateAllowSplit = Convert.ToBoolean(subNode.InnerText);
}
subNode = node.SelectSingleNode("ListViewSyntaxColorDurationSmall");
@ -7188,10 +7180,9 @@ $HorzAlign = Center
textWriter.WriteElementString("GoogleApiV2Key", settings.Tools.GoogleApiV2Key);
textWriter.WriteElementString("GoogleApiV2KeyInfoShow", settings.Tools.GoogleApiV2KeyInfoShow.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("GoogleTranslateNoKeyWarningShow", settings.Tools.GoogleTranslateNoKeyWarningShow.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("UseGooleApiPaidService", settings.Tools.UseGooleApiPaidService.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("GoogleApiV1ChunkSize", settings.Tools.GoogleApiV1ChunkSize.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("GoogleTranslateLastTargetLanguage", settings.Tools.GoogleTranslateLastTargetLanguage);
textWriter.WriteElementString("TranslateAutoSplit", settings.Tools.TranslateAutoSplit.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("TranslateAllowSplit", settings.Tools.TranslateAllowSplit.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("ListViewSyntaxColorDurationSmall", settings.Tools.ListViewSyntaxColorDurationSmall.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("ListViewSyntaxColorDurationBig", settings.Tools.ListViewSyntaxColorDurationBig.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("ListViewSyntaxColorLongLines", settings.Tools.ListViewSyntaxColorLongLines.ToString(CultureInfo.InvariantCulture));

View File

@ -6,6 +6,11 @@ namespace Nikse.SubtitleEdit.Core.Translate
{
public class Formatting
{
/// <summary>
/// Source-language codes that <c>SetTagsAndReturnTrimmed</c> checks before it
/// un-breaks a two-line text or merges a text with the following one.
/// </summary>
private static readonly List<string> LanguagesAllowingLineMerging = new List<string>
{
    "en",
    "da",
    "nl",
    "de",
    "sv",
    "nb",
    "fr",
    "it",
    "tr",
    "es",
    "pt",
    "sr",
    "ru",
    "lv",
    "lt",
    "et",
    "ro",
    "pl",
    "ar",
    "he",
    "no",
};
private bool Italic { get; set; }
private string Font { get; set; }
private bool ItalicTwoLines { get; set; }
@ -14,8 +19,20 @@ namespace Nikse.SubtitleEdit.Core.Translate
private bool SquareBrackets { get; set; }
private bool SquareBracketsUppercase { get; set; }
public string SetTagsAndReturnTrimmed(string input, string source)
private int BreakNumberOfLines { get; set; }
private bool BreakSplitAtLineEnding { get; set; }
private bool BreakIsDialog { get; set; }
public bool SkipNext { get; set; }
public string SetTagsAndReturnTrimmed(string input, string sourceLanguage, string inputNext)
{
if (string.IsNullOrEmpty(input))
{
return string.Empty;
}
var text = input.Trim();
// SSA/ASS tags
@ -51,20 +68,6 @@ namespace Nikse.SubtitleEdit.Core.Translate
text = text.Remove(text.Length - "</font>".Length);
}
// Un-break line
var allowedLanguages = new List<string> { "en", "da", "nl", "de", "sv", "nb", "fr", "it" };
if (allowedLanguages.Contains(source))
{
var lines = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
if (lines.Count == 2 && !string.IsNullOrEmpty(lines[0]) && !string.IsNullOrEmpty(lines[1]) &&
char.IsLetterOrDigit(lines[0][lines[0].Length - 1]) &&
char.IsLower(lines[1][0]))
{
text = Utilities.UnbreakLine(text);
AutoBreak = true;
}
}
// Square brackets
if (text.StartsWith("[", StringComparison.Ordinal) && text.EndsWith("]", StringComparison.Ordinal) &&
Utilities.GetNumberOfLines(text) == 1 && Utilities.CountTagInText(text, "[") == 1 &&
@ -82,12 +85,37 @@ namespace Nikse.SubtitleEdit.Core.Translate
text = text.Replace("[", string.Empty).Replace("]", string.Empty);
}
// Un-break line
if (LanguagesAllowingLineMerging.Contains(sourceLanguage))
{
var lines = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
if (lines.Count == 2 && !string.IsNullOrEmpty(lines[0]) && !string.IsNullOrEmpty(lines[1]) &&
char.IsLetterOrDigit(lines[0][lines[0].Length - 1]) &&
char.IsLower(lines[1][0]))
{
text = Utilities.UnbreakLine(text);
AutoBreak = true;
}
if (Configuration.Settings.Tools.TranslateAllowSplit &&
!string.IsNullOrEmpty(inputNext) && !string.IsNullOrEmpty(text) &&
(char.IsLetterOrDigit(text[text.Length - 1]) || text[text.Length - 1] == ',' || sourceLanguage == "ar" && text[text.Length - 1] == '\u060C') &&
char.IsLower(inputNext[0]) &&
!text.Contains('-') && !inputNext.Contains('-') && !Italic && !SquareBrackets && string.IsNullOrEmpty(Font))
{
text = Utilities.UnbreakLine(text);
text = text + " " + Utilities.UnbreakLine(inputNext);
SkipNext = true;
}
}
return text.Trim();
}
public string ReAddFormatting(string input)
public string ReAddFormatting(string input, out string nextText)
{
var text = input.Trim();
nextText = null;
// Auto-break line
if (AutoBreak)
@ -95,6 +123,31 @@ namespace Nikse.SubtitleEdit.Core.Translate
text = Utilities.AutoBreakLine(text);
}
if (SkipNext)
{
var lines = Utilities.AutoBreakLine(text).SplitToLines();
if (lines.Count == 1)
{
nextText = string.Empty;
}
else if (lines.Count == 2)
{
text = Utilities.AutoBreakLine(lines[0]);
nextText = Utilities.AutoBreakLine(lines[1]);
}
else
{
text = Utilities.AutoBreakLine(lines[0] + " " + lines[1]);
var sb = new StringBuilder();
for (int i = 2; i < lines.Count; i++)
{
sb.Append(lines[i]);
sb.Append(" ");
}
nextText = Utilities.AutoBreakLine(sb.ToString().TrimEnd());
}
}
// Square brackets
if (SquareBracketsUppercase)
{
@ -132,11 +185,6 @@ namespace Nikse.SubtitleEdit.Core.Translate
return text;
}
private int BreakNumberOfLines { get; set; }
private bool BreakSplitAtLineEnding { get; set; }
private bool BreakIsDialog { get; set; }
public string UnBreak(string text, string source)
{
var lines = source.SplitToLines();

View File

@ -34,18 +34,37 @@ namespace Nikse.SubtitleEdit.Core.Translate
{
string result;
var input = new StringBuilder();
var formatList = new Formatting[paragraphs.Count];
var formatList = new List<Formatting>();
bool skipNext = false;
for (var index = 0; index < paragraphs.Count; index++)
{
if (skipNext)
{
skipNext = false;
continue;
}
var p = paragraphs[index];
var f = new Formatting();
formatList[index] = f;
formatList.Add(f);
if (input.Length > 0)
{
input.Append(" " + SplitChar + " ");
}
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text.Replace(SplitChar.ToString(), string.Empty), sourceLanguage), sourceLanguage);
text = f.UnBreak(text, p.Text);
var nextText = string.Empty;
if (index < paragraphs.Count - 1 && paragraphs[index + 1].StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < 200)
{
nextText = paragraphs[index + 1].Text;
}
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text.Replace(SplitChar.ToString(), string.Empty), sourceLanguage), sourceLanguage, nextText);
skipNext = f.SkipNext;
if (!skipNext)
{
text = f.UnBreak(text, p.Text);
}
input.Append(text);
}
@ -114,13 +133,22 @@ namespace Nikse.SubtitleEdit.Core.Translate
s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
s = s.Replace(" " + Environment.NewLine, Environment.NewLine);
s = s.Replace(" " + Environment.NewLine, Environment.NewLine).Trim();
if (formatList.Length > index)
string nextText = null;
if (formatList.Count > index)
{
s = formatList[index].ReAddFormatting(s);
s = formatList[index].ReBreak(s, targetLanguage);
s = formatList[index].ReAddFormatting(s, out nextText);
if (nextText == null)
{
s = formatList[index].ReBreak(s, targetLanguage);
}
}
resultList.Add(s);
if (nextText != null)
{
resultList.Add(nextText);
}
}
if (resultList.Count > paragraphs.Count)

View File

@ -153,18 +153,37 @@ namespace Nikse.SubtitleEdit.Core.Translate
var baseUrl = "https://translation.googleapis.com/language/translate/v2";
var format = "text";
var input = new StringBuilder();
var formattings = new Formatting[paragraphs.Count];
var formatList = new List<Formatting>();
bool skipNext = false;
for (var index = 0; index < paragraphs.Count; index++)
{
if (skipNext)
{
skipNext = false;
continue;
}
var p = paragraphs[index];
var f = new Formatting();
formattings[index] = f;
formatList.Add(f);
if (input.Length > 0)
{
input.Append("&");
}
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage);
var nextText = string.Empty;
if (index < paragraphs.Count - 1 && paragraphs[index + 1].StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < 200)
{
nextText = paragraphs[index + 1].Text;
}
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage, nextText);
skipNext = f.SkipNext;
if (!skipNext)
{
text = f.UnBreak(text, p.Text);
}
input.Append("q=" + Utilities.UrlEncode(text));
}
@ -198,14 +217,25 @@ namespace Nikse.SubtitleEdit.Core.Translate
{
if (v2[innerKey2] is string translatedText)
{
string nextText = null;
translatedText = Regex.Unescape(translatedText);
translatedText = string.Join(Environment.NewLine, translatedText.SplitToLines());
translatedText = TranslationHelper.PostTranslate(translatedText, targetLanguage);
if (resultList.Count < formattings.Length)
if (resultList.Count < formatList.Count)
{
translatedText = formattings[resultList.Count].ReAddFormatting(translatedText);
translatedText = formatList[resultList.Count].ReAddFormatting(translatedText, out nextText);
if (nextText == null)
{
translatedText = formatList[resultList.Count].ReBreak(translatedText, targetLanguage);
}
}
resultList.Add(translatedText);
if (nextText != null)
{
resultList.Add(nextText);
}
}
}
}

View File

@ -106,9 +106,16 @@ namespace Nikse.SubtitleEdit.Core.Translate
var jsonBuilder = new StringBuilder();
jsonBuilder.Append("[");
bool isFirst = true;
var formatList = new Formatting[paragraphs.Count];
bool skipNext = false;
var formatList = new List<Formatting>();
for (var index = 0; index < paragraphs.Count; index++)
{
if (skipNext)
{
skipNext = false;
continue;
}
var p = paragraphs[index];
if (!isFirst)
{
@ -119,10 +126,20 @@ namespace Nikse.SubtitleEdit.Core.Translate
isFirst = false;
}
var nextText = string.Empty;
if (index < paragraphs.Count - 1 && paragraphs[index + 1].StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < 200)
{
nextText = paragraphs[index + 1].Text;
}
var f = new Formatting();
formatList[index] = f;
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage);
text = f.UnBreak(text, p.Text);
formatList.Add(f);
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage, nextText);
skipNext = f.SkipNext;
if (!skipNext)
{
text = f.UnBreak(text, p.Text);
}
jsonBuilder.Append("{ \"Text\":\"" + Json.EncodeJsonText(text) + "\"}");
}
@ -153,15 +170,25 @@ namespace Nikse.SubtitleEdit.Core.Translate
var textDics = (Dictionary<string, object>)o;
var res = (string)textDics["text"];
if (formatList.Length > results.Count)
string nextText = null;
if (formatList.Count > results.Count)
{
res = formatList[results.Count].ReAddFormatting(res);
res = formatList[results.Count].ReBreak(res, targetLanguage);
res = formatList[results.Count].ReAddFormatting(res, out nextText);
if (nextText == null)
{
res = formatList[results.Count].ReBreak(res, targetLanguage);
}
}
res = TranslationHelper.PostTranslate(res, targetLanguage);
results.Add(res);
if (nextText != null)
{
results.Add(nextText);
}
}
}
}

View File

@ -368,57 +368,6 @@ namespace Nikse.SubtitleEdit.Forms
}
}
/// <summary>
/// Records the italic formatting type of paragraph <paramref name="i"/> in
/// <c>_formattingTypes</c>, strips the detected italic tags, and (unless
/// <paramref name="skipSplit"/> is set) removes the line break of a two-line text
/// whose first line ends with a letter, digit or comma.
/// </summary>
/// <param name="i">Index into <c>_formattingTypes</c> / <c>_autoSplit</c>.</param>
/// <param name="text">Paragraph text; it is trimmed before inspection.</param>
/// <param name="skipSplit">When true, only the formatting step is performed.</param>
/// <returns>The text without the detected italic tags, possibly un-broken.</returns>
private string SetFormattingTypeAndSplitting(int i, string text, bool skipSplit)
{
    const string openTag = "<i>";
    const string closeTag = "</i>";

    var working = text.Trim();
    bool wrappedInItalic = working.StartsWith(openTag, StringComparison.Ordinal) &&
                           working.EndsWith(closeTag, StringComparison.Ordinal);

    if (wrappedInItalic &&
        working.Contains(closeTag + Environment.NewLine + openTag) &&
        Utilities.GetNumberOfLines(working) == 2 &&
        Utilities.CountTagInText(working, openTag) == 2)
    {
        // Each of the two lines carries its own <i>...</i> pair.
        _formattingTypes[i] = FormattingType.ItalicTwoLines;
        working = HtmlUtil.RemoveOpenCloseTags(working, HtmlUtil.TagItalic);
    }
    else if (wrappedInItalic && Utilities.CountTagInText(working, openTag) == 1)
    {
        // A single <i>...</i> pair wraps the whole text: keep only what is between the tags.
        _formattingTypes[i] = FormattingType.Italic;
        working = working.Substring(openTag.Length, working.Length - openTag.Length - closeTag.Length);
    }
    else
    {
        _formattingTypes[i] = FormattingType.None;
    }

    if (!skipSplit)
    {
        var lines = working.SplitToLines();
        if (Configuration.Settings.Tools.TranslateAutoSplit &&
            lines.Count == 2 &&
            !string.IsNullOrEmpty(lines[0]))
        {
            var firstLine = lines[0];
            var lastCharacter = firstLine.Substring(firstLine.Length - 1);
            var mergeTriggers = Utilities.AllLettersAndNumbers + ",";
            if (mergeTriggers.Contains(lastCharacter))
            {
                // Flag the paragraph in _autoSplit and drop the line break.
                _autoSplit[i] = true;
                working = Utilities.RemoveLineBreaks(working);
            }
        }
    }

    return working;
}
/// <summary>
/// Splits <paramref name="translatedText"/> with <c>SplitToLines</c>, writes each cleaned
/// piece into consecutive paragraphs of <c>TranslatedSubtitle</c> beginning at
/// <paramref name="start"/>, then refills the target list view and selects
/// <paramref name="end"/>.
/// </summary>
/// <param name="translatedText">Translated text for several paragraphs.</param>
/// <param name="start">Index of the first paragraph to fill.</param>
/// <param name="end">Index selected and scrolled into view after the refill.</param>
private void FillTranslatedText(string translatedText, int start, int end)
{
    var pieces = SplitToLines(translatedText);
    var targetIndex = start;
    for (var i = 0; i < pieces.Count; i++, targetIndex++)
    {
        // Pieces beyond the last paragraph are ignored.
        if (targetIndex >= TranslatedSubtitle.Paragraphs.Count)
        {
            continue;
        }

        var cleaned = CleanText(pieces[i], targetIndex);
        TranslatedSubtitle.Paragraphs[targetIndex].Text = cleaned;
    }

    subtitleListViewTo.BeginUpdate();
    subtitleListViewTo.Fill(TranslatedSubtitle);
    subtitleListViewTo.SelectIndexAndEnsureVisible(end);
    subtitleListViewTo.EndUpdate();
}
private string CleanText(string s, int index)
{
string cleanText = s.Replace("</p>", string.Empty).Trim();
@ -485,19 +434,6 @@ namespace Nikse.SubtitleEdit.Forms
return cleanText;
}
/// <summary>
/// Splits translated text into pieces at NUL characters. When
/// <c>_googleTranslate</c> is false, the "+-+" separator and its space-containing
/// variants are converted to NUL first.
/// </summary>
/// <param name="translatedText">Translated text containing separators.</param>
/// <returns>The pieces between separators, in order.</returns>
private List<string> SplitToLines(string translatedText)
{
    var normalized = translatedText;
    if (!_googleTranslate)
    {
        // Collapse the separator variants into the canonical "+-+", in this exact order.
        var separatorVariants = new[] { "+- +", "+ -+", "+ - +", "+ +" };
        foreach (var variant in separatorVariants)
        {
            normalized = normalized.Replace(variant, "+-+");
        }

        normalized = normalized.Replace("+-+", "\0");
    }

    return normalized.Split('\0').ToList();
}
public void FillComboWithLanguages(ComboBox comboBox)
{
if (!_googleTranslate)