mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-27 14:32:35 +01:00
Improve auto-translate with auto merge/split - thx xmrea :)
Fix #4238. Can be turned off with the setting "TranslateAllowSplit".
This commit is contained in:
parent
0a6fd71d01
commit
78113946b8
@ -141,10 +141,9 @@ namespace Nikse.SubtitleEdit.Core
|
||||
public string GoogleApiV2Key { get; set; }
|
||||
public bool GoogleApiV2KeyInfoShow { get; set; }
|
||||
public bool GoogleTranslateNoKeyWarningShow { get; set; }
|
||||
public bool UseGooleApiPaidService { get; set; }
|
||||
public int GoogleApiV1ChunkSize { get; set; }
|
||||
public string GoogleTranslateLastTargetLanguage { get; set; }
|
||||
public bool TranslateAutoSplit { get; set; }
|
||||
public bool TranslateAllowSplit { get; set; }
|
||||
public bool ListViewSyntaxColorDurationSmall { get; set; }
|
||||
public bool ListViewSyntaxColorDurationBig { get; set; }
|
||||
public bool ListViewSyntaxColorOverlap { get; set; }
|
||||
@ -332,10 +331,9 @@ namespace Nikse.SubtitleEdit.Core
|
||||
MicrosoftTranslatorTokenEndpoint = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken";
|
||||
GoogleApiV2KeyInfoShow = true;
|
||||
GoogleTranslateNoKeyWarningShow = true;
|
||||
UseGooleApiPaidService = false;
|
||||
GoogleApiV1ChunkSize = 1500;
|
||||
GoogleTranslateLastTargetLanguage = "en";
|
||||
TranslateAutoSplit = true;
|
||||
TranslateAllowSplit = true;
|
||||
CheckOneLetterWords = true;
|
||||
SpellCheckEnglishAllowInQuoteAsIng = false;
|
||||
SpellCheckShowCompletedMessage = true;
|
||||
@ -3320,12 +3318,6 @@ $HorzAlign = Center
|
||||
settings.Tools.GoogleApiV2KeyInfoShow = Convert.ToBoolean(subNode.InnerText);
|
||||
}
|
||||
|
||||
subNode = node.SelectSingleNode("UseGooleApiPaidService");
|
||||
if (subNode != null)
|
||||
{
|
||||
settings.Tools.UseGooleApiPaidService = Convert.ToBoolean(subNode.InnerText);
|
||||
}
|
||||
|
||||
subNode = node.SelectSingleNode("GoogleApiV1ChunkSize");
|
||||
if (subNode != null)
|
||||
{
|
||||
@ -3338,10 +3330,10 @@ $HorzAlign = Center
|
||||
settings.Tools.GoogleTranslateLastTargetLanguage = subNode.InnerText;
|
||||
}
|
||||
|
||||
subNode = node.SelectSingleNode("TranslateAutoSplit");
|
||||
subNode = node.SelectSingleNode("TranslateAllowSplit");
|
||||
if (subNode != null)
|
||||
{
|
||||
settings.Tools.TranslateAutoSplit = Convert.ToBoolean(subNode.InnerText);
|
||||
settings.Tools.TranslateAllowSplit = Convert.ToBoolean(subNode.InnerText);
|
||||
}
|
||||
|
||||
subNode = node.SelectSingleNode("ListViewSyntaxColorDurationSmall");
|
||||
@ -7188,10 +7180,9 @@ $HorzAlign = Center
|
||||
textWriter.WriteElementString("GoogleApiV2Key", settings.Tools.GoogleApiV2Key);
|
||||
textWriter.WriteElementString("GoogleApiV2KeyInfoShow", settings.Tools.GoogleApiV2KeyInfoShow.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("GoogleTranslateNoKeyWarningShow", settings.Tools.GoogleTranslateNoKeyWarningShow.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("UseGooleApiPaidService", settings.Tools.UseGooleApiPaidService.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("GoogleApiV1ChunkSize", settings.Tools.GoogleApiV1ChunkSize.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("GoogleTranslateLastTargetLanguage", settings.Tools.GoogleTranslateLastTargetLanguage);
|
||||
textWriter.WriteElementString("TranslateAutoSplit", settings.Tools.TranslateAutoSplit.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("TranslateAllowSplit", settings.Tools.TranslateAllowSplit.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("ListViewSyntaxColorDurationSmall", settings.Tools.ListViewSyntaxColorDurationSmall.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("ListViewSyntaxColorDurationBig", settings.Tools.ListViewSyntaxColorDurationBig.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("ListViewSyntaxColorLongLines", settings.Tools.ListViewSyntaxColorLongLines.ToString(CultureInfo.InvariantCulture));
|
||||
|
@ -6,6 +6,11 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
{
|
||||
public class Formatting
|
||||
{
|
||||
private static readonly List<string> LanguagesAllowingLineMerging = new List<string>
|
||||
{
|
||||
"en", "da", "nl", "de", "sv", "nb", "fr", "it", "tr", "es", "pt", "sr", "ru", "lv", "lt", "et", "ro", "pl", "ar", "he", "no"
|
||||
};
|
||||
|
||||
private bool Italic { get; set; }
|
||||
private string Font { get; set; }
|
||||
private bool ItalicTwoLines { get; set; }
|
||||
@ -14,8 +19,20 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
private bool SquareBrackets { get; set; }
|
||||
private bool SquareBracketsUppercase { get; set; }
|
||||
|
||||
public string SetTagsAndReturnTrimmed(string input, string source)
|
||||
private int BreakNumberOfLines { get; set; }
|
||||
private bool BreakSplitAtLineEnding { get; set; }
|
||||
private bool BreakIsDialog { get; set; }
|
||||
|
||||
public bool SkipNext { get; set; }
|
||||
|
||||
|
||||
public string SetTagsAndReturnTrimmed(string input, string sourceLanguage, string inputNext)
|
||||
{
|
||||
if (string.IsNullOrEmpty(input))
|
||||
{
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
var text = input.Trim();
|
||||
|
||||
// SSA/ASS tags
|
||||
@ -51,20 +68,6 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
text = text.Remove(text.Length - "</font>".Length);
|
||||
}
|
||||
|
||||
// Un-break line
|
||||
var allowedLanguages = new List<string> { "en", "da", "nl", "de", "sv", "nb", "fr", "it" };
|
||||
if (allowedLanguages.Contains(source))
|
||||
{
|
||||
var lines = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
|
||||
if (lines.Count == 2 && !string.IsNullOrEmpty(lines[0]) && !string.IsNullOrEmpty(lines[1]) &&
|
||||
char.IsLetterOrDigit(lines[0][lines[0].Length - 1]) &&
|
||||
char.IsLower(lines[1][0]))
|
||||
{
|
||||
text = Utilities.UnbreakLine(text);
|
||||
AutoBreak = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Square brackets
|
||||
if (text.StartsWith("[", StringComparison.Ordinal) && text.EndsWith("]", StringComparison.Ordinal) &&
|
||||
Utilities.GetNumberOfLines(text) == 1 && Utilities.CountTagInText(text, "[") == 1 &&
|
||||
@ -82,12 +85,37 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
text = text.Replace("[", string.Empty).Replace("]", string.Empty);
|
||||
}
|
||||
|
||||
// Un-break line
|
||||
if (LanguagesAllowingLineMerging.Contains(sourceLanguage))
|
||||
{
|
||||
var lines = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
|
||||
if (lines.Count == 2 && !string.IsNullOrEmpty(lines[0]) && !string.IsNullOrEmpty(lines[1]) &&
|
||||
char.IsLetterOrDigit(lines[0][lines[0].Length - 1]) &&
|
||||
char.IsLower(lines[1][0]))
|
||||
{
|
||||
text = Utilities.UnbreakLine(text);
|
||||
AutoBreak = true;
|
||||
}
|
||||
|
||||
if (Configuration.Settings.Tools.TranslateAllowSplit &&
|
||||
!string.IsNullOrEmpty(inputNext) && !string.IsNullOrEmpty(text) &&
|
||||
(char.IsLetterOrDigit(text[text.Length - 1]) || text[text.Length - 1] == ',' || sourceLanguage == "ar" && text[text.Length - 1] == '\u060C') &&
|
||||
char.IsLower(inputNext[0]) &&
|
||||
!text.Contains('-') && !inputNext.Contains('-') && !Italic && !SquareBrackets && string.IsNullOrEmpty(Font))
|
||||
{
|
||||
text = Utilities.UnbreakLine(text);
|
||||
text = text + " " + Utilities.UnbreakLine(inputNext);
|
||||
SkipNext = true;
|
||||
}
|
||||
}
|
||||
|
||||
return text.Trim();
|
||||
}
|
||||
|
||||
public string ReAddFormatting(string input)
|
||||
public string ReAddFormatting(string input, out string nextText)
|
||||
{
|
||||
var text = input.Trim();
|
||||
nextText = null;
|
||||
|
||||
// Auto-break line
|
||||
if (AutoBreak)
|
||||
@ -95,6 +123,31 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
text = Utilities.AutoBreakLine(text);
|
||||
}
|
||||
|
||||
if (SkipNext)
|
||||
{
|
||||
var lines = Utilities.AutoBreakLine(text).SplitToLines();
|
||||
if (lines.Count == 1)
|
||||
{
|
||||
nextText = string.Empty;
|
||||
}
|
||||
else if (lines.Count == 2)
|
||||
{
|
||||
text = Utilities.AutoBreakLine(lines[0]);
|
||||
nextText = Utilities.AutoBreakLine(lines[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
text = Utilities.AutoBreakLine(lines[0] + " " + lines[1]);
|
||||
var sb = new StringBuilder();
|
||||
for (int i = 2; i < lines.Count; i++)
|
||||
{
|
||||
sb.Append(lines[i]);
|
||||
sb.Append(" ");
|
||||
}
|
||||
nextText = Utilities.AutoBreakLine(sb.ToString().TrimEnd());
|
||||
}
|
||||
}
|
||||
|
||||
// Square brackets
|
||||
if (SquareBracketsUppercase)
|
||||
{
|
||||
@ -132,11 +185,6 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
return text;
|
||||
}
|
||||
|
||||
|
||||
private int BreakNumberOfLines { get; set; }
|
||||
private bool BreakSplitAtLineEnding { get; set; }
|
||||
private bool BreakIsDialog { get; set; }
|
||||
|
||||
public string UnBreak(string text, string source)
|
||||
{
|
||||
var lines = source.SplitToLines();
|
||||
|
@ -34,18 +34,37 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
{
|
||||
string result;
|
||||
var input = new StringBuilder();
|
||||
var formatList = new Formatting[paragraphs.Count];
|
||||
var formatList = new List<Formatting>();
|
||||
bool skipNext = false;
|
||||
for (var index = 0; index < paragraphs.Count; index++)
|
||||
{
|
||||
if (skipNext)
|
||||
{
|
||||
skipNext = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
var p = paragraphs[index];
|
||||
var f = new Formatting();
|
||||
formatList[index] = f;
|
||||
formatList.Add(f);
|
||||
if (input.Length > 0)
|
||||
{
|
||||
input.Append(" " + SplitChar + " ");
|
||||
}
|
||||
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text.Replace(SplitChar.ToString(), string.Empty), sourceLanguage), sourceLanguage);
|
||||
text = f.UnBreak(text, p.Text);
|
||||
|
||||
var nextText = string.Empty;
|
||||
if (index < paragraphs.Count - 1 && paragraphs[index + 1].StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < 200)
|
||||
{
|
||||
nextText = paragraphs[index + 1].Text;
|
||||
}
|
||||
|
||||
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text.Replace(SplitChar.ToString(), string.Empty), sourceLanguage), sourceLanguage, nextText);
|
||||
skipNext = f.SkipNext;
|
||||
if (!skipNext)
|
||||
{
|
||||
text = f.UnBreak(text, p.Text);
|
||||
}
|
||||
|
||||
input.Append(text);
|
||||
}
|
||||
|
||||
@ -114,13 +133,22 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
|
||||
s = s.Replace(" " + Environment.NewLine, Environment.NewLine);
|
||||
s = s.Replace(" " + Environment.NewLine, Environment.NewLine).Trim();
|
||||
if (formatList.Length > index)
|
||||
string nextText = null;
|
||||
if (formatList.Count > index)
|
||||
{
|
||||
s = formatList[index].ReAddFormatting(s);
|
||||
s = formatList[index].ReBreak(s, targetLanguage);
|
||||
s = formatList[index].ReAddFormatting(s, out nextText);
|
||||
if (nextText == null)
|
||||
{
|
||||
s = formatList[index].ReBreak(s, targetLanguage);
|
||||
}
|
||||
}
|
||||
|
||||
resultList.Add(s);
|
||||
|
||||
if (nextText != null)
|
||||
{
|
||||
resultList.Add(nextText);
|
||||
}
|
||||
}
|
||||
|
||||
if (resultList.Count > paragraphs.Count)
|
||||
|
@ -153,18 +153,37 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
var baseUrl = "https://translation.googleapis.com/language/translate/v2";
|
||||
var format = "text";
|
||||
var input = new StringBuilder();
|
||||
var formattings = new Formatting[paragraphs.Count];
|
||||
var formatList = new List<Formatting>();
|
||||
bool skipNext = false;
|
||||
for (var index = 0; index < paragraphs.Count; index++)
|
||||
{
|
||||
if (skipNext)
|
||||
{
|
||||
skipNext = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
var p = paragraphs[index];
|
||||
var f = new Formatting();
|
||||
formattings[index] = f;
|
||||
formatList.Add(f);
|
||||
if (input.Length > 0)
|
||||
{
|
||||
input.Append("&");
|
||||
}
|
||||
|
||||
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage);
|
||||
var nextText = string.Empty;
|
||||
if (index < paragraphs.Count - 1 && paragraphs[index + 1].StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < 200)
|
||||
{
|
||||
nextText = paragraphs[index + 1].Text;
|
||||
}
|
||||
|
||||
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage, nextText);
|
||||
skipNext = f.SkipNext;
|
||||
if (!skipNext)
|
||||
{
|
||||
text = f.UnBreak(text, p.Text);
|
||||
}
|
||||
|
||||
input.Append("q=" + Utilities.UrlEncode(text));
|
||||
}
|
||||
|
||||
@ -198,14 +217,25 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
{
|
||||
if (v2[innerKey2] is string translatedText)
|
||||
{
|
||||
string nextText = null;
|
||||
translatedText = Regex.Unescape(translatedText);
|
||||
translatedText = string.Join(Environment.NewLine, translatedText.SplitToLines());
|
||||
translatedText = TranslationHelper.PostTranslate(translatedText, targetLanguage);
|
||||
if (resultList.Count < formattings.Length)
|
||||
if (resultList.Count < formatList.Count)
|
||||
{
|
||||
translatedText = formattings[resultList.Count].ReAddFormatting(translatedText);
|
||||
translatedText = formatList[resultList.Count].ReAddFormatting(translatedText, out nextText);
|
||||
if (nextText == null)
|
||||
{
|
||||
translatedText = formatList[resultList.Count].ReBreak(translatedText, targetLanguage);
|
||||
}
|
||||
}
|
||||
|
||||
resultList.Add(translatedText);
|
||||
|
||||
if (nextText != null)
|
||||
{
|
||||
resultList.Add(nextText);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -106,9 +106,16 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
var jsonBuilder = new StringBuilder();
|
||||
jsonBuilder.Append("[");
|
||||
bool isFirst = true;
|
||||
var formatList = new Formatting[paragraphs.Count];
|
||||
bool skipNext = false;
|
||||
var formatList = new List<Formatting>();
|
||||
for (var index = 0; index < paragraphs.Count; index++)
|
||||
{
|
||||
if (skipNext)
|
||||
{
|
||||
skipNext = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
var p = paragraphs[index];
|
||||
if (!isFirst)
|
||||
{
|
||||
@ -119,10 +126,20 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
isFirst = false;
|
||||
}
|
||||
|
||||
var nextText = string.Empty;
|
||||
if (index < paragraphs.Count - 1 && paragraphs[index + 1].StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < 200)
|
||||
{
|
||||
nextText = paragraphs[index + 1].Text;
|
||||
}
|
||||
|
||||
var f = new Formatting();
|
||||
formatList[index] = f;
|
||||
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage);
|
||||
text = f.UnBreak(text, p.Text);
|
||||
formatList.Add(f);
|
||||
var text = f.SetTagsAndReturnTrimmed(TranslationHelper.PreTranslate(p.Text, sourceLanguage), sourceLanguage, nextText);
|
||||
skipNext = f.SkipNext;
|
||||
if (!skipNext)
|
||||
{
|
||||
text = f.UnBreak(text, p.Text);
|
||||
}
|
||||
|
||||
jsonBuilder.Append("{ \"Text\":\"" + Json.EncodeJsonText(text) + "\"}");
|
||||
}
|
||||
@ -153,15 +170,25 @@ namespace Nikse.SubtitleEdit.Core.Translate
|
||||
var textDics = (Dictionary<string, object>)o;
|
||||
var res = (string)textDics["text"];
|
||||
|
||||
if (formatList.Length > results.Count)
|
||||
string nextText = null;
|
||||
if (formatList.Count > results.Count)
|
||||
{
|
||||
res = formatList[results.Count].ReAddFormatting(res);
|
||||
res = formatList[results.Count].ReBreak(res, targetLanguage);
|
||||
res = formatList[results.Count].ReAddFormatting(res, out nextText);
|
||||
|
||||
if (nextText == null)
|
||||
{
|
||||
res = formatList[results.Count].ReBreak(res, targetLanguage);
|
||||
}
|
||||
}
|
||||
|
||||
res = TranslationHelper.PostTranslate(res, targetLanguage);
|
||||
|
||||
results.Add(res);
|
||||
|
||||
if (nextText != null)
|
||||
{
|
||||
results.Add(nextText);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -368,57 +368,6 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Trims <paramref name="text"/>, records which italic layout it uses in
/// <c>_formattingTypes[i]</c>, strips the recognised italic tags, and optionally
/// joins a two-line text into one line before translation.
/// </summary>
/// <param name="i">Index used to store the result in <c>_formattingTypes</c> and <c>_autoSplit</c>.</param>
/// <param name="text">Text to analyse.</param>
/// <param name="skipSplit">When true, the text is returned right after the italic handling.</param>
/// <returns>The trimmed text with recognised italic tags removed and, possibly, line breaks removed.</returns>
private string SetFormattingTypeAndSplitting(int i, string text, bool skipSplit)
{
    const string openItalic = "<i>";
    const string closeItalic = "</i>";

    text = text.Trim();
    var wrappedInItalic = text.StartsWith(openItalic, StringComparison.Ordinal) &&
                          text.EndsWith(closeItalic, StringComparison.Ordinal);

    if (wrappedInItalic &&
        text.Contains(closeItalic + Environment.NewLine + openItalic) &&
        Utilities.GetNumberOfLines(text) == 2 &&
        Utilities.CountTagInText(text, openItalic) == 2)
    {
        // Each of the two lines carries its own <i>...</i> pair.
        _formattingTypes[i] = FormattingType.ItalicTwoLines;
        text = HtmlUtil.RemoveOpenCloseTags(text, HtmlUtil.TagItalic);
    }
    else if (wrappedInItalic && Utilities.CountTagInText(text, openItalic) == 1)
    {
        // A single <i>...</i> pair wraps the whole text: cut it off both ends.
        _formattingTypes[i] = FormattingType.Italic;
        text = text.Substring(openItalic.Length, text.Length - (openItalic.Length + closeItalic.Length));
    }
    else
    {
        _formattingTypes[i] = FormattingType.None;
    }

    if (skipSplit || !Configuration.Settings.Tools.TranslateAutoSplit)
    {
        return text;
    }

    var lines = text.SplitToLines();
    if (lines.Count != 2 || string.IsNullOrEmpty(lines[0]))
    {
        return text;
    }

    // Only join when the first line ends in a letter, digit or comma.
    var lastCharOfFirstLine = lines[0].Substring(lines[0].Length - 1);
    var joinableEndings = Utilities.AllLettersAndNumbers + ",";
    if (joinableEndings.Contains(lastCharOfFirstLine))
    {
        _autoSplit[i] = true;
        text = Utilities.RemoveLineBreaks(text);
    }

    return text;
}
|
||||
|
||||
/// <summary>
/// Splits <paramref name="translatedText"/> with <c>SplitToLines</c>, writes each cleaned
/// piece into consecutive paragraphs of <c>TranslatedSubtitle</c> starting at
/// <paramref name="start"/>, then refreshes the target list view and selects
/// <paramref name="end"/>.
/// </summary>
/// <param name="translatedText">Translated text holding one or more paragraph texts.</param>
/// <param name="start">Index of the first paragraph to overwrite.</param>
/// <param name="end">Index to select and scroll into view after the refresh.</param>
private void FillTranslatedText(string translatedText, int start, int end)
{
    var index = start;
    foreach (string s in SplitToLines(translatedText))
    {
        // Pieces beyond the last paragraph are ignored.
        if (index < TranslatedSubtitle.Paragraphs.Count)
        {
            TranslatedSubtitle.Paragraphs[index].Text = CleanText(s, index);
        }

        index++;
    }

    subtitleListViewTo.BeginUpdate();
    try
    {
        subtitleListViewTo.Fill(TranslatedSubtitle);
        subtitleListViewTo.SelectIndexAndEnsureVisible(end);
    }
    finally
    {
        // Always pair BeginUpdate with EndUpdate, even if the refresh throws.
        subtitleListViewTo.EndUpdate();
    }
}
|
||||
|
||||
private string CleanText(string s, int index)
|
||||
{
|
||||
string cleanText = s.Replace("</p>", string.Empty).Trim();
|
||||
@ -485,19 +434,6 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
return cleanText;
|
||||
}
|
||||
|
||||
/// <summary>
/// Splits translated text into its individual pieces on the NUL character.
/// When <c>_googleTranslate</c> is false, the "+-+" separator and its space-distorted
/// variants are first converted to NUL.
/// </summary>
/// <param name="translatedText">Text to split.</param>
/// <returns>The pieces in their original order.</returns>
private List<string> SplitToLines(string translatedText)
{
    var normalized = translatedText;
    if (!_googleTranslate)
    {
        // Order matters: the replacements are applied one after another.
        var distortedSeparators = new[] { "+- +", "+ -+", "+ - +", "+ +" };
        foreach (var distorted in distortedSeparators)
        {
            normalized = normalized.Replace(distorted, "+-+");
        }

        normalized = normalized.Replace("+-+", "\0");
    }

    return new List<string>(normalized.Split('\0'));
}
|
||||
|
||||
public void FillComboWithLanguages(ComboBox comboBox)
|
||||
{
|
||||
if (!_googleTranslate)
|
||||
|
Loading…
Reference in New Issue
Block a user