Refactor & More logic

This commit is contained in:
Martijn van Berkel (Flitskikker) 2020-04-10 00:58:25 +02:00
parent 0b6601aa09
commit 608116b595
3 changed files with 241 additions and 158 deletions

View File

@ -11,9 +11,6 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
public class FixContinuationStyle : IFixCommonError
{
private ContinuationUtilities.ContinuationProfile continuationProfile = null;
private List<string> prefixes = null;
private List<string> suffixes = null;
private List<string> names = null;
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
@ -61,15 +58,15 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
var isChecked = true;
// Check if we should fix this paragraph
if (((!IsEndOfSentence(text) || HasSuffix(text)) && !text.EndsWith("--") && !text.EndsWith(":")))
if (ShouldFixParagraph(text))
{
// If ends with nothing...
if (!IsEndOfSentence(text))
if (!ContinuationUtilities.IsEndOfSentence(text))
{
// ...ignore inserts
if (Configuration.Settings.General.FixContinuationStyleUncheckInsertsAllCaps)
{
if (IsAllCaps(text))
if (ContinuationUtilities.IsAllCaps(text))
{
isChecked = false;
}
@ -78,7 +75,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
// ...and italic lyrics
if (Configuration.Settings.General.FixContinuationStyleUncheckInsertsItalic)
{
if (IsItalic(oldText) && !IsNewSentence(text) && inItalicSentence == false)
if (ContinuationUtilities.IsItalic(oldText) && !ContinuationUtilities.IsNewSentence(text) && inItalicSentence == false)
{
isChecked = false;
}
@ -87,16 +84,17 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
// ...and smallcaps inserts or non-italic lyrics
if (Configuration.Settings.General.FixContinuationStyleUncheckInsertsLowercase)
{
if (!IsNewSentence(text) && !inSentence)
if (!ContinuationUtilities.IsNewSentence(text) && !inSentence)
{
isChecked = false;
}
}
}
// Remove any previous suffixes from first paragraph
var textWithoutSuffix = text;
foreach (string suffix in this.suffixes)
foreach (string suffix in ContinuationUtilities.Suffixes.Union(new List<string> { "," }))
{
if (textWithoutSuffix.EndsWith(suffix)) textWithoutSuffix = textWithoutSuffix.Substring(0, textWithoutSuffix.Length - suffix.Length);
}
@ -104,7 +102,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
// Remove any previous prefixes from second paragraph
var textNextWithoutPrefix = textNext;
foreach (string prefix in this.prefixes)
foreach (string prefix in ContinuationUtilities.Prefixes)
{
if (textNextWithoutPrefix.StartsWith(prefix)) textNextWithoutPrefix = textNextWithoutPrefix.Substring(prefix.Length);
}
@ -120,11 +118,12 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
// Detect gap
bool gap = pNext.StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds > minGapMs;
// If ends with dots (possible interruptions), check if next sentence is new sentence, otherwise don't check by default
if (text.EndsWith("..") || text.EndsWith("…"))
{
if (!HasPrefix(textNext) && IsNewSentence(textNext))
if (!HasPrefix(textNext) && ContinuationUtilities.IsNewSentence(textNext))
{
isChecked = false;
@ -139,6 +138,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
}
}
// First paragraph...
// If first paragraphs ends with a suffix,
@ -146,7 +146,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
// and next sentence starts with conjunction,
// try to re-add comma
var addComma = false;
if (!lastWord.EndsWith(",")
if (!lastWord.EndsWith(",")
&& HasSuffix(text)
&& (gap ? !gapReplaceComma : !replaceComma)
&& ContinuationUtilities.StartsWithConjunction(textNextWithoutPrefix, callbacks.Language))
@ -179,6 +179,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
callbacks.AddFixToListView(p, fixAction, oldText, newText, isChecked);
}
// Second paragraph...
// Make new first word
@ -210,11 +211,11 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
skipThisLine:
// Detect new sentence
if (IsNewSentence(text))
if (ContinuationUtilities.IsNewSentence(text))
{
inSentence = true;
if (IsItalic(oldText))
if (ContinuationUtilities.IsItalic(oldText))
{
inItalicSentence = true;
}
@ -225,11 +226,11 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
}
// Detect end of sentence
if (IsEndOfSentence(text))
if (ContinuationUtilities.IsEndOfSentence(text))
{
inSentence = false;
if (IsItalic(oldText))
if (ContinuationUtilities.IsItalic(oldText))
{
inItalicSentence = false;
}
@ -248,100 +249,21 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
}
}
private bool IsNewSentence(string input)
private bool ShouldFixParagraph(string input)
{
if (Char.IsLetter(input[0]) && Char.IsUpper(input[0]))
{
// First letter
return true;
}
else if (Char.IsLetter(input[0]) && !Char.IsUpper(input[0]) && Char.IsLetter(input[1]) && Char.IsUpper(input[1]))
{
// iPhone
return true;
}
else if (Char.IsPunctuation(input[0]) && Char.IsLetter(input[1]) && !Char.IsUpper(input[1]) && Char.IsWhiteSpace(input[2]) && Char.IsLetter(input[3]) && Char.IsUpper(input[3]))
{
// 's Avonds
return true;
}
else if ("¿¡".Contains(input[0]) && Char.IsLetter(input[1]) && Char.IsUpper(input[1]))
{
// Spanish
return true;
}
return false;
}
private bool IsEndOfSentence(string input)
{
return (input.EndsWith(".") && !input.EndsWith("..")) || input.EndsWith("?") || input.EndsWith("!") || input.EndsWith(";") /* Greek question mark */ || input.EndsWith("--");
return ContinuationUtilities.ShouldAddSuffix(input, this.continuationProfile, false);
}
private bool HasPrefix(string input)
{
foreach (string prefix in this.prefixes)
{
if (input.StartsWith(prefix))
{
return true;
}
}
return false;
return ContinuationUtilities.HasPrefix(input, this.continuationProfile);
}
private bool HasSuffix(string input)
{
foreach (string suffix in this.suffixes)
{
if (input.EndsWith(suffix))
{
return true;
}
}
return false;
return ContinuationUtilities.HasSuffix(input, this.continuationProfile);
}
private bool IsAllCaps(string input)
{
int totalCount = 0;
int allCapsCount = 0;
// Count all caps chars
for (int i = 0; i < input.Length; i++)
{
if (Char.IsLetter(input[i]))
{
totalCount++;
if (Char.IsUpper(input[i]))
{
allCapsCount++;
}
}
}
return (double)allCapsCount / (double)totalCount >= 0.80;
}
private bool IsItalic(string input)
{
input = ContinuationUtilities.ExtractParagraphOnly(input);
if (input.Length > 2)
{
if (input.StartsWith("<i>") && ((input.EndsWith("</i>") && !input.Substring(2).Contains("<i>")) || !input.Contains("</i>")))
{
return true;
}
}
return false;
}
private bool StartsWithName(string input, string language)
{
if (this.names == null)
@ -367,32 +289,6 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
private void SetContinuationProfile(ContinuationStyle continuationStyle)
{
this.continuationProfile = ContinuationUtilities.GetContinuationProfile(continuationStyle);
this.prefixes = ContinuationUtilities.Prefixes;
this.suffixes = ContinuationUtilities.Suffixes;
if (this.continuationProfile.Prefix.Length > 0)
{
this.prefixes.Add(this.continuationProfile.Prefix);
}
if (this.continuationProfile.Suffix.Length > 0)
{
this.suffixes.Add(this.continuationProfile.Suffix);
}
if (this.continuationProfile.UseDifferentStyleGap)
{
if (this.continuationProfile.GapPrefix.Length > 0)
{
this.prefixes.Add(this.continuationProfile.GapPrefix);
}
if (this.continuationProfile.GapSuffix.Length > 0)
{
this.suffixes.Add(this.continuationProfile.GapSuffix);
}
}
}
}
}

View File

@ -2818,10 +2818,15 @@ namespace Nikse.SubtitleEdit.Core
private static string doubleQuotes = "''";
public static List<string> Prefixes = new List<string>() { "...", "..", "-", "", "", "—", "…" };
public static List<string> Suffixes = new List<string>() { ",", "...", "..", "-", "", "", "—", "…" };
public static List<string> Suffixes = new List<string>() { "...", "..", "-", "", "", "—", "…" };
public static string SanitizeString(string input, bool removeDashes = true)
{
if (string.IsNullOrEmpty(input))
{
return input;
}
string checkString = input;
checkString = Regex.Replace(checkString, "<.*?>", String.Empty);
checkString = Regex.Replace(checkString, "\\(.*?\\)", String.Empty);
@ -2899,28 +2904,40 @@ namespace Nikse.SubtitleEdit.Core
return checkString;
}
public static string ReplaceFirstOccurrence(string Source, string Find, string Replace)
public static string ReplaceFirstOccurrence(string source, string find, string replace)
{
int place = Source.IndexOf(Find);
int place = source.IndexOf(find);
if (place == -1)
return Source;
return source;
string result = Source.Remove(place, Find.Length).Insert(place, Replace);
string result = source.Remove(place, find.Length).Insert(place, replace);
return result;
}
public static string ReplaceLastOccurrence(string Source, string Find, string Replace)
public static string ReplaceLastOccurrence(string source, string find, string replace)
{
int place = Source.LastIndexOf(Find);
int place = source.LastIndexOf(find);
if (place == -1)
return Source;
return source;
string result = Source.Remove(place, Find.Length).Insert(place, Replace);
string result = source.Remove(place, find.Length).Insert(place, replace);
return result;
}
/// <summary>
/// Tells whether the given text is a candidate for a continuation suffix.
/// </summary>
/// <param name="input">The paragraph text to inspect.</param>
/// <param name="profile">Continuation profile forwarded to <c>HasSuffix</c>.</param>
/// <param name="sanitize">When true the text is first passed through <c>SanitizeString</c>; when false it is inspected as-is.</param>
/// <returns>
/// True when the text does not end a sentence (per <c>IsEndOfSentence</c>), or ends with a comma,
/// or already carries a suffix, and it does not end with "--" or ":".
/// </returns>
public static bool ShouldAddSuffix(string input, ContinuationProfile profile, bool sanitize = true)
{
    string text = input;
    if (sanitize)
    {
        text = ContinuationUtilities.SanitizeString(input);
    }

    // The sentence is considered "still open" when any of these three hold.
    bool sentenceStillOpen = !ContinuationUtilities.IsEndOfSentence(text)
        || text.EndsWith(",")
        || HasSuffix(text, profile);

    // A trailing double dash or colon always rules the paragraph out.
    return sentenceStillOpen && !text.EndsWith("--") && !text.EndsWith(":");
}
public static string AddSuffixIfNeeded(string originalText, ContinuationProfile profile, bool gap)
{
// Get last word
@ -2990,7 +3007,7 @@ namespace Nikse.SubtitleEdit.Core
}
// Replace it
return ContinuationUtilities.ReplaceLastOccurrence(originalText, firstWord, newFirstWord);
return ContinuationUtilities.ReplaceFirstOccurrence(originalText, firstWord, newFirstWord);
}
public static string RemoveSuffixIfNeeded(string originalText, ContinuationProfile profile, bool gap)
@ -3062,7 +3079,159 @@ namespace Nikse.SubtitleEdit.Core
}
// Replace it
return ContinuationUtilities.ReplaceLastOccurrence(originalText, firstWord, newFirstWord);
return ContinuationUtilities.ReplaceFirstOccurrence(originalText, firstWord, newFirstWord);
}
/// <summary>
/// Strips every known continuation suffix (see <c>Suffixes</c>) from the last word of the text,
/// optionally appending a comma to that word afterwards.
/// </summary>
/// <param name="originalText">The paragraph text, possibly containing formatting that <c>SanitizeString</c> removes.</param>
/// <param name="profile">Not read by this method; kept so callers and sibling helpers share one signature.</param>
/// <param name="addComma">When true, a comma is appended to the cleaned last word.</param>
/// <returns>
/// The original text with the last occurrence of its last word replaced by the cleaned word.
/// The text is returned untouched when it is null/empty or when no last word can be determined.
/// </returns>
public static string RemoveSuffix(string originalText, ContinuationProfile profile, bool addComma = false)
{
    if (string.IsNullOrEmpty(originalText))
    {
        return originalText;
    }

    // Get last word
    string text = ContinuationUtilities.SanitizeString(originalText);
    if (string.IsNullOrEmpty(text))
    {
        return originalText;
    }

    string lastWord = text.Split(' ').Last();
    if (lastWord.Length == 0)
    {
        // Sanitized text ends with a space: there is no word to anchor the replacement on,
        // and searching for an empty string would place the new text at an arbitrary index.
        return originalText;
    }

    string newLastWord = lastWord;
    foreach (string suffix in Suffixes)
    {
        // Ordinal comparison: suffixes are punctuation, not linguistic text.
        // Empty entries would match everything and remove nothing, so they are skipped.
        if (suffix.Length > 0 && newLastWord.EndsWith(suffix, StringComparison.Ordinal))
        {
            newLastWord = newLastWord.Substring(0, newLastWord.Length - suffix.Length);
        }
    }

    newLastWord = newLastWord.Trim();

    if (addComma)
    {
        newLastWord += ",";
    }

    // Replace it
    return ContinuationUtilities.ReplaceLastOccurrence(originalText, lastWord, newLastWord);
}
/// <summary>
/// Strips every known continuation prefix (see <c>Prefixes</c>) from the first word of the text.
/// </summary>
/// <param name="originalText">The paragraph text, possibly containing formatting that <c>SanitizeString</c> removes.</param>
/// <param name="profile">Not read by this method; kept so callers and sibling helpers share one signature.</param>
/// <returns>
/// The original text with the first occurrence of its first word replaced by the cleaned word.
/// The text is returned untouched when it is null/empty or when no first word can be determined.
/// </returns>
public static string RemovePrefix(string originalText, ContinuationProfile profile)
{
    if (string.IsNullOrEmpty(originalText))
    {
        return originalText;
    }

    // Get first word of the next paragraph
    string text = ContinuationUtilities.SanitizeString(originalText);
    if (string.IsNullOrEmpty(text))
    {
        return originalText;
    }

    string firstWord = text.Split(' ').First();
    if (firstWord.Length == 0)
    {
        // Sanitized text starts with a space: nothing to strip a prefix from.
        return originalText;
    }

    string newFirstWord = firstWord;
    foreach (string prefix in Prefixes)
    {
        // Ordinal comparison: prefixes are punctuation, not linguistic text.
        // Empty entries would match everything and remove nothing, so they are skipped.
        if (prefix.Length > 0 && newFirstWord.StartsWith(prefix, StringComparison.Ordinal))
        {
            newFirstWord = newFirstWord.Substring(prefix.Length);
        }
    }

    newFirstWord = newFirstWord.Trim();

    // Replace it
    return ContinuationUtilities.ReplaceFirstOccurrence(originalText, firstWord, newFirstWord);
}
/// <summary>
/// Heuristically decides whether the text starts a new sentence, based on its first few characters.
/// </summary>
/// <param name="input">The text to inspect.</param>
/// <returns>
/// True when the text starts with an uppercase letter, with a lowercase letter followed by an
/// uppercase one ("iPhone"), with punctuation + lowercase letter + whitespace + uppercase letter
/// ("'s Avonds"), or with an inverted Spanish mark followed by an uppercase letter.
/// False otherwise, including for null, empty, or too-short input.
/// </returns>
public static bool IsNewSentence(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    char first = input[0];

    // First letter
    if (char.IsLetter(first) && char.IsUpper(first))
    {
        return true;
    }

    // Every remaining pattern needs at least two characters.
    if (input.Length < 2)
    {
        return false;
    }

    char second = input[1];

    // iPhone
    if (char.IsLetter(first) && !char.IsUpper(first) && char.IsLetter(second) && char.IsUpper(second))
    {
        return true;
    }

    // 's Avonds (needs four characters; the length test guards the two extra index reads)
    if (char.IsPunctuation(first) && char.IsLetter(second) && !char.IsUpper(second)
        && input.Length >= 4
        && char.IsWhiteSpace(input[2]) && char.IsLetter(input[3]) && char.IsUpper(input[3]))
    {
        return true;
    }

    // Spanish
    if ((first == '¿' || first == '¡') && char.IsLetter(second) && char.IsUpper(second))
    {
        return true;
    }

    return false;
}
/// <summary>
/// Tells whether the text ends with sentence-closing punctuation.
/// </summary>
/// <param name="input">The text to inspect.</param>
/// <returns>
/// True when the text ends with "?", "!", ";" (Greek question mark), "--",
/// or with a single "." that is not part of a ".." run.
/// </returns>
public static bool IsEndOfSentence(string input)
{
    bool endsWithSinglePeriod = input.EndsWith(".") && !input.EndsWith("..");
    if (endsWithSinglePeriod)
    {
        return true;
    }

    if (input.EndsWith("?") || input.EndsWith("!"))
    {
        return true;
    }

    if (input.EndsWith(";")) // Greek question mark
    {
        return true;
    }

    return input.EndsWith("--");
}
/// <summary>
/// Tells whether at least 80% of the letters in the text are uppercase.
/// </summary>
/// <param name="input">The text to inspect; non-letter characters are ignored.</param>
/// <returns>
/// True when uppercase letters make up 80% or more of all letters. When the text has no
/// letters the ratio is 0/0 (NaN), which compares as false.
/// </returns>
public static bool IsAllCaps(string input)
{
    int letters = 0;
    int uppercaseLetters = 0;

    // Tally letters and, among them, the uppercase ones
    foreach (char c in input)
    {
        if (!Char.IsLetter(c))
        {
            continue;
        }

        letters++;
        if (Char.IsUpper(c))
        {
            uppercaseLetters++;
        }
    }

    double ratio = (double)uppercaseLetters / (double)letters;
    return ratio >= 0.80;
}
/// <summary>
/// Tells whether the paragraph (as returned by <c>ExtractParagraphOnly</c>) is italic as a whole.
/// </summary>
/// <param name="input">The raw paragraph text.</param>
/// <returns>
/// True when the extracted paragraph is longer than two characters, starts with "&lt;i&gt;", and either
/// never closes the tag, or ends with "&lt;/i&gt;" without opening a second "&lt;i&gt;" in between.
/// </returns>
public static bool IsItalic(string input)
{
    string paragraph = ContinuationUtilities.ExtractParagraphOnly(input);

    if (paragraph.Length <= 2 || !paragraph.StartsWith("<i>"))
    {
        return false;
    }

    // Opened but never closed: the italics run through the end of the paragraph.
    if (!paragraph.Contains("</i>"))
    {
        return true;
    }

    // Closed at the very end, and no further opening tag after the leading one.
    string afterOpeningTag = paragraph.Substring(2);
    return paragraph.EndsWith("</i>") && !afterOpeningTag.Contains("<i>");
}
/// <summary>
/// Tells whether the text starts with a continuation prefix.
/// </summary>
/// <param name="input">The text to inspect.</param>
/// <param name="profile">
/// Profile whose <c>Prefix</c> is checked first, followed by its <c>GapPrefix</c> when
/// <c>UseDifferentStyleGap</c> is set.
/// </param>
/// <returns>
/// True when the text starts with the profile's prefix, its gap prefix, or any non-empty entry
/// of <c>Prefixes</c>; false otherwise, including for null or empty text.
/// </returns>
public static bool HasPrefix(string input, ContinuationProfile profile)
{
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    // Ordinal comparisons throughout: prefixes are punctuation marks, and culture-sensitive
    // matching can treat some characters as ignorable.
    if (profile.Prefix.Length > 0 && input.StartsWith(profile.Prefix, StringComparison.Ordinal))
    {
        return true;
    }

    if (profile.UseDifferentStyleGap && profile.GapPrefix.Length > 0 && input.StartsWith(profile.GapPrefix, StringComparison.Ordinal))
    {
        return true;
    }

    foreach (string prefix in Prefixes)
    {
        // An empty entry would match every input, so it is never treated as a prefix.
        if (prefix.Length > 0 && input.StartsWith(prefix, StringComparison.Ordinal))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Tells whether the text ends with a continuation suffix.
/// </summary>
/// <param name="input">The text to inspect.</param>
/// <param name="profile">
/// Profile whose <c>Suffix</c> is checked first, followed by its <c>GapSuffix</c> when
/// <c>UseDifferentStyleGap</c> is set.
/// </param>
/// <returns>
/// True when the text ends with the profile's suffix, its gap suffix, or any non-empty entry
/// of <c>Suffixes</c>; false otherwise, including for null or empty text.
/// </returns>
public static bool HasSuffix(string input, ContinuationProfile profile)
{
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    // Ordinal comparisons throughout: suffixes are punctuation marks, and culture-sensitive
    // matching can treat some characters as ignorable.
    if (profile.Suffix.Length > 0 && input.EndsWith(profile.Suffix, StringComparison.Ordinal))
    {
        return true;
    }

    if (profile.UseDifferentStyleGap && profile.GapSuffix.Length > 0 && input.EndsWith(profile.GapSuffix, StringComparison.Ordinal))
    {
        return true;
    }

    foreach (string suffix in Suffixes)
    {
        // An empty entry would match every input, so it is never treated as a suffix.
        if (suffix.Length > 0 && input.EndsWith(suffix, StringComparison.Ordinal))
        {
            return true;
        }
    }

    return false;
}
public static bool StartsWithConjunction(string input, string language)
@ -3097,6 +3266,23 @@ namespace Nikse.SubtitleEdit.Core
return false;
}
/// <summary>
/// Prepares two consecutive paragraphs for merging by removing the continuation suffix from the
/// first and the continuation prefix from the second, when the pair looks like a continued sentence.
/// </summary>
/// <param name="input">Text of the first paragraph.</param>
/// <param name="nextInput">Text of the paragraph that follows.</param>
/// <param name="profile">Continuation profile used for the suffix/prefix checks.</param>
/// <param name="language">Language passed to <c>StartsWithConjunction</c> to decide whether a comma is re-added.</param>
/// <returns>
/// A tuple of (first text, next text). Both are cleaned when the first paragraph has a suffix and
/// the next one either has a prefix or does not start a new sentence; otherwise both inputs are
/// returned as they were passed in.
/// </returns>
public static Tuple<string, string> MergeHelper(string input, string nextInput, ContinuationProfile profile, string language)
{
    string sanitizedCurrent = ContinuationUtilities.SanitizeString(input);
    string sanitizedNext = ContinuationUtilities.SanitizeString(nextInput);

    // Second sanitize pass whose removeDashes flag is false only when the profile's gap prefix is "-".
    string sanitizedNextForPrefixCheck = ContinuationUtilities.SanitizeString(nextInput, profile.GapPrefix != "-");

    bool isContinuation = ContinuationUtilities.HasSuffix(sanitizedCurrent, profile)
        && (ContinuationUtilities.HasPrefix(sanitizedNextForPrefixCheck, profile)
            || !ContinuationUtilities.IsNewSentence(sanitizedNext));

    if (!isContinuation)
    {
        return Tuple.Create(input, nextInput);
    }

    bool reAddComma = StartsWithConjunction(sanitizedNext, language);
    string mergedCurrent = ContinuationUtilities.RemoveSuffix(input, profile, reAddComma);
    string mergedNext = ContinuationUtilities.RemovePrefix(nextInput, profile);

    return Tuple.Create(mergedCurrent, mergedNext);
}
public static int GetMinimumGapMs()
{
return Math.Max(Configuration.Settings.General.MinimumMillisecondsBetweenLines + 5, 150);

View File

@ -9704,8 +9704,11 @@ namespace Nikse.SubtitleEdit.Forms
if (continuationStyle != ContinuationStyle.None)
{
var continuationProfile = ContinuationUtilities.GetContinuationProfile(continuationStyle);
currentParagraph.Text = ContinuationUtilities.AddSuffixIfNeeded(currentParagraph.Text, continuationProfile, false);
newParagraph.Text = ContinuationUtilities.AddPrefixIfNeeded(newParagraph.Text, continuationProfile, false);
if (ContinuationUtilities.ShouldAddSuffix(currentParagraph.Text, continuationProfile))
{
currentParagraph.Text = ContinuationUtilities.AddSuffixIfNeeded(currentParagraph.Text, continuationProfile, false);
newParagraph.Text = ContinuationUtilities.AddPrefixIfNeeded(newParagraph.Text, continuationProfile, false);
}
}
SetSplitTime(splitSeconds, currentParagraph, newParagraph, oldText);
@ -9863,8 +9866,11 @@ namespace Nikse.SubtitleEdit.Forms
if (continuationStyle != ContinuationStyle.None)
{
var continuationProfile = ContinuationUtilities.GetContinuationProfile(continuationStyle);
originalCurrent.Text = ContinuationUtilities.AddSuffixIfNeeded(originalCurrent.Text, continuationProfile, false);
originalNew.Text = ContinuationUtilities.AddPrefixIfNeeded(originalNew.Text, continuationProfile, false);
if (ContinuationUtilities.ShouldAddSuffix(originalCurrent.Text, continuationProfile))
{
originalCurrent.Text = ContinuationUtilities.AddSuffixIfNeeded(originalCurrent.Text, continuationProfile, false);
originalNew.Text = ContinuationUtilities.AddPrefixIfNeeded(originalNew.Text, continuationProfile, false);
}
}
}
@ -10154,23 +10160,20 @@ namespace Nikse.SubtitleEdit.Forms
next++;
}
var addText = _subtitle.Paragraphs[index].Text;
var continuationStyle = Configuration.Settings.General.ContinuationStyle;
if (continuationStyle != ContinuationStyle.None)
{
var continuationProfile = ContinuationUtilities.GetContinuationProfile(continuationStyle);
var gap = next < firstIndex + SubtitleListview1.SelectedIndices.Count ? _subtitle.Paragraphs[next].StartTime.TotalMilliseconds - _subtitle.Paragraphs[index].EndTime.TotalMilliseconds > ContinuationUtilities.GetMinimumGapMs() : false;
if (index != firstIndex)
{
addText = ContinuationUtilities.RemovePrefixIfNeeded(addText, continuationProfile, gap);
}
if (next < firstIndex + SubtitleListview1.SelectedIndices.Count)
{
addText = ContinuationUtilities.RemoveSuffixIfNeeded(addText, continuationProfile, gap);
var mergeResult = ContinuationUtilities.MergeHelper(_subtitle.Paragraphs[index].Text, _subtitle.Paragraphs[index + 1].Text, continuationProfile, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle));
_subtitle.Paragraphs[index].Text = mergeResult.Item1;
_subtitle.Paragraphs[index + 1].Text = mergeResult.Item2;
}
}
var addText = _subtitle.Paragraphs[index].Text;
if (firstIndex != index)
{
addText = RemoveAssStartAlignmentTag(addText);
@ -10391,11 +10394,10 @@ namespace Nikse.SubtitleEdit.Forms
var continuationStyle = Configuration.Settings.General.ContinuationStyle;
if (continuationStyle != ContinuationStyle.None)
{
var continuationProfile = ContinuationUtilities.GetContinuationProfile(continuationStyle);
var gap = nextParagraph.StartTime.TotalMilliseconds - currentParagraph.EndTime.TotalMilliseconds > ContinuationUtilities.GetMinimumGapMs();
currentParagraph.Text = ContinuationUtilities.RemoveSuffixIfNeeded(currentParagraph.Text, continuationProfile, gap);
nextParagraph.Text = ContinuationUtilities.RemovePrefixIfNeeded(nextParagraph.Text, continuationProfile, gap);
var continuationProfile = ContinuationUtilities.GetContinuationProfile(continuationStyle);
var mergeResult = ContinuationUtilities.MergeHelper(currentParagraph.Text, nextParagraph.Text, continuationProfile, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle));
currentParagraph.Text = mergeResult.Item1;
nextParagraph.Text = mergeResult.Item2;
}
if (_subtitleAlternate != null)
@ -10408,10 +10410,9 @@ namespace Nikse.SubtitleEdit.Forms
if (continuationStyle != ContinuationStyle.None)
{
var continuationProfile = ContinuationUtilities.GetContinuationProfile(continuationStyle);
var gap = originalNext.StartTime.TotalMilliseconds - original.EndTime.TotalMilliseconds > ContinuationUtilities.GetMinimumGapMs();
original.Text = ContinuationUtilities.RemoveSuffixIfNeeded(original.Text, continuationProfile, gap);
originalNext.Text = ContinuationUtilities.RemovePrefixIfNeeded(originalNext.Text, continuationProfile, gap);
var mergeResult = ContinuationUtilities.MergeHelper(original.Text, originalNext.Text, continuationProfile, LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitleAlternate));
original.Text = mergeResult.Item1;
originalNext.Text = mergeResult.Item2;
}
}