SubtitleEdit/libse/ContinuationUtilities.cs

1244 lines
46 KiB
C#
Raw Normal View History

2020-04-11 10:31:14 +02:00
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Nikse.SubtitleEdit.Core.Enums;
namespace Nikse.SubtitleEdit.Core
{
public static class ContinuationUtilities
{
// Dash characters: the ASCII hyphen plus three Unicode hyphen/dash variants.
// SanitizeString strips one of these from the start of the text when asked to remove dashes.
private static readonly string Dashes = "-‐–—";
// Single-character quotation marks that SanitizeString strips from the start and end of the text.
private static readonly string Quotes = "'\"“”‘’«»‹›„“‚‘";
// ASCII apostrophe; used to recognise leading contractions ('s, 'cause) whose quote must be kept.
private static readonly string SingleQuotes = "'";
// Two consecutive ASCII apostrophes, treated as one two-character quotation mark.
private static readonly string DoubleQuotes = "''";
// Characters treated as music markers; SanitizeString strips one from the start and end of the text.
private static readonly string MusicSymbols = "♪♫#*¶";
2020-04-11 10:31:14 +02:00
2020-04-11 12:08:03 +02:00
// Continuation markers recognised at the start of a subtitle.
// The hyphen (U+2010) and en dash (U+2013) are written as escapes so they cannot be lost to an
// encoding mishap: an empty entry would make every StartsWith("") check succeed and would make
// string.Replace("", ...) throw an ArgumentException.
public static readonly List<string> Prefixes = new List<string> { "...", "..", "-", "\u2010", "\u2013", "—", "…" };
// The subset of prefixes that are dashes (same four characters as the Dashes field).
public static readonly List<string> DashPrefixes = new List<string> { "-", "\u2010", "\u2013", "—" };
// Continuation markers recognised at the end of a subtitle.
public static readonly List<string> Suffixes = new List<string> { "...", "..", "-", "\u2010", "\u2013", "—", "…" };
2020-04-11 10:31:14 +02:00
2020-04-11 12:08:03 +02:00
/// <summary>
/// Strips formatting and decoration from a subtitle text so that only the spoken sentence remains:
/// tags, bracketed text, override blocks, "elevation" lines, leading '>' markers, an upper-case
/// speaker label, a leading dash (optional), quotes and music symbols.
/// </summary>
/// <param name="input">The raw subtitle text.</param>
/// <param name="removeDashes">When true, a single leading dash is removed as well.</param>
/// <returns>
/// The sanitized text; <paramref name="input"/> itself when it is null or empty; an empty string
/// when nothing is left after removing tags and elevation.
/// </returns>
public static string SanitizeString(string input, bool removeDashes)
{
    // Return if empty string
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    string checkString = input;

    // Remove <tags>, (round) and [square] bracketed text (both may span lines) and {override} blocks
    checkString = Regex.Replace(checkString, "<.*?>", string.Empty);
    checkString = Regex.Replace(checkString, "\\(.*?\\)", string.Empty, RegexOptions.Singleline);
    checkString = Regex.Replace(checkString, "\\[.*?\\]", string.Empty, RegexOptions.Singleline);
    checkString = Regex.Replace(checkString, "\\{.*?\\}", string.Empty);
    checkString = checkString.Trim();

    // Remove string elevation: a last line holding only '_' or '.', optionally preceded by a
    // space or a no-break space (Alt+255). The no-break space is written as an escape so it
    // cannot be confused with, or converted into, a regular space.
    if (checkString.EndsWith("\n_", StringComparison.Ordinal) || checkString.EndsWith("\n.", StringComparison.Ordinal)
        || checkString.EndsWith("\n _", StringComparison.Ordinal) || checkString.EndsWith("\n .", StringComparison.Ordinal)
        || checkString.EndsWith("\n\u00A0_", StringComparison.Ordinal) || checkString.EndsWith("\n\u00A0.", StringComparison.Ordinal))
    {
        checkString = checkString.Substring(0, checkString.Length - 1).Trim();
    }

    // Return if empty string by now
    if (string.IsNullOrEmpty(checkString))
    {
        return "";
    }

    // Remove >> from the beginning
    while (checkString.StartsWith(">"))
    {
        checkString = checkString.Substring(1).Trim();
    }

    // Remove SPEAKER: from the beginning
    if (checkString.Contains(":"))
    {
        string[] split = checkString.Split(':');
        if (string.IsNullOrEmpty(split[0].Trim()))
        {
            // The text starts with the colon itself
            checkString = checkString.Substring(1).Trim();
        }
        else if (IsAllCaps(split[0]))
        {
            // Only drop the label when something remains after it
            var newCheckString = string.Join(":", split.Skip(1)).Trim();
            if (!string.IsNullOrEmpty(newCheckString))
            {
                checkString = newCheckString;
            }
        }
    }

    // Remove dashes from the beginning (but keep a dash that stands alone on its line)
    if (removeDashes)
    {
        if (checkString.Length > 1 && Dashes.Contains(checkString[0]) && (checkString[1] != '\r' && checkString[1] != '\n'))
        {
            checkString = checkString.Substring(1).Trim();
        }
    }

    // Remove single-char quotes from the beginning
    if (checkString.Length > 0 && Quotes.Contains(checkString[0]))
    {
        if (checkString.Length > 3 && SingleQuotes.Contains(checkString[0]) && char.IsLetter(checkString[1]) && !char.IsUpper(checkString[1]) && char.IsWhiteSpace(checkString[2]) && char.IsLetter(checkString[3]))
        {
            // 's Avonds -- don't remove
        }
        else if (checkString.Length > 2 && SingleQuotes.Contains(checkString[0]) && checkString.Substring(1).StartsWith("cause"))
        {
            // 'cause -- don't remove
        }
        else
        {
            checkString = checkString.Substring(1).Trim();
        }
    }

    // Remove double-char quotes from the beginning
    if (checkString.Length > 1 && DoubleQuotes.Contains(checkString.Substring(0, 2)))
    {
        checkString = checkString.Substring(2).Trim();
    }

    // Remove music symbols from the beginning
    if (checkString.Length > 0 && MusicSymbols.Contains(checkString[0]))
    {
        checkString = checkString.Substring(1).Trim();
    }

    // Remove single-char quotes from the ending
    if (checkString.Length > 0 && Quotes.Contains(checkString[checkString.Length - 1]))
    {
        // The length check keeps the index below in range when the whole text is just "in'"
        if (checkString.Length > 3 && checkString[checkString.Length - 1] == '\'' && checkString.EndsWith("in'") && char.IsLetter(checkString[checkString.Length - 4]))
        {
            // nothin' -- Don't remove
        }
        else if (checkString[checkString.Length - 1] == '\'' && (checkString.EndsWith("déj'") || checkString.EndsWith("ap'") || checkString.EndsWith("app'")))
        {
            // déj' -- Don't remove
        }
        else
        {
            checkString = checkString.Substring(0, checkString.Length - 1).Trim();
        }
    }

    // Remove double-char quotes from the ending
    if (checkString.Length > 1 && DoubleQuotes.Contains(checkString.Substring(checkString.Length - 2, 2)))
    {
        checkString = checkString.Substring(0, checkString.Length - 2).Trim();
    }

    // Remove music symbols from the ending
    if (checkString.Length > 0 && MusicSymbols.Contains(checkString[checkString.Length - 1]))
    {
        checkString = checkString.Substring(0, checkString.Length - 1).Trim();
    }

    return checkString;
}
2020-04-11 12:08:03 +02:00
/// <summary>
/// Sanitizes <paramref name="input"/> and also removes a leading dash
/// (shorthand for the two-argument overload with removeDashes set to true).
/// </summary>
public static string SanitizeString(string input) => SanitizeString(input, true);
/// <summary>
/// Removes {override} blocks and a trailing "elevation" line (a last line holding only '_' or '.')
/// from a subtitle text, leaving tags and all other content untouched.
/// </summary>
/// <param name="input">The raw subtitle text.</param>
/// <param name="removeDashes">Not used by this method.</param>
/// <returns>The trimmed paragraph, or <paramref name="input"/> itself when it is null or empty.</returns>
public static string ExtractParagraphOnly(string input, bool removeDashes)
{
    // Regex.Replace throws on null; callers such as IsItalic only null-check the result
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    string checkString = Regex.Replace(input, "\\{.*?\\}", string.Empty).Trim();

    // Remove string elevation; the last two literals use a no-break space (Alt+255)
    if (checkString.EndsWith("\n_", StringComparison.Ordinal) || checkString.EndsWith("\n.", StringComparison.Ordinal)
        || checkString.EndsWith("\n _", StringComparison.Ordinal) || checkString.EndsWith("\n .", StringComparison.Ordinal)
        || checkString.EndsWith("\n\u00A0_", StringComparison.Ordinal) || checkString.EndsWith("\n\u00A0.", StringComparison.Ordinal))
    {
        checkString = checkString.Substring(0, checkString.Length - 1).Trim();
    }

    return checkString;
}
2020-04-11 12:08:03 +02:00
/// <summary>
/// Shorthand for the two-argument overload with removeDashes set to true.
/// </summary>
public static string ExtractParagraphOnly(string input) => ExtractParagraphOnly(input, true);
2020-04-11 10:31:14 +02:00
/// <summary>
/// Replaces the first ordinal occurrence of <paramref name="find"/> in <paramref name="source"/>
/// with <paramref name="replace"/>; returns <paramref name="source"/> unchanged when it is not found.
/// </summary>
public static string ReplaceFirstOccurrence(string source, string find, string replace)
{
    int index = source.IndexOf(find, StringComparison.Ordinal);
    return index < 0
        ? source
        : source.Remove(index, find.Length).Insert(index, replace);
}
/// <summary>
/// Replaces the last ordinal occurrence of <paramref name="find"/> in <paramref name="source"/>
/// with <paramref name="replace"/>; returns <paramref name="source"/> unchanged when it is not found.
/// </summary>
public static string ReplaceLastOccurrence(string source, string find, string replace)
{
    int index = source.LastIndexOf(find, StringComparison.Ordinal);
    return index < 0
        ? source
        : source.Remove(index, find.Length).Insert(index, replace);
}
/// <summary>
/// Returns the first space-separated word of <paramref name="input"/>. When that word is just a
/// known prefix (as in "... this is a test"), the prefix and the following word are returned together.
/// </summary>
/// <returns>The first word, or an empty string for null/empty input.</returns>
public static string GetFirstWord(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return "";
    }

    string[] words = input.Split(' ');
    string head = words[0];

    // A detached prefix is kept together with the word it belongs to
    if (words.Length > 1 && Prefixes.Contains(head))
    {
        return head + " " + words[1];
    }

    return head;
}
/// <summary>
/// Returns the last space-separated word of <paramref name="input"/>. When that word is just a
/// known suffix (as in "This is a test ..."), the preceding word and the suffix are returned together.
/// </summary>
/// <returns>The last word, or an empty string for null/empty input.</returns>
public static string GetLastWord(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return "";
    }

    string[] words = input.Split(' ');
    string tail = words[words.Length - 1];

    // A detached suffix is kept together with the word it belongs to
    if (words.Length > 1 && Suffixes.Contains(tail))
    {
        return words[words.Length - 2] + " " + tail;
    }

    return tail;
}
2020-04-11 12:08:03 +02:00
/// <summary>
/// Decides whether a continuation suffix should be added to <paramref name="input"/>.
/// </summary>
/// <param name="input">The subtitle text.</param>
/// <param name="profile">The continuation profile in use.</param>
/// <param name="sanitize">When true, the text is sanitized before it is inspected.</param>
/// <returns>
/// True when the text ends without punctuation, with a comma, or with a suffix already,
/// and does not end with "--", ":" or ";". False for empty text.
/// </returns>
public static bool ShouldAddSuffix(string input, ContinuationProfile profile, bool sanitize)
{
    string text = sanitize ? SanitizeString(input) : input;
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }

    bool mayContinue = EndsWithNothing(text, profile) || text.EndsWith(",") || HasSuffix(text, profile);
    return mayContinue && !(text.EndsWith("--") || text.EndsWith(":") || text.EndsWith(";"));
}
2020-04-11 12:08:03 +02:00
/// <summary>
/// Shorthand for the three-argument overload with sanitize set to true.
/// </summary>
public static bool ShouldAddSuffix(string input, ContinuationProfile profile) => ShouldAddSuffix(input, profile, true);
/// <summary>
/// Appends the profile's continuation suffix to the last word of <paramref name="originalText"/>,
/// unless that word already ends with it.
/// </summary>
/// <param name="originalText">The raw subtitle text (may contain tags).</param>
/// <param name="profile">The continuation profile supplying the suffix settings.</param>
/// <param name="gap">When true and the profile uses a different gap style, the gap suffix settings are used.</param>
/// <param name="addComma">When true, a comma is placed before the suffix unless the profile replaces commas.</param>
/// <returns>
/// The text with the suffix added, or <paramref name="originalText"/> unchanged when the sanitized
/// text is empty or consists only of prefixes/suffixes.
/// </returns>
public static string AddSuffixIfNeeded(string originalText, ContinuationProfile profile, bool gap, bool addComma)
{
    string text = SanitizeString(originalText);

    // Return if empty string
    if (string.IsNullOrEmpty(text))
    {
        return originalText;
    }

    // Return if only suffix/prefix
    if (IsOnlySuffix(text, profile) || IsOnlyPrefix(text, profile))
    {
        return originalText;
    }

    // Get last word
    string lastWord = GetLastWord(text);
    string newLastWord = lastWord;

    if (gap && profile.UseDifferentStyleGap)
    {
        // Make new last word
        string gapAddEnd = (profile.GapSuffixAddSpace ? " " : "") + profile.GapSuffix;
        if (gapAddEnd.Length == 0 || !newLastWord.EndsWith(gapAddEnd))
        {
            newLastWord = newLastWord.TrimEnd(',') + ((lastWord.EndsWith(",") || addComma) && !profile.GapSuffixReplaceComma ? "," : "") + gapAddEnd;
        }
    }
    else
    {
        // Make new last word
        string addEnd = (profile.SuffixAddSpace ? " " : "") + profile.Suffix;
        if (addEnd.Length == 0 || !newLastWord.EndsWith(addEnd))
        {
            newLastWord = newLastWord.TrimEnd(',') + ((lastWord.EndsWith(",") || addComma) && !profile.SuffixReplaceComma ? "," : "") + addEnd;
        }
    }

    // Check if it's not surrounded by HTML tags, then we'll place it outside the tags (remove comma if present)
    // Only if it's not a tag across the whole subtitle.
    string lastWordWithoutComma = lastWord.TrimEnd(',');
    var wordIndex = originalText.LastIndexOf(lastWordWithoutComma, StringComparison.Ordinal);
    if (wordIndex >= 0 && wordIndex < originalText.Length - 3)
    {
        int currentIndex = wordIndex + lastWordWithoutComma.Length;

        // Either a tag follows the word directly, or a comma sits between the word and the tag.
        // (The comma test used to have its two indices swapped, so it could never match and the
        // comma removal below was unreachable.)
        bool tagFollows = currentIndex < originalText.Length && originalText[currentIndex] == '<';
        bool commaThenTagFollows = currentIndex + 1 < originalText.Length
                                   && originalText[currentIndex] == ','
                                   && originalText[currentIndex + 1] == '<';

        if ((tagFollows || commaThenTagFollows) && !IsFullLineTag(originalText, currentIndex))
        {
            // Drop the comma inside the tag; the suffix computed below re-adds one if required
            if (commaThenTagFollows)
            {
                originalText = originalText.Remove(currentIndex, 1);
            }

            // Walk to the '>' that closes the last of the directly adjacent tags
            while (currentIndex + 1 < originalText.Length && !(originalText[currentIndex] == '>' && originalText[currentIndex + 1] != '<'))
            {
                currentIndex++;
            }

            var suffix = newLastWord.Replace(lastWordWithoutComma, "");

            // Drop a comma that directly follows the tags
            if (currentIndex + 1 < originalText.Length && originalText[currentIndex + 1] == ',')
            {
                originalText = originalText.Remove(currentIndex + 1, 1);
            }

            return originalText.Insert(currentIndex + 1, suffix);
        }
    }

    // Replace it
    return ReplaceLastOccurrence(originalText, lastWord, newLastWord);
}
/// <summary>
/// Shorthand for the four-argument overload with addComma set to false.
/// </summary>
public static string AddSuffixIfNeeded(string originalText, ContinuationProfile profile, bool gap) =>
    AddSuffixIfNeeded(originalText, profile, gap, false);
2020-04-12 15:24:09 +02:00
/// <summary>
/// Prepends the profile's continuation prefix to the first word of <paramref name="originalText"/>,
/// unless that word already starts with it.
/// </summary>
/// <param name="originalText">The raw subtitle text (may contain tags).</param>
/// <param name="profile">The continuation profile supplying the prefix settings.</param>
/// <param name="shouldRemoveDashesDuringSanitization">
/// When true, a leading dash is treated as a dialog dash and ignored, unless the profile's own
/// prefix is a dash or only the first line starts with a dash.
/// </param>
/// <param name="gap">When true and the profile uses a different gap style, the gap prefix settings are used.</param>
/// <returns>
/// The text with the prefix added, or <paramref name="originalText"/> unchanged when the sanitized
/// text is empty or consists only of prefixes/suffixes.
/// </returns>
public static string AddPrefixIfNeeded(string originalText, ContinuationProfile profile, bool shouldRemoveDashesDuringSanitization, bool gap)
{
    string sanitizedWithDash = SanitizeString(originalText, false);
    string sanitizedWithoutDash = SanitizeString(originalText, true);

    if (string.IsNullOrEmpty(sanitizedWithDash) && string.IsNullOrEmpty(sanitizedWithoutDash))
    {
        return originalText;
    }

    bool stripDialogDash = shouldRemoveDashesDuringSanitization;

    // If we're using a profile with dashes, count those as dashes instead of dialog dashes.
    if (stripDialogDash)
    {
        string activePrefix = gap && profile.UseDifferentStyleGap ? profile.GapPrefix : profile.Prefix;
        if (DashPrefixes.Contains(activePrefix))
        {
            stripDialogDash = false;
        }
    }

    // If there is only a dash on the first line, count it as dash instead of dialog dash.
    if (stripDialogDash && sanitizedWithDash != null)
    {
        var lines = sanitizedWithDash.SplitToLines();
        int lastDashLine = -1;
        for (var lineNumber = 0; lineNumber < lines.Count; lineNumber++)
        {
            var line = lines[lineNumber];
            if (DashPrefixes.Any(dash => line.StartsWith(dash)))
            {
                lastDashLine = lineNumber;
            }
        }

        if (lastDashLine == 0)
        {
            stripDialogDash = false;
        }
    }

    string text = stripDialogDash ? sanitizedWithoutDash : sanitizedWithDash;
    if (string.IsNullOrEmpty(text))
    {
        return originalText;
    }

    // Nothing to mark when the text is only prefixes/suffixes
    if (IsOnlySuffix(text, profile) || IsOnlyPrefix(text, profile))
    {
        return originalText;
    }

    // Build the new first word from the regular or the gap prefix settings
    string firstWord = GetFirstWord(text);
    string lead = gap && profile.UseDifferentStyleGap
        ? profile.GapPrefix + (profile.GapPrefixAddSpace ? " " : "")
        : profile.Prefix + (profile.PrefixAddSpace ? " " : "");
    string newFirstWord = lead.Length == 0 || !firstWord.StartsWith(lead)
        ? lead + firstWord
        : firstWord;

    // When the word directly follows a tag that does not span the whole line,
    // the prefix goes in front of the tag(s) instead of inside them.
    int wordIndex = originalText.IndexOf(firstWord, StringComparison.Ordinal);
    if (wordIndex >= 3)
    {
        int cursor = wordIndex - 1;
        if (originalText[cursor] == '>' && !IsFullLineTag(originalText, cursor))
        {
            // Walk back to the '<' that opens the first of the directly adjacent tags
            while (cursor >= 1 && (originalText[cursor - 1] == '>' || originalText[cursor] != '<'))
            {
                cursor--;
            }

            return originalText.Insert(cursor, newFirstWord.Replace(firstWord, ""));
        }
    }

    return ReplaceFirstOccurrence(originalText, firstWord, newFirstWord);
}
/// <summary>
/// Shorthand for the four-argument overload with shouldRemoveDashesDuringSanitization set to true.
/// </summary>
public static string AddPrefixIfNeeded(string originalText, ContinuationProfile profile, bool gap) =>
    AddPrefixIfNeeded(originalText, profile, true, gap);
/// <summary>
/// Removes known continuation suffixes from the last word of <paramref name="originalText"/>.
/// </summary>
/// <param name="originalText">The raw subtitle text (may contain tags).</param>
/// <param name="profile">Not read by this method.</param>
/// <param name="additionalSuffixes">Extra suffixes to strip besides the built-in ones; null is treated as none.</param>
/// <param name="addComma">When true, a comma is appended to the stripped word if it does not end with one.</param>
/// <returns>
/// The text without the suffix; <paramref name="originalText"/> when the sanitized text is empty
/// or when the result would be empty.
/// </returns>
public static string RemoveSuffix(string originalText, ContinuationProfile profile, List<string> additionalSuffixes, bool addComma)
{
    string text = SanitizeString(originalText);

    // Return if empty string
    if (string.IsNullOrEmpty(text))
    {
        return originalText;
    }

    // Get last word
    string lastWord = GetLastWord(text);
    string newLastWord = lastWord;

    // Strip each suffix once, except when it stands alone on its own line
    foreach (string suffix in Suffixes.Union(additionalSuffixes ?? Enumerable.Empty<string>()))
    {
        if (newLastWord.EndsWith(suffix) && !newLastWord.EndsWith(Environment.NewLine + suffix))
        {
            newLastWord = newLastWord.Substring(0, newLastWord.Length - suffix.Length);
        }
    }

    newLastWord = newLastWord.Trim();

    if (addComma && !newLastWord.EndsWith(","))
    {
        newLastWord = newLastWord + ",";
    }

    string result;

    if (originalText.LastIndexOf(lastWord, StringComparison.Ordinal) >= 0)
    {
        // The sanitized word occurs literally in the original: replace it
        result = ReplaceLastOccurrence(originalText, lastWord, newLastWord);
    }
    else if (newLastWord.Length == 0)
    {
        // The whole word was suffix and it cannot be located in the original (e.g. it is
        // interleaved with tags). string.Replace throws on an empty search string, so leave
        // the text as it is.
        result = originalText;
    }
    else
    {
        // Just remove whatever suffix we need to remove
        var suffix = lastWord.Replace(newLastWord, "");
        result = ReplaceLastOccurrence(originalText, suffix, "");
    }

    // Return original if empty string
    if (string.IsNullOrEmpty(result))
    {
        return originalText;
    }

    return result;
}
2020-04-11 12:08:03 +02:00
/// <summary>
/// Removes the built-in continuation suffixes from the text without adding a comma.
/// </summary>
public static string RemoveSuffix(string originalText, ContinuationProfile profile) =>
    RemoveSuffix(originalText, profile, new List<string>(), false);
/// <summary>
/// Removes the built-in continuation suffixes from the text, optionally appending a comma.
/// </summary>
public static string RemoveSuffix(string originalText, ContinuationProfile profile, bool addComma) =>
    RemoveSuffix(originalText, profile, new List<string>(), addComma);
2020-04-12 15:24:09 +02:00
/// <summary>
/// Removes known continuation prefixes from the first word of <paramref name="originalText"/>.
/// </summary>
/// <param name="originalText">The raw subtitle text (may contain tags).</param>
/// <param name="profile">The continuation profile; its prefix decides whether a leading dash is a continuation dash.</param>
/// <param name="shouldRemoveDashesDuringSanitization">
/// When true, a leading dash is treated as a dialog dash and ignored, unless the profile's own
/// prefix is a dash or only the first line starts with a dash.
/// </param>
/// <param name="gap">When true and the profile uses a different gap style, the gap prefix is consulted.</param>
/// <returns>
/// The text without the prefix; <paramref name="originalText"/> when the sanitized text is empty
/// or when the result would be empty.
/// </returns>
public static string RemovePrefix(string originalText, ContinuationProfile profile, bool shouldRemoveDashesDuringSanitization, bool gap)
{
    // Decide if we need to remove dashes
    string textWithDash = SanitizeString(originalText, false);
    string textWithoutDash = SanitizeString(originalText, true);
    string leadingDialogDash = null;
    bool removeDashesDuringSanitization = shouldRemoveDashesDuringSanitization;

    // Return if empty string
    if (string.IsNullOrEmpty(textWithDash) && string.IsNullOrEmpty(textWithoutDash))
    {
        return originalText;
    }

    // If we're using a profile with dashes, count those as dashes instead of dialog dashes.
    if (removeDashesDuringSanitization)
    {
        foreach (string prefix in DashPrefixes)
        {
            if ((!gap && profile.Prefix == prefix)
                || (gap && profile.UseDifferentStyleGap && profile.GapPrefix == prefix)
                || (gap && !profile.UseDifferentStyleGap && profile.Prefix == prefix))
            {
                removeDashesDuringSanitization = false;
                leadingDialogDash = prefix;
                break;
            }
        }
    }

    // If there is only a dash on the first line, count it as dash instead of dialog dash.
    if (removeDashesDuringSanitization && textWithDash != null)
    {
        var split = textWithDash.SplitToLines();
        int lastLineWithDash = -1;
        for (var i = 0; i < split.Count; i++)
        {
            foreach (string prefix in DashPrefixes)
            {
                if (split[i].StartsWith(prefix))
                {
                    lastLineWithDash = i;
                    break;
                }
            }
        }

        if (lastLineWithDash == 0)
        {
            removeDashesDuringSanitization = false;
        }
    }

    string text = removeDashesDuringSanitization ? textWithoutDash : textWithDash;

    // Return if empty string
    if (string.IsNullOrEmpty(text))
    {
        return originalText;
    }

    // Get first word of the paragraph
    string firstWord = GetFirstWord(text);
    string newFirstWord = firstWord;

    if (leadingDialogDash != null)
    {
        // ToCharArray instead of Convert.ToChar: the latter throws unless the string is exactly one char
        newFirstWord = newFirstWord.TrimStart(leadingDialogDash.ToCharArray()).Trim();
    }

    // Strip each prefix once, except when it stands alone on its own line.
    // The newline guard uses StartsWith, matching HasPrefix (it used to test EndsWith).
    foreach (string prefix in Prefixes)
    {
        if (newFirstWord.StartsWith(prefix) && !newFirstWord.StartsWith(prefix + Environment.NewLine))
        {
            newFirstWord = newFirstWord.Substring(prefix.Length);
        }
    }

    newFirstWord = newFirstWord.Trim();

    string result;

    if (originalText.IndexOf(firstWord, StringComparison.Ordinal) >= 0)
    {
        // The sanitized word occurs literally in the original: replace it
        result = ReplaceFirstOccurrence(originalText, firstWord, newFirstWord);
    }
    else if (newFirstWord.Length == 0)
    {
        // The whole word was prefix and it cannot be located in the original (e.g. it is
        // interleaved with tags). string.Replace throws on an empty search string, so leave
        // the text as it is.
        result = originalText;
    }
    else
    {
        // Just remove whatever prefix we need to remove
        var prefix = firstWord.Replace(newFirstWord, "");
        result = ReplaceFirstOccurrence(originalText, prefix, "");
    }

    // Return original if empty string
    if (string.IsNullOrEmpty(result))
    {
        return originalText;
    }

    return result;
}
/// <summary>
/// Shorthand for the four-argument overload with shouldRemoveDashesDuringSanitization set to true.
/// </summary>
public static string RemovePrefix(string originalText, ContinuationProfile profile, bool gap) =>
    RemovePrefix(originalText, profile, true, gap);
/// <summary>
/// Shorthand for the four-argument overload with shouldRemoveDashesDuringSanitization set to true
/// and gap set to false.
/// </summary>
public static string RemovePrefix(string originalText, ContinuationProfile profile) =>
    RemovePrefix(originalText, profile, true, false);
/// <summary>
/// Repeatedly removes continuation prefixes from the start of <paramref name="originalText"/>
/// until the sanitized text no longer starts with one. Dashes are never treated as dialog dashes here.
/// </summary>
/// <returns>The text without leading prefixes; <paramref name="originalText"/> when it is null or empty.</returns>
public static string RemoveAllPrefixes(string originalText, ContinuationProfile profile)
{
    var text = originalText;

    // Return if empty string
    if (string.IsNullOrEmpty(text))
    {
        return originalText;
    }

    while (HasPrefix(SanitizeString(text, false), profile))
    {
        var stripped = RemovePrefix(text, profile, false, false /* Not used because of false before */);

        // RemovePrefix can hand back the text unchanged (e.g. when the prefix is interleaved with
        // tags and cannot be located); without this check the loop would never terminate.
        if (stripped == text)
        {
            break;
        }

        text = stripped;
    }

    return text;
}
/// <summary>
/// Tells whether <paramref name="input"/> looks like the start of a new sentence, judged by the
/// casing of its first characters.
/// </summary>
/// <param name="input">The (sanitized) text to inspect.</param>
/// <param name="iNewSentence">When false, text starting with the English pronoun "I" is not counted as a new sentence.</param>
/// <returns>True for a new sentence; false otherwise and for null/empty input.</returns>
public static bool IsNewSentence(string input, bool iNewSentence)
{
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    // English I
    if (!iNewSentence && (input.StartsWith("I ") || input.StartsWith("I'")))
    {
        return false;
    }

    char first = input[0];

    // First letter
    if (char.IsLetter(first) && char.IsUpper(first))
    {
        return true;
    }

    // Every remaining pattern needs at least two characters
    if (input.Length < 2)
    {
        return false;
    }

    char second = input[1];

    // iPhone
    if (char.IsLetter(first) && !char.IsUpper(first) && char.IsLetter(second) && char.IsUpper(second))
    {
        return true;
    }

    // 's Avonds
    if (input.Length > 3 && char.IsPunctuation(first) && char.IsLetter(second) && !char.IsUpper(second)
        && char.IsWhiteSpace(input[2]) && char.IsLetter(input[3]) && char.IsUpper(input[3]))
    {
        return true;
    }

    // Spanish
    return "¿¡".IndexOf(first) >= 0 && char.IsLetter(second) && char.IsUpper(second);
}
/// <summary>
/// Shorthand for the two-argument overload with iNewSentence set to false.
/// </summary>
public static bool IsNewSentence(string input) => IsNewSentence(input, false);
2020-04-11 10:31:14 +02:00
/// <summary>
/// Tells whether <paramref name="input"/> ends a sentence: a single full stop (not "..", "..."),
/// '?', '!', ';' or a double hyphen.
/// </summary>
/// <returns>True when the text ends a sentence; false otherwise and for null/empty input.</returns>
public static bool IsEndOfSentence(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    switch (input[input.Length - 1])
    {
        case '.':
            // Two or more dots are a continuation marker, not a full stop
            return !input.EndsWith("..", StringComparison.Ordinal);
        case '?':
        case '!':
        case ';': /* Greek question mark */
            return true;
        default:
            return input.EndsWith("--", StringComparison.Ordinal);
    }
}
2020-04-12 13:55:29 +02:00
/// <summary>
/// Tells whether <paramref name="input"/> ends without any closing mark: no continuation suffix,
/// no sentence ending, and no trailing ',', ':', ';' or '-'.
/// </summary>
/// <returns>True when nothing closes the text; also true for null/empty input.</returns>
public static bool EndsWithNothing(string input, ContinuationProfile profile)
{
    if (string.IsNullOrEmpty(input))
    {
        return true;
    }

    if (HasSuffixUnsafe(input, profile) || IsEndOfSentence(input))
    {
        return false;
    }

    return !(input.EndsWith(",") || input.EndsWith(":") || input.EndsWith(";") || input.EndsWith("-"));
}
2020-04-11 10:31:14 +02:00
/// <summary>
/// Tells whether at least 80% of the letters in <paramref name="input"/> are upper case.
/// Non-letter characters are ignored.
/// </summary>
/// <returns>True when the text is (mostly) upper case; false for null/empty or letter-free input.</returns>
public static bool IsAllCaps(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    int letterCount = input.Count(c => char.IsLetter(c));
    int upperCount = input.Count(c => char.IsLetter(c) && char.IsUpper(c));

    // With no letters at all this is 0 / 0.0 = NaN, which fails the comparison
    return upperCount / (double)letterCount >= 0.80;
}
/// <summary>
/// Tells whether the whole paragraph is italic, i.e. nothing but line breaks remains once every
/// italic span (opening tag up to its closing tag, or to the end of the text) is removed.
/// </summary>
/// <returns>True when everything is inside italic tags; false otherwise and for null/empty input.</returns>
public static bool IsItalic(string input)
{
    // ExtractParagraphOnly is not guaranteed to accept null, so check before calling it
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    input = ExtractParagraphOnly(input);

    // Return if empty string
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    int startIndex;
    while ((startIndex = input.IndexOf("<i>", StringComparison.Ordinal)) >= 0)
    {
        // Look for the closing tag after the opening tag only; a stray closing tag earlier in
        // the text would otherwise produce a negative Substring length.
        var endIndex = input.IndexOf("</i>", startIndex, StringComparison.Ordinal);
        var textToRemove = endIndex >= 0 ? input.Substring(startIndex, (endIndex + 4) - startIndex) : input.Substring(startIndex);
        input = input.Replace(textToRemove, "");
    }

    // Anything other than line breaks left over means part of the text is not italic
    foreach (var c in input)
    {
        if (c != '\n' && c != '\r')
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Tells whether the tag on the line containing <paramref name="position"/> wraps the whole line,
/// i.e. no letter or digit remains on that line once the span from the first '&lt;' to the end of
/// the last closing tag is removed.
/// </summary>
/// <param name="input">The raw subtitle text.</param>
/// <param name="position">
/// Character index used to pick the line. When it is not strictly inside the extracted paragraph,
/// the whole paragraph is examined.
/// </param>
/// <returns>
/// True when the tag spans the full line; false when text remains outside it, when the line holds
/// no tag at all, or for null/empty input.
/// </returns>
public static bool IsFullLineTag(string input, int position)
{
    // ExtractParagraphOnly is not guaranteed to accept null, so check before calling it
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    input = ExtractParagraphOnly(input);

    // Return if empty string
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    // Cut out the line around the position
    bool positionInside = position > 0 && position < input.Length;

    var lineStartIndex = positionInside ? input.LastIndexOf("\n", position, StringComparison.Ordinal) : 0;
    if (lineStartIndex == -1)
    {
        lineStartIndex = 0;
    }

    var lineEndIndex = positionInside ? input.IndexOf("\n", position, StringComparison.Ordinal) : input.Length;
    if (lineEndIndex == -1)
    {
        lineEndIndex = input.Length;
    }

    var line = input.Substring(lineStartIndex, lineEndIndex - lineStartIndex);

    var startIndex = line.IndexOf("<", StringComparison.Ordinal);
    if (startIndex < 0)
    {
        // No tag on this line (the position may have shifted because the paragraph was trimmed);
        // Substring(-1, ...) would throw here.
        return false;
    }

    var endIndex = line.LastIndexOf("</", StringComparison.Ordinal);
    if (endIndex >= 0)
    {
        if (startIndex == endIndex)
        {
            // Only a closing tag on this line: the tag was opened on an earlier line
            startIndex = 0;
        }

        // An unterminated closing tag is taken to run to the end of the line
        var closeIndex = line.IndexOf(">", endIndex, StringComparison.Ordinal);
        endIndex = closeIndex >= 0 ? closeIndex + 1 : line.Length;
    }
    else
    {
        // Only an opening tag on this line: it runs to the end of the line
        endIndex = line.Length;
    }

    // endIndex is always greater than startIndex here, so the removed span is never empty
    var textToRemove = line.Substring(startIndex, endIndex - startIndex);
    line = line.Replace(textToRemove, "");

    foreach (var c in line)
    {
        if (char.IsLetterOrDigit(c))
        {
            return false;
        }
    }

    return true;
}
2020-04-12 13:55:29 +02:00
/// <summary>
/// Tells whether <paramref name="input"/> consists of nothing but prefixes (the profile's prefix,
/// its gap prefix when the gap style is enabled, and the built-in ones) and white space.
/// </summary>
/// <returns>True when only prefixes are present; false for blank input or when other text remains.</returns>
public static bool IsOnlyPrefix(string input, ContinuationProfile profile)
{
    if (input.Trim().Length == 0)
    {
        return false;
    }

    var remainder = input;

    if (profile.Prefix.Length > 0)
    {
        remainder = remainder.Replace(profile.Prefix, "");
    }

    if (profile.UseDifferentStyleGap && profile.GapPrefix.Length > 0)
    {
        remainder = remainder.Replace(profile.GapPrefix, "");
    }

    // Remove every built-in prefix, in list order
    remainder = Prefixes.Aggregate(remainder, (current, prefix) => current.Replace(prefix, ""));

    return remainder.Trim().Length == 0;
}
/// <summary>
/// Tells whether <paramref name="input"/> consists of nothing but suffixes (the profile's suffix,
/// its gap suffix when the gap style is enabled, and the built-in ones) and white space.
/// </summary>
/// <returns>True when only suffixes are present; false for blank input or when other text remains.</returns>
public static bool IsOnlySuffix(string input, ContinuationProfile profile)
{
    if (input.Trim().Length == 0)
    {
        return false;
    }

    var remainder = input;

    if (profile.Suffix.Length > 0)
    {
        remainder = remainder.Replace(profile.Suffix, "");
    }

    if (profile.UseDifferentStyleGap && profile.GapSuffix.Length > 0)
    {
        remainder = remainder.Replace(profile.GapSuffix, "");
    }

    // Remove every built-in suffix, in list order
    remainder = Suffixes.Aggregate(remainder, (current, suffix) => current.Replace(suffix, ""));

    return remainder.Trim().Length == 0;
}
2020-04-11 10:31:14 +02:00
/// <summary>
/// Tells whether <paramref name="input"/> starts with a continuation prefix: the profile's prefix,
/// its gap prefix (when the gap style is enabled) or a built-in one. A prefix directly followed by
/// a line break does not count, and neither does text that is only prefixes.
/// </summary>
public static bool HasPrefix(string input, ContinuationProfile profile)
{
    if (IsOnlyPrefix(input, profile))
    {
        return false;
    }

    string lineBreak = Environment.NewLine;

    if (profile.Prefix.Length > 0 && input.StartsWith(profile.Prefix) && !input.StartsWith(profile.Prefix + lineBreak))
    {
        return true;
    }

    if (profile.UseDifferentStyleGap && profile.GapPrefix.Length > 0 && input.StartsWith(profile.GapPrefix) && !input.StartsWith(profile.GapPrefix + lineBreak))
    {
        return true;
    }

    return Prefixes.Any(prefix => input.StartsWith(prefix) && !input.StartsWith(prefix + lineBreak));
}
/// <summary>
/// Tells whether <paramref name="input"/> ends with a continuation suffix: the profile's suffix,
/// its gap suffix (when the gap style is enabled) or a built-in one. A suffix directly preceded by
/// a line break does not count, and neither does text that is only suffixes.
/// </summary>
public static bool HasSuffix(string input, ContinuationProfile profile)
{
    // Apart from the only-suffix guard, the checks are exactly those of HasSuffixUnsafe
    return !IsOnlySuffix(input, profile) && HasSuffixUnsafe(input, profile);
}
2020-04-12 13:55:29 +02:00
/// <summary>
/// Same checks as HasSuffix, but without first rejecting text that consists only of suffixes.
/// </summary>
private static bool HasSuffixUnsafe(string input, ContinuationProfile profile)
{
    string lineBreak = Environment.NewLine;

    if (profile.Suffix.Length > 0 && input.EndsWith(profile.Suffix) && !input.EndsWith(lineBreak + profile.Suffix))
    {
        return true;
    }

    if (profile.UseDifferentStyleGap && profile.GapSuffix.Length > 0 && input.EndsWith(profile.GapSuffix) && !input.EndsWith(lineBreak + profile.GapSuffix))
    {
        return true;
    }

    // A built-in suffix only counts when there is text in front of it
    return Suffixes.Any(suffix => input.EndsWith(suffix) && !input.EndsWith(lineBreak + suffix) && input.Length > suffix.Length);
}
2020-04-11 10:31:14 +02:00
/// <summary>
/// Tells whether <paramref name="input"/> starts with a conjunction of the given language,
/// followed by a space, a comma or a colon. The match is case-sensitive.
/// </summary>
/// <param name="input">The text to inspect.</param>
/// <param name="language">Language code; only "nl", "en" and "fr" have a word list.</param>
/// <returns>True on a match; false otherwise, for null/empty input and for other languages.</returns>
public static bool StartsWithConjunction(string input, string language)
{
    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    string[] conjunctions;
    switch (language)
    {
        case "nl":
            conjunctions = new[]
            {
                "maar", "dus", "omdat", "aangezien", "want", "vermits", "zodat", "opdat", "zoals", "bijvoorbeeld",
                "net", "behalve", "al", "alhoewel", "hoewel", "ofschoon", "tenzij", "waardoor", "waarna", "misschien", "waarschijnlijk", "vast"
            };
            break;
        case "en":
            conjunctions = new[] { "and", "but", "for", "nor", "yet", "or", "so", "such as" };
            break;
        case "fr":
            conjunctions = new[] { "mais", "car", "donc", "parce que", "par exemple" };
            break;
        default:
            return false;
    }

    return conjunctions.Any(word =>
        input.StartsWith(word + " ") || input.StartsWith(word + ",") || input.StartsWith(word + ":"));
}
/// <summary>
/// Prepares two consecutive subtitle texts for merging by dropping the continuation markers
/// between them.
/// </summary>
/// <param name="input">The first text.</param>
/// <param name="nextInput">The text that follows it.</param>
/// <param name="profile">The continuation profile in use.</param>
/// <param name="language">Language code used to detect a leading conjunction in the second text.</param>
/// <returns>
/// The first text without its suffix and the second without its prefixes when the first ends with
/// a suffix and the second starts with a prefix, or when the second is non-empty and does not
/// start a new sentence; otherwise both texts unchanged.
/// </returns>
public static Tuple<string, string> MergeHelper(string input, string nextInput, ContinuationProfile profile, string language)
{
    var thisText = SanitizeString(input);
    var nextText = SanitizeString(nextInput);
    var nextTextWithDash = SanitizeString(nextInput, false);

    // Title 1 ends with a suffix AND title 2 starts with a prefix
    bool markedOnBothSides = HasSuffix(thisText, profile) && HasPrefix(nextTextWithDash, profile);

    // ... or title 2 is a continuing sentence
    bool isContinuation = markedOnBothSides || (!IsNewSentence(nextText) && !string.IsNullOrEmpty(nextText));

    if (!isContinuation)
    {
        return Tuple.Create(input, nextInput);
    }

    var newNextText = RemoveAllPrefixes(nextInput, profile);

    // A comma is added to title 1 when title 2 starts with a conjunction
    var newText = RemoveSuffix(input, profile, StartsWithConjunction(newNextText, language));
    return Tuple.Create(newText, newNextText);
}
/// <summary>
/// Returns the configured minimum milliseconds between lines plus 5, but never less than 300.
/// </summary>
public static int GetMinimumGapMs() =>
    Math.Max(Configuration.Settings.General.MinimumMillisecondsBetweenLines + 5, 300);
/// <summary>
/// Returns the display name of a continuation style, read from the language settings.
/// Unlisted values yield the name of the "None" style.
/// </summary>
public static string GetContinuationStyleName(ContinuationStyle continuationStyle)
{
    var names = Configuration.Settings.Language.Settings;

    switch (continuationStyle)
    {
        case ContinuationStyle.NoneTrailingDots:
            return names.ContinuationStyleNoneTrailingDots;
        case ContinuationStyle.NoneLeadingTrailingDots:
            return names.ContinuationStyleNoneLeadingTrailingDots;
        case ContinuationStyle.OnlyTrailingDots:
            return names.ContinuationStyleOnlyTrailingDots;
        case ContinuationStyle.LeadingTrailingDots:
            return names.ContinuationStyleLeadingTrailingDots;
        case ContinuationStyle.LeadingTrailingDash:
            return names.ContinuationStyleLeadingTrailingDash;
        case ContinuationStyle.LeadingTrailingDashDots:
            return names.ContinuationStyleLeadingTrailingDashDots;
        default:
            return names.ContinuationStyleNone;
    }
}
/// <summary>
/// Maps a continuation style to its list index (0-6). Unlisted values map to 0, the index of "None".
/// </summary>
public static int GetIndexFromContinuationStyle(ContinuationStyle continuationStyle)
{
    // The position in this array is the index of the style
    var stylesByIndex = new[]
    {
        ContinuationStyle.None,
        ContinuationStyle.NoneTrailingDots,
        ContinuationStyle.NoneLeadingTrailingDots,
        ContinuationStyle.OnlyTrailingDots,
        ContinuationStyle.LeadingTrailingDots,
        ContinuationStyle.LeadingTrailingDash,
        ContinuationStyle.LeadingTrailingDashDots
    };

    return Math.Max(0, Array.IndexOf(stylesByIndex, continuationStyle));
}
/// <summary>
/// Maps a list index (1-6) to its continuation style. Any other index maps to "None".
/// </summary>
public static ContinuationStyle GetContinuationStyleFromIndex(int index)
{
    // The position in this array is the index of the style
    var stylesByIndex = new[]
    {
        ContinuationStyle.None,
        ContinuationStyle.NoneTrailingDots,
        ContinuationStyle.NoneLeadingTrailingDots,
        ContinuationStyle.OnlyTrailingDots,
        ContinuationStyle.LeadingTrailingDots,
        ContinuationStyle.LeadingTrailingDash,
        ContinuationStyle.LeadingTrailingDashDots
    };

    return index >= 1 && index < stylesByIndex.Length
        ? stylesByIndex[index]
        : ContinuationStyle.None;
}
/// <summary>
/// Builds a sample text showing how a continuation style marks four consecutive subtitles,
/// with a pause ("(...)") between the third and the fourth.
/// </summary>
public static string GetContinuationStylePreview(ContinuationStyle continuationStyle)
{
    const string line1 = "Lorem ipsum dolor sit amet\nconsectetur adipiscing elit,";
    const string line2 = "donec eget turpis consequat\nturpis commodo hendrerit";
    const string line3 = "praesent vel velit rutrum tellus\npharetra tristique vel non orci";
    const string linePause = "(...)";
    const string line4 = "mauris mollis consectetur nibh,\nnec congue est viverra quis.";

    var profile = GetContinuationProfile(continuationStyle);

    var parts = new[]
    {
        AddSuffixIfNeeded(line1, profile, false),
        AddSuffixIfNeeded(AddPrefixIfNeeded(line2, profile, false), profile, false),
        // The third subtitle is followed by the pause, so its suffix uses the gap style
        AddSuffixIfNeeded(AddPrefixIfNeeded(line3, profile, false), profile, true),
        linePause,
        // The fourth subtitle follows the pause, so its prefix uses the gap style
        AddPrefixIfNeeded(line4, profile, true)
    };

    return string.Join("\n\n", parts);
}
/// <summary>
/// Creates the continuation profile (prefix/suffix settings) for a continuation style.
/// Unlisted values yield the "None" profile: empty prefix and suffix, no gap style.
/// </summary>
public static ContinuationProfile GetContinuationProfile(ContinuationStyle continuationStyle)
{
    // Start from the "None" profile and apply what each style changes
    var profile = new ContinuationProfile
    {
        Suffix = "",
        SuffixAddSpace = false,
        SuffixReplaceComma = false,
        Prefix = "",
        PrefixAddSpace = false,
        UseDifferentStyleGap = false
    };

    switch (continuationStyle)
    {
        case ContinuationStyle.NoneTrailingDots:
        case ContinuationStyle.NoneLeadingTrailingDots:
            // No markers normally; dots only around a gap
            profile.UseDifferentStyleGap = true;
            profile.GapSuffix = "...";
            profile.GapSuffixAddSpace = false;
            profile.GapSuffixReplaceComma = true;
            profile.GapPrefix = continuationStyle == ContinuationStyle.NoneLeadingTrailingDots ? "..." : "";
            profile.GapPrefixAddSpace = false;
            break;

        case ContinuationStyle.OnlyTrailingDots:
        case ContinuationStyle.LeadingTrailingDots:
            profile.Suffix = "...";
            profile.SuffixReplaceComma = true;
            if (continuationStyle == ContinuationStyle.LeadingTrailingDots)
            {
                profile.Prefix = "...";
            }
            break;

        case ContinuationStyle.LeadingTrailingDash:
        case ContinuationStyle.LeadingTrailingDashDots:
            profile.Suffix = "-";
            profile.SuffixAddSpace = true;
            profile.SuffixReplaceComma = true;
            profile.Prefix = "-";
            profile.PrefixAddSpace = true;
            if (continuationStyle == ContinuationStyle.LeadingTrailingDashDots)
            {
                // Dashes normally; dots around a gap
                profile.UseDifferentStyleGap = true;
                profile.GapSuffix = "...";
                profile.GapSuffixAddSpace = false;
                profile.GapSuffixReplaceComma = true;
                profile.GapPrefix = "...";
                profile.GapPrefixAddSpace = false;
            }
            break;
    }

    return profile;
}
/// <summary>
/// Settings describing how unfinished sentences are marked at the end of one subtitle (suffix)
/// and at the start of the next (prefix), with optional separate settings for use around a gap.
/// </summary>
public class ContinuationProfile
{
/// <summary>Text appended to the last word by AddSuffixIfNeeded.</summary>
public string Suffix { get; set; }
/// <summary>When true, a space is inserted in front of <see cref="Suffix"/>.</summary>
public bool SuffixAddSpace { get; set; }
/// <summary>When true, no comma is kept or added in front of <see cref="Suffix"/>.</summary>
public bool SuffixReplaceComma { get; set; }
/// <summary>Text put in front of the first word by AddPrefixIfNeeded.</summary>
public string Prefix { get; set; }
/// <summary>When true, a space is inserted after <see cref="Prefix"/>.</summary>
public bool PrefixAddSpace { get; set; }
/// <summary>When true, the Gap* settings are used instead whenever the caller passes gap = true.</summary>
public bool UseDifferentStyleGap { get; set; }
/// <summary>Gap-style counterpart of <see cref="Suffix"/>.</summary>
public string GapSuffix { get; set; }
/// <summary>Gap-style counterpart of <see cref="SuffixAddSpace"/>.</summary>
public bool GapSuffixAddSpace { get; set; }
/// <summary>Gap-style counterpart of <see cref="SuffixReplaceComma"/>.</summary>
public bool GapSuffixReplaceComma { get; set; }
/// <summary>Gap-style counterpart of <see cref="Prefix"/>.</summary>
public string GapPrefix { get; set; }
/// <summary>Gap-style counterpart of <see cref="PrefixAddSpace"/>.</summary>
public bool GapPrefixAddSpace { get; set; }
}
}
}