mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2025-02-01 05:21:40 +01:00
Merge pull request #2091 from ivandrofly/strippable
[Rename] - Stripable => Strippable.
This commit is contained in:
commit
96b01df184
@ -101,7 +101,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
{
|
||||
if (p.Text != p.Text.ToUpper())
|
||||
{
|
||||
var st = new StripableText(next.Text);
|
||||
var st = new StrippableText(next.Text);
|
||||
if (st.StrippedText.Length > 0 && st.StrippedText != st.StrippedText.ToUpper() &&
|
||||
char.IsUpper(st.StrippedText[0]))
|
||||
{
|
||||
@ -139,7 +139,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
string oldText = p.Text;
|
||||
|
||||
string text = p.Text.Substring(0, indexOfNewLine);
|
||||
var st = new StripableText(text);
|
||||
var st = new StrippableText(text);
|
||||
if (st.Pre.TrimEnd().EndsWith('¿')) // Spanish ¿
|
||||
p.Text = p.Text.Insert(indexOfNewLine, "?");
|
||||
else if (st.Pre.TrimEnd().EndsWith('¡')) // Spanish ¡
|
||||
|
@ -121,7 +121,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
part = part.Substring(speakerEnd + 1);
|
||||
}
|
||||
|
||||
var st = new StripableText(part);
|
||||
var st = new StrippableText(part);
|
||||
if (j == 0 && mark == '!' && st.Pre == "¿" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
|
||||
{
|
||||
p.Text = inverseMark + p.Text;
|
||||
|
@ -23,7 +23,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
|
||||
if (lastText.EndsWith(':') || lastText.EndsWith(';'))
|
||||
{
|
||||
var st = new StripableText(p.Text);
|
||||
var st = new StrippableText(p.Text);
|
||||
if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
|
||||
p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
|
||||
}
|
||||
|
@ -259,7 +259,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
if (indexOfNewLine > 0)
|
||||
{
|
||||
string text = p.Text.Substring(indexOfNewLine + len);
|
||||
var st = new StripableText(text);
|
||||
var st = new StrippableText(text);
|
||||
|
||||
if (st.StrippedText.Length > 0 && Helper.IsTurkishLittleI(st.StrippedText[0], encoding, language) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
|
||||
{
|
||||
|
@ -35,7 +35,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
string oldText = p.Text;
|
||||
if (p.Text.Length > 3 && callbacks.AllowFix(p, fixAction))
|
||||
{
|
||||
var st = new StripableText(p.Text);
|
||||
var st = new StrippableText(p.Text);
|
||||
string text = st.StrippedText;
|
||||
int start = text.IndexOfAny(ExpectedChars);
|
||||
while (start > 0 && start < text.Length)
|
||||
@ -51,7 +51,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
}
|
||||
if ((start + 3 < text.Length) && (text[start + 1] == ' ') && !IsAbbreviation(text, start, callbacks))
|
||||
{
|
||||
var subText = new StripableText(text.Substring(start + 2));
|
||||
var subText = new StrippableText(text.Substring(start + 2));
|
||||
text = text.Substring(0, start + 2) + subText.CombineWithPrePost(ToUpperFirstLetter(subText.StrippedText, callbacks));
|
||||
}
|
||||
// Try to reach the last dot if char at *start is '.'.
|
||||
|
@ -35,7 +35,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
match = match.NextMatch();
|
||||
}
|
||||
|
||||
var st = new StripableText(p.Text);
|
||||
var st = new StrippableText(p.Text);
|
||||
match = ReBeforeLowercaseLetter.Match(st.StrippedText);
|
||||
while (match.Success)
|
||||
{
|
||||
|
@ -269,16 +269,16 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
if (remove)
|
||||
{
|
||||
int idx = text.IndexOf('-');
|
||||
var st = new StripableText(text);
|
||||
var st = new StrippableText(text);
|
||||
if (idx < 5 && st.Pre.Length >= idx)
|
||||
{
|
||||
text = text.Remove(idx, 1).TrimStart();
|
||||
idx = text.IndexOf('-');
|
||||
st = new StripableText(text);
|
||||
st = new StrippableText(text);
|
||||
if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
|
||||
{
|
||||
text = text.Remove(idx, 1).TrimStart();
|
||||
st = new StripableText(text);
|
||||
st = new StrippableText(text);
|
||||
}
|
||||
idx = text.IndexOf('-');
|
||||
if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
|
||||
@ -315,7 +315,7 @@ namespace Nikse.SubtitleEdit.Core.Forms.FixCommonErrors
|
||||
var prev = subtitle.GetParagraphOrDefault(i - 1);
|
||||
if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
|
||||
{
|
||||
var st = new StripableText(text);
|
||||
var st = new StrippableText(text);
|
||||
if (st.Pre.EndsWith('-') || st.Pre.EndsWith("- ", StringComparison.Ordinal))
|
||||
{
|
||||
text = st.Pre.TrimEnd('-', ' ') + st.StrippedText + st.Post;
|
||||
|
@ -184,7 +184,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
}
|
||||
else
|
||||
{
|
||||
var preStripable = new StripableText(pre);
|
||||
var preStrippable = new StrippableText(pre);
|
||||
var remove = true;
|
||||
|
||||
if (indexOfColon < line.Length - 1)
|
||||
@ -243,7 +243,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
else if (newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal))
|
||||
newText = "<u>" + newText;
|
||||
|
||||
if (!IsHIDescription(preStripable.StrippedText))
|
||||
if (!IsHIDescription(preStrippable.StrippedText))
|
||||
noOfNames++;
|
||||
}
|
||||
else
|
||||
@ -324,9 +324,9 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
var arr = newText.SplitToLines();
|
||||
if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1)
|
||||
{
|
||||
string arr0 = new StripableText(arr[0]).StrippedText;
|
||||
var arr1Stripable = new StripableText(arr[1]);
|
||||
string arr1 = arr1Stripable.StrippedText;
|
||||
string arr0 = new StrippableText(arr[0]).StrippedText;
|
||||
var arr1Strippable = new StrippableText(arr[1]);
|
||||
string arr1 = arr1Strippable.StrippedText;
|
||||
|
||||
if (arr0.Length > 0 && arr1.Length > 1)
|
||||
{
|
||||
@ -336,7 +336,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
char c = arr0[arr0.Length - 1];
|
||||
if (char.IsLower(c) || c == ',') // first line ends with comma or lower case letter
|
||||
{
|
||||
if (!arr1Stripable.Pre.Contains("..."))
|
||||
if (!arr1Strippable.Pre.Contains("..."))
|
||||
{
|
||||
insertDash = false;
|
||||
}
|
||||
@ -348,7 +348,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
string arr0QuoteTrimmed = arr[0].TrimEnd('"');
|
||||
if (arr0QuoteTrimmed.Length > 0 && !".?!".Contains(arr0QuoteTrimmed[arr0QuoteTrimmed.Length - 1]) && !arr0QuoteTrimmed.EndsWith("</i>", StringComparison.Ordinal) && !arr0QuoteTrimmed.EndsWith("--", StringComparison.Ordinal) && !arr0QuoteTrimmed.EndsWith("—", StringComparison.Ordinal))
|
||||
{
|
||||
if (!arr1Stripable.Pre.Contains('-'))
|
||||
if (!arr1Strippable.Pre.Contains('-'))
|
||||
{
|
||||
insertDash = false;
|
||||
}
|
||||
@ -367,7 +367,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
{
|
||||
if (indexOfDialogChar < 0 || indexOfDialogChar > 4)
|
||||
{
|
||||
var st = new StripableText(newText, string.Empty, string.Empty);
|
||||
var st = new StrippableText(newText, string.Empty, string.Empty);
|
||||
newText = st.Pre + "- " + st.StrippedText + st.Post;
|
||||
}
|
||||
|
||||
@ -380,7 +380,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
}
|
||||
if ((indexOfDialogChar < 0 || indexOfDialogChar > 6) && !second.StartsWith('-'))
|
||||
{
|
||||
var st = new StripableText(second, String.Empty, String.Empty);
|
||||
var st = new StrippableText(second, String.Empty, String.Empty);
|
||||
second = st.Pre + "- " + st.StrippedText + st.Post;
|
||||
newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second;
|
||||
}
|
||||
@ -388,7 +388,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
}
|
||||
else if (newText.Contains('-') && !newText.Contains(Environment.NewLine))
|
||||
{
|
||||
var st = new StripableText(newText);
|
||||
var st = new StrippableText(newText);
|
||||
if (st.Pre.Contains('-'))
|
||||
newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post;
|
||||
}
|
||||
@ -467,7 +467,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
pre = pre.Replace(Settings.CustomStart, string.Empty);
|
||||
post = post.Replace(Settings.CustomEnd, string.Empty);
|
||||
}
|
||||
var st = new StripableText(text, pre, post);
|
||||
var st = new StrippableText(text, pre, post);
|
||||
var sb = new StringBuilder();
|
||||
var parts = st.StrippedText.Trim().SplitToLines();
|
||||
int lineNumber = 0;
|
||||
@ -476,7 +476,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
int noOfNamesRemovedNotInLineOne = 0;
|
||||
foreach (string s in parts)
|
||||
{
|
||||
var stSub = new StripableText(s, pre, post);
|
||||
var stSub = new StrippableText(s, pre, post);
|
||||
string strippedText = stSub.StrippedText;
|
||||
if ((lineNumber == parts.Length - 1 && st.Post.Contains('?')) || stSub.Post.Contains('?'))
|
||||
strippedText += "?";
|
||||
@ -561,7 +561,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
if (Settings.RemoveInterjections)
|
||||
text = RemoveInterjections(text);
|
||||
|
||||
st = new StripableText(text, " >-\"'‘`´♪¿¡.…—", " -\"'`´♪.!?:…—");
|
||||
st = new StrippableText(text, " >-\"'‘`´♪¿¡.…—", " -\"'`´♪.!?:…—");
|
||||
text = st.StrippedText;
|
||||
if (StartsAndEndsWithHearImpairedTags(text))
|
||||
{
|
||||
@ -577,7 +577,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
var splitParts = HtmlUtil.RemoveHtmlTags(text).Replace(" ", string.Empty).Split(splitChars, StringSplitOptions.RemoveEmptyEntries);
|
||||
if (splitParts.Length == 2)
|
||||
{
|
||||
var temp = new StripableText(text);
|
||||
var temp = new StrippableText(text);
|
||||
temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
|
||||
int splitIndex = temp.StrippedText.LastIndexOfAny(splitChars);
|
||||
if (splitIndex > 0)
|
||||
@ -1017,7 +1017,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
|
||||
if (temp.EndsWith(Environment.NewLine + "- ", StringComparison.Ordinal))
|
||||
temp = temp.Remove(temp.Length - 2).TrimEnd();
|
||||
|
||||
var st = new StripableText(temp);
|
||||
var st = new StrippableText(temp);
|
||||
if (st.StrippedText.Length == 0)
|
||||
return string.Empty;
|
||||
|
||||
|
@ -190,7 +190,7 @@
|
||||
<Compile Include="SpellCheck\UndoObject.cs" />
|
||||
<Compile Include="SsaStyle.cs" />
|
||||
<Compile Include="StringExtensions.cs" />
|
||||
<Compile Include="StripableText.cs" />
|
||||
<Compile Include="StrippableText.cs" />
|
||||
<Compile Include="SubtitleEditRegex.cs" />
|
||||
<Compile Include="Subtitle.cs" />
|
||||
<Compile Include="SubtitleFormats\AribB36.cs" />
|
||||
|
@ -1,281 +1,281 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core
|
||||
{
|
||||
/// <summary>
/// Splits a text into three parts: a prefix (<see cref="Pre"/>) made of leading strip characters and
/// opening tags, a suffix (<see cref="Post"/>) made of trailing strip characters and closing tags, and
/// the remaining core text (<see cref="StrippedText"/>). <see cref="MergedString"/> joins them again.
/// </summary>
public class StripableText
{
    /// <summary>Leading characters/tags that were stripped from the start of the text.</summary>
    public string Pre { get; set; }

    /// <summary>Trailing characters/tags that were stripped from the end of the text.</summary>
    public string Post { get; set; }

    /// <summary>The text that remains once <see cref="Pre"/> and <see cref="Post"/> are removed.</summary>
    public string StrippedText { get; set; }

    /// <summary>The text exactly as it was passed to the constructor.</summary>
    public string OriginalText { get; private set; }

    /// <summary>Concatenation of <see cref="Pre"/>, <see cref="StrippedText"/> and <see cref="Post"/>.</summary>
    public string MergedString
    {
        get { return Pre + StrippedText + Post; }
    }

    /// <summary>
    /// Splits <paramref name="text"/> using the built-in sets of start and end strip characters.
    /// </summary>
    /// <param name="text">The text to split.</param>
    public StripableText(string text)
        : this(text, " >-\"”“['‘`´¶(♪¿¡.…—", " -\"”“]'`´¶)♪.!?:…—")
    {
    }

    /// <summary>
    /// Splits <paramref name="text"/> into <see cref="Pre"/>, <see cref="StrippedText"/> and <see cref="Post"/>.
    /// </summary>
    /// <param name="text">The text to split.</param>
    /// <param name="stripStartCharacters">Characters that are moved to <see cref="Pre"/> when found at the start.</param>
    /// <param name="stripEndCharacters">Characters that are moved to <see cref="Post"/> when found at the end.</param>
    public StripableText(string text, string stripStartCharacters, string stripEndCharacters)
    {
        OriginalText = text;

        Pre = string.Empty;
        if (text.Length > 0 && ("<{" + stripStartCharacters).Contains(text[0]))
        {
            int beginLength;
            do
            {
                // Keep looping for as long as a pass manages to remove something.
                beginLength = text.Length;

                // Move the whole run of leading strip characters in one step.
                int stripCount = 0;
                while (stripCount < text.Length && stripStartCharacters.Contains(text[stripCount]))
                {
                    stripCount++;
                }
                if (stripCount > 0)
                {
                    Pre += text.Substring(0, stripCount);
                    text = text.Remove(0, stripCount);
                }

                // ASS/SSA codes like {\an9}
                int endIndex = text.IndexOf('}');
                if (endIndex > 0 && text.StartsWith("{\\", StringComparison.Ordinal))
                {
                    // Only take the code when no other '{' opens before the first '}'.
                    int nextStartIndex = text.IndexOf('{', 2);
                    if (nextStartIndex == -1 || nextStartIndex > endIndex)
                    {
                        endIndex++;
                        Pre += text.Substring(0, endIndex);
                        text = text.Remove(0, endIndex);
                    }
                }

                // tags like <i> or <font face="Segoe Print" color="#ff0000">
                endIndex = text.IndexOf('>');
                if (text.StartsWith('<') && endIndex >= 2)
                {
                    endIndex++;
                    Pre += text.Substring(0, endIndex);
                    text = text.Remove(0, endIndex);
                }
            }
            while (text.Length < beginLength);
        }

        Post = string.Empty;
        if (text.Length > 0 && (">" + stripEndCharacters).Contains(text[text.Length - 1]))
        {
            int beginLength;
            do
            {
                beginLength = text.Length;

                // Move the whole run of trailing strip characters in one step.
                int keepLength = text.Length;
                while (keepLength > 0 && stripEndCharacters.Contains(text[keepLength - 1]))
                {
                    keepLength--;
                }
                if (keepLength < text.Length)
                {
                    Post = text.Substring(keepLength) + Post;
                    text = text.Substring(0, keepLength);
                }

                if (text.EndsWith('>'))
                {
                    // tags </i> </b> </u>
                    if (text.EndsWith("</i>", StringComparison.OrdinalIgnoreCase) ||
                        text.EndsWith("</b>", StringComparison.OrdinalIgnoreCase) ||
                        text.EndsWith("</u>", StringComparison.OrdinalIgnoreCase))
                    {
                        Post = text.Substring(text.Length - 4) + Post;
                        text = text.Substring(0, text.Length - 4);
                    }

                    // tag </font>
                    if (text.EndsWith("</font>", StringComparison.OrdinalIgnoreCase))
                    {
                        Post = text.Substring(text.Length - 7) + Post;
                        text = text.Substring(0, text.Length - 7);
                    }
                }
            }
            while (text.Length < beginLength);
        }

        StrippedText = text;
    }

    /// <summary>
    /// Builds the placeholder id for <paramref name="idName"/>, records it together with
    /// <paramref name="name"/> in the two parallel lists, and returns the id.
    /// </summary>
    private static string GetAndInsertNextId(List<string> replaceIds, List<string> replaceNames, string name, int idName)
    {
        string id = $"_@{idName}_";
        replaceIds.Add(id);
        replaceNames.Add(name);
        return id;
    }

    /// <summary>
    /// Replaces each stand-alone occurrence of a name from <paramref name="namesEtc"/> in
    /// <see cref="StrippedText"/> with a placeholder id. The id, the list spelling and the spelling found
    /// in the text are appended to <paramref name="replaceIds"/>, <paramref name="replaceNames"/> and
    /// <paramref name="originalNames"/> respectively.
    /// </summary>
    private void ReplaceNames1Remove(List<string> namesEtc, List<string> replaceIds, List<string> replaceNames, List<string> originalNames)
    {
        // Temporarily move one leading '.' from Post onto the text
        // (presumably so that entries ending with a dot can match at the very end - verify against the name lists).
        if (Post.StartsWith('.'))
        {
            StrippedText += ".";
            Post = Post.Remove(0, 1);
        }

        string lower = StrippedText.ToLower();
        int idName = 0;
        foreach (string name in namesEtc)
        {
            int start = lower.IndexOf(name, StringComparison.OrdinalIgnoreCase);
            while (start >= 0 && start < lower.Length)
            {
                // The match must begin at the start of the text or right after a separator.
                bool startOk = (start == 0) || (lower[start - 1] == ' ') || (lower[start - 1] == '-') ||
                               (lower[start - 1] == '"') || (lower[start - 1] == '\'') || (lower[start - 1] == '>') ||
                               Environment.NewLine.EndsWith(lower[start - 1]);

                // The name "Don" must not swallow the start of "don't" (ordinal, culture-independent check).
                if (startOk && string.CompareOrdinal(name, "Don") == 0 && string.CompareOrdinal(lower, start, "don't", 0, 5) == 0)
                    startOk = false;

                if (startOk)
                {
                    int end = start + name.Length;
                    bool endOk = end <= lower.Length;
                    if (endOk)
                        endOk = end == lower.Length || (@" ,.!?:;')- <""" + Environment.NewLine).Contains(lower[end]);

                    if (endOk && StrippedText.Length >= start + name.Length)
                    {
                        string originalName = StrippedText.Substring(start, name.Length);
                        originalNames.Add(originalName);
                        StrippedText = StrippedText.Remove(start, name.Length);
                        StrippedText = StrippedText.Insert(start, GetAndInsertNextId(replaceIds, replaceNames, name, idName++));
                        lower = StrippedText.ToLower();
                    }
                }
                if (start + 3 > lower.Length)
                    start = lower.Length + 1;
                else
                    start = lower.IndexOf(name, start + 3, StringComparison.OrdinalIgnoreCase);
            }
        }

        // Give exactly one trailing '.' back to Post. TrimEnd('.') would drop every trailing dot
        // while only one is re-added, losing characters when the text ends with several dots.
        if (StrippedText.EndsWith('.'))
        {
            Post = "." + Post;
            StrippedText = StrippedText.Substring(0, StrippedText.Length - 1);
        }
    }

    /// <summary>
    /// Replaces every placeholder id in <see cref="StrippedText"/> with the entry at the same index of
    /// <paramref name="replaceNames"/>.
    /// </summary>
    private void ReplaceNames2Fix(List<string> replaceIds, List<string> replaceNames)
    {
        for (int i = 0; i < replaceIds.Count; i++)
        {
            StrippedText = StrippedText.Replace(replaceIds[i], replaceNames[i]);
        }
    }

    // Characters whose presence in the text triggers the "uppercase after break" pass.
    private static readonly char[] ExpectedCharsArray = { '.', '!', '?', ':', ';', ')', ']', '}', '(', '[', '{' };

    /// <summary>
    /// Adjusts the casing of <see cref="StrippedText"/> in place while names are swapped for placeholders.
    /// </summary>
    /// <param name="namesEtc">Names that are replaced by placeholders before casing is changed.</param>
    /// <param name="changeNameCases">True to restore names with the spelling from <paramref name="namesEtc"/>; false to restore the spelling found in the text.</param>
    /// <param name="makeUppercaseAfterBreak">True to uppercase the first other character following a break character.</param>
    /// <param name="checkLastLine">True to uppercase the first character when <paramref name="lastLine"/> is empty or ends with one of the checked end characters.</param>
    /// <param name="lastLine">The preceding line; only read when <paramref name="checkLastLine"/> is true.</param>
    public void FixCasing(List<string> namesEtc, bool changeNameCases, bool makeUppercaseAfterBreak, bool checkLastLine, string lastLine)
    {
        var replaceIds = new List<string>();
        var replaceNames = new List<string>();
        var originalNames = new List<string>();
        ReplaceNames1Remove(namesEtc, replaceIds, replaceNames, originalNames);

        if (checkLastLine)
        {
            string s = HtmlUtil.RemoveHtmlTags(lastLine).TrimEnd().TrimEnd('\"').TrimEnd();

            bool startWithUppercase = string.IsNullOrEmpty(s) ||
                                      s.EndsWith('.') ||
                                      s.EndsWith('!') ||
                                      s.EndsWith('?') ||
                                      s.EndsWith(". ♪", StringComparison.Ordinal) ||
                                      s.EndsWith("! ♪", StringComparison.Ordinal) ||
                                      s.EndsWith("? ♪", StringComparison.Ordinal) ||
                                      s.EndsWith(']') ||
                                      s.EndsWith(')') ||
                                      s.EndsWith(':');

            // start with uppercase after music symbol - but only if next line does not start with music symbol
            if (!startWithUppercase && (s.EndsWith('♪') || s.EndsWith('♫')))
            {
                if (!Pre.Contains(new[] { '♪', '♫' }))
                    startWithUppercase = true;
            }

            if (startWithUppercase && StrippedText.Length > 0 && !Pre.Contains("..."))
            {
                StrippedText = char.ToUpper(StrippedText[0]) + StrippedText.Substring(1);
            }
        }

        if (makeUppercaseAfterBreak && StrippedText.Contains(ExpectedCharsArray))
        {
            const string breakAfterChars = @".!?:;)]}([{";
            const string expectedChars = "\"`´'()<>!?.- \r\n";
            var sb = new StringBuilder();
            bool lastWasBreak = false;
            for (int i = 0; i < StrippedText.Length; i++)
            {
                var s = StrippedText[i];
                if (lastWasBreak)
                {
                    if (expectedChars.Contains(s))
                    {
                        // Punctuation/whitespace after a break: copy and keep waiting for a letter.
                        sb.Append(s);
                    }
                    else if ((sb.EndsWith('<') || sb.ToString().EndsWith("</", StringComparison.Ordinal)) && i + 1 < StrippedText.Length && StrippedText[i + 1] == '>')
                    { // tags
                        sb.Append(s);
                    }
                    else if (sb.EndsWith('<') && s == '/' && i + 2 < StrippedText.Length && StrippedText[i + 2] == '>')
                    { // tags
                        sb.Append(s);
                    }
                    else if (sb.ToString().EndsWith("... ", StringComparison.Ordinal))
                    {
                        // No uppercase after an ellipsis followed by a space.
                        sb.Append(s);
                        lastWasBreak = false;
                    }
                    else
                    {
                        if (breakAfterChars.Contains(s))
                        {
                            sb.Append(s);
                        }
                        else
                        {
                            lastWasBreak = false;
                            sb.Append(char.ToUpper(s));
                        }
                    }
                }
                else
                {
                    sb.Append(s);
                    if (breakAfterChars.Contains(s))
                    {
                        var idx = sb.ToString().IndexOf('[');
                        if (s == ']' && idx > 1)
                        { // I [Motor roaring] love you!
                            string temp = sb.ToString(0, idx - 1).Trim();
                            if (temp.Length > 0 && !char.IsLower(temp[temp.Length - 1]))
                                lastWasBreak = true;
                        }
                        else
                        {
                            lastWasBreak = true;
                        }
                    }
                }
            }
            StrippedText = sb.ToString();
        }

        ReplaceNames2Fix(replaceIds, changeNameCases ? replaceNames : originalNames);
    }

    /// <summary>
    /// Returns <paramref name="text"/> wrapped in the current <see cref="Pre"/> and <see cref="Post"/>.
    /// </summary>
    public string CombineWithPrePost(string text)
    {
        return Pre + text + Post;
    }
}
|
||||
}
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core
|
||||
{
|
||||
/// <summary>
/// Splits a text into three parts: a prefix (<see cref="Pre"/>) made of leading strip characters and
/// opening tags, a suffix (<see cref="Post"/>) made of trailing strip characters and closing tags, and
/// the remaining core text (<see cref="StrippedText"/>). <see cref="MergedString"/> joins them again.
/// </summary>
public class StrippableText
{
    /// <summary>Leading characters/tags that were stripped from the start of the text.</summary>
    public string Pre { get; set; }

    /// <summary>Trailing characters/tags that were stripped from the end of the text.</summary>
    public string Post { get; set; }

    /// <summary>The text that remains once <see cref="Pre"/> and <see cref="Post"/> are removed.</summary>
    public string StrippedText { get; set; }

    /// <summary>The text exactly as it was passed to the constructor.</summary>
    public string OriginalText { get; private set; }

    /// <summary>Concatenation of <see cref="Pre"/>, <see cref="StrippedText"/> and <see cref="Post"/>.</summary>
    public string MergedString
    {
        get { return Pre + StrippedText + Post; }
    }

    /// <summary>
    /// Splits <paramref name="text"/> using the built-in sets of start and end strip characters.
    /// </summary>
    /// <param name="text">The text to split.</param>
    public StrippableText(string text)
        : this(text, " >-\"”“['‘`´¶(♪¿¡.…—", " -\"”“]'`´¶)♪.!?:…—")
    {
    }

    /// <summary>
    /// Splits <paramref name="text"/> into <see cref="Pre"/>, <see cref="StrippedText"/> and <see cref="Post"/>.
    /// </summary>
    /// <param name="text">The text to split.</param>
    /// <param name="stripStartCharacters">Characters that are moved to <see cref="Pre"/> when found at the start.</param>
    /// <param name="stripEndCharacters">Characters that are moved to <see cref="Post"/> when found at the end.</param>
    public StrippableText(string text, string stripStartCharacters, string stripEndCharacters)
    {
        OriginalText = text;

        Pre = string.Empty;
        if (text.Length > 0 && ("<{" + stripStartCharacters).Contains(text[0]))
        {
            int beginLength;
            do
            {
                // Keep looping for as long as a pass manages to remove something.
                beginLength = text.Length;

                // Move the whole run of leading strip characters in one step.
                int stripCount = 0;
                while (stripCount < text.Length && stripStartCharacters.Contains(text[stripCount]))
                {
                    stripCount++;
                }
                if (stripCount > 0)
                {
                    Pre += text.Substring(0, stripCount);
                    text = text.Remove(0, stripCount);
                }

                // ASS/SSA codes like {\an9}
                int endIndex = text.IndexOf('}');
                if (endIndex > 0 && text.StartsWith("{\\", StringComparison.Ordinal))
                {
                    // Only take the code when no other '{' opens before the first '}'.
                    int nextStartIndex = text.IndexOf('{', 2);
                    if (nextStartIndex == -1 || nextStartIndex > endIndex)
                    {
                        endIndex++;
                        Pre += text.Substring(0, endIndex);
                        text = text.Remove(0, endIndex);
                    }
                }

                // tags like <i> or <font face="Segoe Print" color="#ff0000">
                endIndex = text.IndexOf('>');
                if (text.StartsWith('<') && endIndex >= 2)
                {
                    endIndex++;
                    Pre += text.Substring(0, endIndex);
                    text = text.Remove(0, endIndex);
                }
            }
            while (text.Length < beginLength);
        }

        Post = string.Empty;
        if (text.Length > 0 && (">" + stripEndCharacters).Contains(text[text.Length - 1]))
        {
            int beginLength;
            do
            {
                beginLength = text.Length;

                // Move the whole run of trailing strip characters in one step.
                int keepLength = text.Length;
                while (keepLength > 0 && stripEndCharacters.Contains(text[keepLength - 1]))
                {
                    keepLength--;
                }
                if (keepLength < text.Length)
                {
                    Post = text.Substring(keepLength) + Post;
                    text = text.Substring(0, keepLength);
                }

                if (text.EndsWith('>'))
                {
                    // tags </i> </b> </u>
                    if (text.EndsWith("</i>", StringComparison.OrdinalIgnoreCase) ||
                        text.EndsWith("</b>", StringComparison.OrdinalIgnoreCase) ||
                        text.EndsWith("</u>", StringComparison.OrdinalIgnoreCase))
                    {
                        Post = text.Substring(text.Length - 4) + Post;
                        text = text.Substring(0, text.Length - 4);
                    }

                    // tag </font>
                    if (text.EndsWith("</font>", StringComparison.OrdinalIgnoreCase))
                    {
                        Post = text.Substring(text.Length - 7) + Post;
                        text = text.Substring(0, text.Length - 7);
                    }
                }
            }
            while (text.Length < beginLength);
        }

        StrippedText = text;
    }

    /// <summary>
    /// Builds the placeholder id for <paramref name="idName"/>, records it together with
    /// <paramref name="name"/> in the two parallel lists, and returns the id.
    /// </summary>
    private static string GetAndInsertNextId(List<string> replaceIds, List<string> replaceNames, string name, int idName)
    {
        string id = $"_@{idName}_";
        replaceIds.Add(id);
        replaceNames.Add(name);
        return id;
    }

    /// <summary>
    /// Replaces each stand-alone occurrence of a name from <paramref name="namesEtc"/> in
    /// <see cref="StrippedText"/> with a placeholder id. The id, the list spelling and the spelling found
    /// in the text are appended to <paramref name="replaceIds"/>, <paramref name="replaceNames"/> and
    /// <paramref name="originalNames"/> respectively.
    /// </summary>
    private void ReplaceNames1Remove(List<string> namesEtc, List<string> replaceIds, List<string> replaceNames, List<string> originalNames)
    {
        // Temporarily move one leading '.' from Post onto the text
        // (presumably so that entries ending with a dot can match at the very end - verify against the name lists).
        if (Post.StartsWith('.'))
        {
            StrippedText += ".";
            Post = Post.Remove(0, 1);
        }

        string lower = StrippedText.ToLower();
        int idName = 0;
        foreach (string name in namesEtc)
        {
            int start = lower.IndexOf(name, StringComparison.OrdinalIgnoreCase);
            while (start >= 0 && start < lower.Length)
            {
                // The match must begin at the start of the text or right after a separator.
                bool startOk = (start == 0) || (lower[start - 1] == ' ') || (lower[start - 1] == '-') ||
                               (lower[start - 1] == '"') || (lower[start - 1] == '\'') || (lower[start - 1] == '>') ||
                               Environment.NewLine.EndsWith(lower[start - 1]);

                // The name "Don" must not swallow the start of "don't" (ordinal, culture-independent check).
                if (startOk && string.CompareOrdinal(name, "Don") == 0 && string.CompareOrdinal(lower, start, "don't", 0, 5) == 0)
                    startOk = false;

                if (startOk)
                {
                    int end = start + name.Length;
                    bool endOk = end <= lower.Length;
                    if (endOk)
                        endOk = end == lower.Length || (@" ,.!?:;')- <""" + Environment.NewLine).Contains(lower[end]);

                    if (endOk && StrippedText.Length >= start + name.Length)
                    {
                        string originalName = StrippedText.Substring(start, name.Length);
                        originalNames.Add(originalName);
                        StrippedText = StrippedText.Remove(start, name.Length);
                        StrippedText = StrippedText.Insert(start, GetAndInsertNextId(replaceIds, replaceNames, name, idName++));
                        lower = StrippedText.ToLower();
                    }
                }
                if (start + 3 > lower.Length)
                    start = lower.Length + 1;
                else
                    start = lower.IndexOf(name, start + 3, StringComparison.OrdinalIgnoreCase);
            }
        }

        // Give exactly one trailing '.' back to Post. TrimEnd('.') would drop every trailing dot
        // while only one is re-added, losing characters when the text ends with several dots.
        if (StrippedText.EndsWith('.'))
        {
            Post = "." + Post;
            StrippedText = StrippedText.Substring(0, StrippedText.Length - 1);
        }
    }

    /// <summary>
    /// Replaces every placeholder id in <see cref="StrippedText"/> with the entry at the same index of
    /// <paramref name="replaceNames"/>.
    /// </summary>
    private void ReplaceNames2Fix(List<string> replaceIds, List<string> replaceNames)
    {
        for (int i = 0; i < replaceIds.Count; i++)
        {
            StrippedText = StrippedText.Replace(replaceIds[i], replaceNames[i]);
        }
    }

    // Characters whose presence in the text triggers the "uppercase after break" pass.
    private static readonly char[] ExpectedCharsArray = { '.', '!', '?', ':', ';', ')', ']', '}', '(', '[', '{' };

    /// <summary>
    /// Adjusts the casing of <see cref="StrippedText"/> in place while names are swapped for placeholders.
    /// </summary>
    /// <param name="namesEtc">Names that are replaced by placeholders before casing is changed.</param>
    /// <param name="changeNameCases">True to restore names with the spelling from <paramref name="namesEtc"/>; false to restore the spelling found in the text.</param>
    /// <param name="makeUppercaseAfterBreak">True to uppercase the first other character following a break character.</param>
    /// <param name="checkLastLine">True to uppercase the first character when <paramref name="lastLine"/> is empty or ends with one of the checked end characters.</param>
    /// <param name="lastLine">The preceding line; only read when <paramref name="checkLastLine"/> is true.</param>
    public void FixCasing(List<string> namesEtc, bool changeNameCases, bool makeUppercaseAfterBreak, bool checkLastLine, string lastLine)
    {
        var replaceIds = new List<string>();
        var replaceNames = new List<string>();
        var originalNames = new List<string>();
        ReplaceNames1Remove(namesEtc, replaceIds, replaceNames, originalNames);

        if (checkLastLine)
        {
            string s = HtmlUtil.RemoveHtmlTags(lastLine).TrimEnd().TrimEnd('\"').TrimEnd();

            bool startWithUppercase = string.IsNullOrEmpty(s) ||
                                      s.EndsWith('.') ||
                                      s.EndsWith('!') ||
                                      s.EndsWith('?') ||
                                      s.EndsWith(". ♪", StringComparison.Ordinal) ||
                                      s.EndsWith("! ♪", StringComparison.Ordinal) ||
                                      s.EndsWith("? ♪", StringComparison.Ordinal) ||
                                      s.EndsWith(']') ||
                                      s.EndsWith(')') ||
                                      s.EndsWith(':');

            // start with uppercase after music symbol - but only if next line does not start with music symbol
            if (!startWithUppercase && (s.EndsWith('♪') || s.EndsWith('♫')))
            {
                if (!Pre.Contains(new[] { '♪', '♫' }))
                    startWithUppercase = true;
            }

            if (startWithUppercase && StrippedText.Length > 0 && !Pre.Contains("..."))
            {
                StrippedText = char.ToUpper(StrippedText[0]) + StrippedText.Substring(1);
            }
        }

        if (makeUppercaseAfterBreak && StrippedText.Contains(ExpectedCharsArray))
        {
            const string breakAfterChars = @".!?:;)]}([{";
            const string expectedChars = "\"`´'()<>!?.- \r\n";
            var sb = new StringBuilder();
            bool lastWasBreak = false;
            for (int i = 0; i < StrippedText.Length; i++)
            {
                var s = StrippedText[i];
                if (lastWasBreak)
                {
                    if (expectedChars.Contains(s))
                    {
                        // Punctuation/whitespace after a break: copy and keep waiting for a letter.
                        sb.Append(s);
                    }
                    else if ((sb.EndsWith('<') || sb.ToString().EndsWith("</", StringComparison.Ordinal)) && i + 1 < StrippedText.Length && StrippedText[i + 1] == '>')
                    { // tags
                        sb.Append(s);
                    }
                    else if (sb.EndsWith('<') && s == '/' && i + 2 < StrippedText.Length && StrippedText[i + 2] == '>')
                    { // tags
                        sb.Append(s);
                    }
                    else if (sb.ToString().EndsWith("... ", StringComparison.Ordinal))
                    {
                        // No uppercase after an ellipsis followed by a space.
                        sb.Append(s);
                        lastWasBreak = false;
                    }
                    else
                    {
                        if (breakAfterChars.Contains(s))
                        {
                            sb.Append(s);
                        }
                        else
                        {
                            lastWasBreak = false;
                            sb.Append(char.ToUpper(s));
                        }
                    }
                }
                else
                {
                    sb.Append(s);
                    if (breakAfterChars.Contains(s))
                    {
                        var idx = sb.ToString().IndexOf('[');
                        if (s == ']' && idx > 1)
                        { // I [Motor roaring] love you!
                            string temp = sb.ToString(0, idx - 1).Trim();
                            if (temp.Length > 0 && !char.IsLower(temp[temp.Length - 1]))
                                lastWasBreak = true;
                        }
                        else
                        {
                            lastWasBreak = true;
                        }
                    }
                }
            }
            StrippedText = sb.ToString();
        }

        ReplaceNames2Fix(replaceIds, changeNameCases ? replaceNames : originalNames);
    }

    /// <summary>
    /// Returns <paramref name="text"/> wrapped in the current <see cref="Pre"/> and <see cref="Post"/>.
    /// </summary>
    public string CombineWithPrePost(string text)
    {
        return Pre + text + Post;
    }
}
|
||||
}
|
@ -110,14 +110,14 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
// first all to lower
|
||||
text = text.ToLower().Trim();
|
||||
text = text.FixExtraSpaces();
|
||||
var st = new StripableText(text);
|
||||
var st = new StrippableText(text);
|
||||
st.FixCasing(namesEtc, false, true, true, lastLine); // fix all casing but names (that's a separate option)
|
||||
text = st.MergedString;
|
||||
}
|
||||
}
|
||||
else if (radioButtonUppercase.Checked)
|
||||
{
|
||||
var st = new StripableText(text);
|
||||
var st = new StrippableText(text);
|
||||
text = st.Pre + st.StrippedText.ToUpper() + st.Post;
|
||||
text = HtmlUtil.FixUpperTags(text); // tags inside text
|
||||
}
|
||||
|
@ -85,7 +85,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
if (item.Checked && text != null && text.Contains(name, StringComparison.OrdinalIgnoreCase) && name.Length > 1 && name != name.ToLower())
|
||||
{
|
||||
var st = new StripableText(text);
|
||||
var st = new StrippableText(text);
|
||||
st.FixCasing(new List<string> { name }, true, false, false, string.Empty);
|
||||
text = st.MergedString;
|
||||
}
|
||||
|
@ -220,7 +220,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
if (merge && (p.Text.TrimEnd().EndsWith('!') || p.Text.TrimEnd().EndsWith('.')))
|
||||
{
|
||||
var st = new StripableText(p.Text);
|
||||
var st = new StrippableText(p.Text);
|
||||
if (st.StrippedText.Length > 0 && char.IsUpper(st.StrippedText[0]))
|
||||
merge = false;
|
||||
}
|
||||
|
@ -642,7 +642,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
lastLine.EndsWith('!') ||
|
||||
lastLine.EndsWith('?'))
|
||||
{
|
||||
var st = new StripableText(l);
|
||||
var st = new StrippableText(l);
|
||||
if (st.StrippedText.StartsWith('i') && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') && !st.Pre.EndsWith("...", StringComparison.Ordinal))
|
||||
{
|
||||
if (string.IsNullOrEmpty(lastLine) || (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, _abbreviationList)))
|
||||
@ -815,7 +815,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
lastLine.EndsWith('♪'))
|
||||
{
|
||||
lastLine = HtmlUtil.RemoveHtmlTags(lastLine);
|
||||
var st = new StripableText(input);
|
||||
var st = new StrippableText(input);
|
||||
if (lastLine == null || (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, abbreviationList)))
|
||||
{
|
||||
if (st.StrippedText.Length > 0 && !char.IsUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') && !st.Pre.EndsWith("..."))
|
||||
|
@ -4,112 +4,112 @@ using Nikse.SubtitleEdit.Core;
|
||||
namespace Test.Logic
|
||||
{
|
||||
[TestClass]
|
||||
public class StripableTextTest
|
||||
public class StrippableTextTest
|
||||
{
|
||||
|
||||
[TestMethod]
|
||||
public void StripableTextItalic()
|
||||
public void StrippableTextItalic()
|
||||
{
|
||||
var st = new StripableText("<i>Hi!</i>");
|
||||
var st = new StrippableText("<i>Hi!</i>");
|
||||
Assert.AreEqual(st.Pre, "<i>");
|
||||
Assert.AreEqual(st.Post, "!</i>");
|
||||
Assert.AreEqual(st.StrippedText, "Hi");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableTextAss()
|
||||
public void StrippableTextAss()
|
||||
{
|
||||
var st = new StripableText("{\\an9}Hi!");
|
||||
var st = new StrippableText("{\\an9}Hi!");
|
||||
Assert.AreEqual(st.Pre, "{\\an9}");
|
||||
Assert.AreEqual(st.Post, "!");
|
||||
Assert.AreEqual(st.StrippedText, "Hi");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableTextFont()
|
||||
public void StrippableTextFont()
|
||||
{
|
||||
var st = new StripableText("<font color=\"red\">Hi!</font>");
|
||||
var st = new StrippableText("<font color=\"red\">Hi!</font>");
|
||||
Assert.AreEqual(st.Pre, "<font color=\"red\">");
|
||||
Assert.AreEqual(st.Post, "!</font>");
|
||||
Assert.AreEqual(st.StrippedText, "Hi");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableTextItalic2()
|
||||
public void StrippableTextItalic2()
|
||||
{
|
||||
var st = new StripableText("<i>O</i>");
|
||||
var st = new StrippableText("<i>O</i>");
|
||||
Assert.AreEqual(st.Pre, "<i>");
|
||||
Assert.AreEqual(st.Post, "</i>");
|
||||
Assert.AreEqual(st.StrippedText, "O");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableTextItalic3()
|
||||
public void StrippableTextItalic3()
|
||||
{
|
||||
var st = new StripableText("<i>Hi!");
|
||||
var st = new StrippableText("<i>Hi!");
|
||||
Assert.AreEqual(st.Pre, "<i>");
|
||||
Assert.AreEqual(st.Post, "!");
|
||||
Assert.AreEqual(st.StrippedText, "Hi");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableTextFontDontTouch()
|
||||
public void StrippableTextFontDontTouch()
|
||||
{
|
||||
var st = new StripableText("{MAN} Hi, how are you today!");
|
||||
var st = new StrippableText("{MAN} Hi, how are you today!");
|
||||
Assert.AreEqual(st.Pre, "");
|
||||
Assert.AreEqual(st.Post, "!");
|
||||
Assert.AreEqual(st.StrippedText, "{MAN} Hi, how are you today");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableOnlyPre()
|
||||
public void StrippableOnlyPre()
|
||||
{
|
||||
var st = new StripableText("(");
|
||||
var st = new StrippableText("(");
|
||||
Assert.AreEqual(st.Pre, "(");
|
||||
Assert.AreEqual(st.Post, "");
|
||||
Assert.AreEqual(st.StrippedText, "");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableOnlyPre2()
|
||||
public void StrippableOnlyPre2()
|
||||
{
|
||||
var st = new StripableText("<");
|
||||
var st = new StrippableText("<");
|
||||
Assert.AreEqual(st.Pre, "");
|
||||
Assert.AreEqual(st.Post, "");
|
||||
Assert.AreEqual(st.StrippedText, "<");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableOnlyPre3()
|
||||
public void StrippableOnlyPre3()
|
||||
{
|
||||
var st = new StripableText("<i>");
|
||||
var st = new StrippableText("<i>");
|
||||
Assert.AreEqual(st.Pre, "<i>");
|
||||
Assert.AreEqual(st.Post, "");
|
||||
Assert.AreEqual(st.StrippedText, "");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableOnlyText()
|
||||
public void StrippableOnlyText()
|
||||
{
|
||||
var st = new StripableText("H");
|
||||
var st = new StrippableText("H");
|
||||
Assert.AreEqual(st.Pre, "");
|
||||
Assert.AreEqual(st.Post, "");
|
||||
Assert.AreEqual(st.StrippedText, "H");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableTextItalicAndFont()
|
||||
public void StrippableTextItalicAndFont()
|
||||
{
|
||||
var st = new StripableText("<i><font color=\"red\">Hi!</font></i>");
|
||||
var st = new StrippableText("<i><font color=\"red\">Hi!</font></i>");
|
||||
Assert.AreEqual(st.Pre, "<i><font color=\"red\">");
|
||||
Assert.AreEqual(st.Post, "!</font></i>");
|
||||
Assert.AreEqual(st.StrippedText, "Hi");
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void StripableTextItalicAndMore()
|
||||
public void StrippableTextItalicAndMore()
|
||||
{
|
||||
var st = new StripableText("<i>...<b>Hi!</b></i>");
|
||||
var st = new StrippableText("<i>...<b>Hi!</b></i>");
|
||||
Assert.AreEqual(st.Pre, "<i>...<b>");
|
||||
Assert.AreEqual(st.Post, "!</b></i>");
|
||||
Assert.AreEqual(st.StrippedText, "Hi");
|
@ -61,7 +61,7 @@
|
||||
<Compile Include="Logic\TarFileTest.cs" />
|
||||
<Compile Include="Logic\TransportStream\TransportStreamTest.cs" />
|
||||
<Compile Include="Logic\ParagraphTest.cs" />
|
||||
<Compile Include="Logic\StripableTextTest.cs" />
|
||||
<Compile Include="Logic\StrippableTextTest.cs" />
|
||||
<Compile Include="Logic\TimeCodeTest.cs" />
|
||||
<Compile Include="Logic\VideoFormats\MatroskaTest.cs" />
|
||||
<Compile Include="Logic\VobSub\VobSubTest.cs" />
|
||||
|
Loading…
x
Reference in New Issue
Block a user