From 2754f1cdcfef993c51b6e857e91ebd6a147b295d Mon Sep 17 00:00:00 2001
From: niksedk
Date: Mon, 16 Nov 2015 18:12:29 +0100
Subject: [PATCH] Starting to move "System.Windows.Forms" out of LibSe

---
 libse/LibSE.csproj                   |   2 +-
 libse/RichTextToPlainText.cs         | 254 +++++++++++++++++++++++
 libse/StringExtensions.cs            |  44 ++++
 src/Forms/DoNotBreakAfterListEdit.cs |   1 +
 src/Forms/FindDialog.cs              |   1 +
 src/Forms/ModifySelection.cs         |   1 +
 src/Forms/MultipleReplace.cs         |   1 +
 src/Forms/ReplaceDialog.cs           |   1 +
 src/Logic/FindReplaceDialogHelper.cs | 292 +++++++++++++++++++++++++++
 src/SubtitleEdit.csproj              |   1 +
 10 files changed, 597 insertions(+), 1 deletion(-)
 create mode 100644 libse/RichTextToPlainText.cs
 create mode 100644 src/Logic/FindReplaceDialogHelper.cs

diff --git a/libse/LibSE.csproj b/libse/LibSE.csproj
index ed66c1ed8..37d0762aa 100644
--- a/libse/LibSE.csproj
+++ b/libse/LibSE.csproj
@@ -126,7 +126,6 @@
-
@@ -185,6 +184,7 @@
+
diff --git a/libse/RichTextToPlainText.cs b/libse/RichTextToPlainText.cs
new file mode 100644
index 000000000..6cfd22d0a
--- /dev/null
+++ b/libse/RichTextToPlainText.cs
@@ -0,0 +1,254 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Nikse.SubtitleEdit.Core
+{
+    /// <summary>
+    /// Rich Text to plain text
+    /// </summary>
+    /// <remarks>
+    /// Translated from Python located at:
+    ///     http://stackoverflow.com/a/188877/448
+    /// to C# by Chris Benard - http://chrisbenard.net/2014/08/20/Extract-Text-from-RTF-in-.Net
+    /// </remarks>
+    public static class RichTextToPlainText
+    {
+        /// <summary>
+        /// Parser state pushed when a group opens and restored when it closes.
+        /// </summary>
+        private class StackEntry
+        {
+            public int NumberOfCharactersToSkip { get; private set; }
+            public bool Ignorable { get; private set; }
+
+            public StackEntry(int numberOfCharactersToSkip, bool ignorable)
+            {
+                NumberOfCharactersToSkip = numberOfCharactersToSkip;
+                Ignorable = ignorable;
+            }
+        }
+
+        // Alternatives, in order: control word + optional numeric argument (groups 1 and 2), \'xx hex
+        // escape (3), control symbol (4), group brace (5), raw line breaks (no group), any other char (6).
+        private static readonly Regex RtfRegex = new Regex(@"\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)", RegexOptions.Singleline | RegexOptions.IgnoreCase);
+
+        // Control words that switch the enclosing group to "ignorable"; a set gives constant time lookups.
+        private static readonly HashSet<string> Destinations = new HashSet<string>
+        {
+            "aftncn","aftnsep","aftnsepc","annotation","atnauthor","atndate","atnicn","atnid",
+            "atnparent","atnref","atntime","atrfend","atrfstart","author","background",
+            "bkmkend","bkmkstart","blipuid","buptim","category","colorschememapping",
+            "colortbl","comment","company","creatim","datafield","datastore","defchp","defpap",
+            "do","doccomm","docvar","dptxbxtext","ebcend","ebcstart","factoidname","falt",
+            "fchars","ffdeftext","ffentrymcr","ffexitmcr","ffformat","ffhelptext","ffl",
+            "ffname","ffstattext","field","file","filetbl","fldinst","fldrslt","fldtype",
+            "fname","fontemb","fontfile","fonttbl","footer","footerf","footerl","footerr",
+            "footnote","formfield","ftncn","ftnsep","ftnsepc","g","generator","gridtbl",
+            "header","headerf","headerl","headerr","hl","hlfr","hlinkbase","hlloc","hlsrc",
+            "hsv","htmltag","info","keycode","keywords","latentstyles","lchars","levelnumbers",
+            "leveltext","lfolevel","linkval","list","listlevel","listname","listoverride",
+            "listoverridetable","listpicture","liststylename","listtable","listtext",
+            "lsdlockedexcept","macc","maccPr","mailmerge","maln","malnScr","manager","margPr",
+            "mbar","mbarPr","mbaseJc","mbegChr","mborderBox","mborderBoxPr","mbox","mboxPr",
+            "mchr","mcount","mctrlPr","md","mdeg","mdegHide","mden","mdiff","mdPr","me",
+            "mendChr","meqArr","meqArrPr","mf","mfName","mfPr","mfunc","mfuncPr","mgroupChr",
+            "mgroupChrPr","mgrow","mhideBot","mhideLeft","mhideRight","mhideTop","mhtmltag",
+            "mlim","mlimloc","mlimlow","mlimlowPr","mlimupp","mlimuppPr","mm","mmaddfieldname",
+            "mmath","mmathPict","mmathPr","mmaxdist","mmc","mmcJc","mmconnectstr",
+            "mmconnectstrdata","mmcPr","mmcs","mmdatasource","mmheadersource","mmmailsubject",
+            "mmodso","mmodsofilter","mmodsofldmpdata","mmodsomappedname","mmodsoname",
+            "mmodsorecipdata","mmodsosort","mmodsosrc","mmodsotable","mmodsoudl",
+            "mmodsoudldata","mmodsouniquetag","mmPr","mmquery","mmr","mnary","mnaryPr",
+            "mnoBreak","mnum","mobjDist","moMath","moMathPara","moMathParaPr","mopEmu",
+            "mphant","mphantPr","mplcHide","mpos","mr","mrad","mradPr","mrPr","msepChr",
+            "mshow","mshp","msPre","msPrePr","msSub","msSubPr","msSubSup","msSubSupPr","msSup",
+            "msSupPr","mstrikeBLTR","mstrikeH","mstrikeTLBR","mstrikeV","msub","msubHide",
+            "msup","msupHide","mtransp","mtype","mvertJc","mvfmf","mvfml","mvtof","mvtol",
+            "mzeroAsc","mzeroDesc","mzeroWid","nesttableprops","nextfile","nonesttables",
+            "objalias","objclass","objdata","object","objname","objsect","objtime","oldcprops",
+            "oldpprops","oldsprops","oldtprops","oleclsid","operator","panose","password",
+            "passwordhash","pgp","pgptbl","picprop","pict","pn","pnseclvl","pntext","pntxta",
+            "pntxtb","printim","private","propname","protend","protstart","protusertbl","pxe",
+            "result","revtbl","revtim","rsidtbl","rxe","shp","shpgrp","shpinst",
+            "shppict","shprslt","shptxt","sn","sp","staticval","stylesheet","subject","sv",
+            "svb","tc","template","themedata","title","txe","ud","upr","userprops",
+            "wgrffmtfilter","windowcaption","writereservation","writereservhash","xe","xform",
+            "xmlattrname","xmlattrvalue","xmlclose","xmlname","xmlnstbl",
+            "xmlopen"
+        };
+
+        // Control words that are replaced by fixed text in the output.
+        private static readonly Dictionary<string, string> SpecialCharacters = new Dictionary<string, string>
+        {
+            { "par", "\n" },
+            { "sect", "\n\n" },
+            { "page", "\n\n" },
+            { "line", "\n" },
+            { "tab", "\t" },
+            { "emdash", "\u2014" },
+            { "endash", "\u2013" },
+            { "emspace", "\u2003" },
+            { "enspace", "\u2002" },
+            { "qmspace", "\u2005" },
+            { "bullet", "\u2022" },
+            { "lquote", "\u2018" },
+            { "rquote", "\u2019" },
+            { "ldblquote", "\u201C" },
+            { "rdblquote", "\u201D" },
+        };
+
+        /// <summary>
+        /// Strip RTF Tags from RTF Text
+        /// </summary>
+        /// <param name="inputRtf">RTF formatted text</param>
+        /// <returns>Plain text from RTF, or null when <paramref name="inputRtf"/> is null</returns>
+        public static string StripRichTextFormat(string inputRtf)
+        {
+            if (inputRtf == null)
+            {
+                return null;
+            }
+
+            var stack = new Stack<StackEntry>();
+            bool ignorable = false; // Whether this group (and all inside it) are "ignorable".
+            int ucskip = 1; // Number of ASCII characters to skip after a unicode character.
+            int curskip = 0; // Number of ASCII characters left to skip
+            var output = new StringBuilder(inputRtf.Length); // Output buffer.
+
+            foreach (Match match in RtfRegex.Matches(inputRtf))
+            {
+                string word = match.Groups[1].Value;
+                string arg = match.Groups[2].Value;
+                string hex = match.Groups[3].Value;
+                string character = match.Groups[4].Value;
+                string brace = match.Groups[5].Value;
+                string tchar = match.Groups[6].Value;
+
+                if (brace.Length > 0)
+                {
+                    curskip = 0;
+                    if (brace == "{")
+                    {
+                        // Push state
+                        stack.Push(new StackEntry(ucskip, ignorable));
+                    }
+                    else if (stack.Count > 0)
+                    {
+                        // Pop state; an unmatched "}" has nothing to restore and is ignored instead of throwing.
+                        StackEntry entry = stack.Pop();
+                        ucskip = entry.NumberOfCharactersToSkip;
+                        ignorable = entry.Ignorable;
+                    }
+                }
+                else if (character.Length > 0) // \x (not a letter)
+                {
+                    curskip = 0;
+                    if (character == "*")
+                    {
+                        ignorable = true;
+                    }
+                    else if (!ignorable)
+                    {
+                        if (character == "~")
+                        {
+                            output.Append('\u00A0');
+                        }
+                        else if ("{}\\".Contains(character))
+                        {
+                            output.Append(character);
+                        }
+                    }
+                }
+                else if (word.Length > 0) // \foo
+                {
+                    curskip = 0;
+                    string replacement;
+                    int number;
+                    if (Destinations.Contains(word))
+                    {
+                        ignorable = true;
+                    }
+                    else if (ignorable)
+                    {
+                        // Nothing is emitted while the group is ignorable.
+                    }
+                    else if (SpecialCharacters.TryGetValue(word, out replacement))
+                    {
+                        output.Append(replacement);
+                    }
+                    else if (word == "uc")
+                    {
+                        // A missing or unparsable argument leaves the current skip count unchanged.
+                        if (TryParseArgument(arg, out number))
+                        {
+                            ucskip = number;
+                        }
+                    }
+                    else if (word == "u" && TryParseArgument(arg, out number))
+                    {
+                        AppendUnicode(output, number);
+                        curskip = ucskip;
+                    }
+                }
+                else if (hex.Length > 0) // \'xx
+                {
+                    if (curskip > 0)
+                    {
+                        curskip -= 1;
+                    }
+                    else if (!ignorable)
+                    {
+                        int c = Int32.Parse(hex, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
+                        output.Append((char)c);
+                    }
+                }
+                else if (tchar.Length > 0)
+                {
+                    if (curskip > 0)
+                    {
+                        curskip -= 1;
+                    }
+                    else if (!ignorable)
+                    {
+                        output.Append(tchar);
+                    }
+                }
+            }
+
+            return output.ToString();
+        }
+
+        /// <summary>
+        /// Parses the numeric argument of a control word using the invariant culture.
+        /// </summary>
+        private static bool TryParseArgument(string arg, out int number)
+        {
+            return Int32.TryParse(arg, NumberStyles.Integer, CultureInfo.InvariantCulture, out number);
+        }
+
+        /// <summary>
+        /// Appends the character for a \uN argument. Negative values are the signed 16-bit form of
+        /// a UTF-16 code unit; surrogate halves are appended as-is so that two consecutive \uN words
+        /// form a pair (Char.ConvertFromUtf32 throws for them). Values outside Unicode are dropped.
+        /// </summary>
+        private static void AppendUnicode(StringBuilder output, int number)
+        {
+            if (number < 0)
+            {
+                number += 0x10000;
+            }
+
+            if (number >= 0 && number <= 0xFFFF)
+            {
+                output.Append((char)number);
+            }
+            else if (number > 0xFFFF && number <= 0x10FFFF)
+            {
+                output.Append(Char.ConvertFromUtf32(number));
+            }
+        }
+    }
+}
diff --git a/libse/StringExtensions.cs b/libse/StringExtensions.cs
index f45067c8d..ab9a2f95c 100644
--- a/libse/StringExtensions.cs
+++ b/libse/StringExtensions.cs
@@ -198,5 +198,49 @@ namespace Nikse.SubtitleEdit.Core
             return s;
         }
 
+        /// <summary>
+        /// Wraps plain text in a minimal RTF document: escapes RTF control characters and
+        /// writes every non-ASCII character as a \uN escape.
+        /// </summary>
+        /// <param name="value">Plain text to convert</param>
+        /// <returns>RTF document containing the text</returns>
+        public static string ToRtf(this string value)
+        {
+            // special RTF chars
+            var backslashed = new StringBuilder(value);
+            backslashed.Replace(@"\", @"\\");
+            backslashed.Replace(@"{", @"\{");
+            backslashed.Replace(@"}", @"\}");
+            backslashed.Replace(Environment.NewLine, @"\par" + Environment.NewLine);
+
+            // convert string char by char
+            var sb = new StringBuilder();
+            foreach (char character in backslashed.ToString())
+            {
+                if (character <= 0x7f)
+                {
+                    sb.Append(character);
+                }
+                else
+                {
+                    // \uN takes a signed 16-bit number, so code units above 0x7FFF are written as negatives.
+                    short codeUnit = unchecked((short)character);
+                    sb.Append("\\u").Append(codeUnit.ToString(System.Globalization.CultureInfo.InvariantCulture)).Append('?');
+                }
+            }
+
+            return @"{\rtf1\ansi\ansicpg1252\deff0{\fonttbl\f0\fswiss Helvetica;}\f0\pard " + sb + @"\par" + Environment.NewLine + "}";
+        }
+
+        /// <summary>
+        /// Extracts the plain text from an RTF document.
+        /// </summary>
+        /// <param name="value">RTF formatted text</param>
+        /// <returns>Plain text, or null when <paramref name="value"/> is null</returns>
+        public static string FromRtf(this string value)
+        {
+            return RichTextToPlainText.StripRichTextFormat(value);
+        }
+
     }
 }
\ No newline at end of file
diff --git a/src/Forms/DoNotBreakAfterListEdit.cs b/src/Forms/DoNotBreakAfterListEdit.cs
index 7cb4d8737..52b7b7538 100644
--- a/src/Forms/DoNotBreakAfterListEdit.cs
+++ b/src/Forms/DoNotBreakAfterListEdit.cs
@@ -1,4 +1,5 @@
 using Nikse.SubtitleEdit.Core;
+using Nikse.SubtitleEdit.Logic;
 using System;
 using System.Collections.Generic;
 using System.Globalization;
diff --git a/src/Forms/FindDialog.cs b/src/Forms/FindDialog.cs
index 8dc382c90..01a49789f 100644
--- a/src/Forms/FindDialog.cs
+++ b/src/Forms/FindDialog.cs
@@ -4,6 +4,7 @@ using System;
 using System.Drawing;
 using System.Text.RegularExpressions;
 using System.Windows.Forms;
+using Nikse.SubtitleEdit.Logic;
 
 namespace Nikse.SubtitleEdit.Forms
 {
diff
--git a/src/Forms/ModifySelection.cs b/src/Forms/ModifySelection.cs
index 86108cb17..4fa3227be 100644
--- a/src/Forms/ModifySelection.cs
+++ b/src/Forms/ModifySelection.cs
@@ -1,5 +1,6 @@
 using Nikse.SubtitleEdit.Controls;
 using Nikse.SubtitleEdit.Core;
+using Nikse.SubtitleEdit.Logic;
 using System;
 using System.Text.RegularExpressions;
 using System.Windows.Forms;
diff --git a/src/Forms/MultipleReplace.cs b/src/Forms/MultipleReplace.cs
index 41133e708..8761802f3 100644
--- a/src/Forms/MultipleReplace.cs
+++ b/src/Forms/MultipleReplace.cs
@@ -1,4 +1,5 @@
 using Nikse.SubtitleEdit.Core;
+using Nikse.SubtitleEdit.Logic;
 using System;
 using System.Collections.Generic;
 using System.Globalization;
diff --git a/src/Forms/ReplaceDialog.cs b/src/Forms/ReplaceDialog.cs
index 478facd5d..5815f54a8 100644
--- a/src/Forms/ReplaceDialog.cs
+++ b/src/Forms/ReplaceDialog.cs
@@ -4,6 +4,7 @@ using System.Drawing;
 using System.Text.RegularExpressions;
 using System.Windows.Forms;
 using Nikse.SubtitleEdit.Core.Enums;
+using Nikse.SubtitleEdit.Logic;
 
 namespace Nikse.SubtitleEdit.Forms
 {
diff --git a/src/Logic/FindReplaceDialogHelper.cs b/src/Logic/FindReplaceDialogHelper.cs
new file mode 100644
index 000000000..7e2082b00
--- /dev/null
+++ b/src/Logic/FindReplaceDialogHelper.cs
@@ -0,0 +1,292 @@
+using System;
+using System.Text.RegularExpressions;
+using System.Windows.Forms;
+using Nikse.SubtitleEdit.Core;
+using Nikse.SubtitleEdit.Core.Enums;
+
+namespace Nikse.SubtitleEdit.Logic
+{
+    /// <summary>
+    /// Search state and search routines shared by the find and replace dialogs.
+    /// </summary>
+    public class FindReplaceDialogHelper
+    {
+        private const string StartChars = " >-\"”“['‘`´¶(♪¿¡.…—";
+        private const string EndChars = " -\"”“]'`´¶)♪.!?:…—\r\n";
+        private readonly string _findText = string.Empty;
+        private readonly string _replaceText = string.Empty;
+        private Regex _regEx;
+        private int _findTextLength;
+
+        public bool Success { get; set; }
+        public FindType FindType { get; set; }
+        public int SelectedIndex { get; set; }
+        public int SelectedPosition { get; set; }
+        public int WindowPositionLeft { get; set; }
+        public int WindowPositionTop { get; set; }
+        public int StartLineIndex { get; set; }
+        public bool MatchInOriginal { get; set; }
+
+        /// <summary>
+        /// Length of the find text, or of the latest regular expression match.
+        /// </summary>
+        public int FindTextLength { get { return _findTextLength; } }
+
+        public string FindText { get { return _findText; } }
+
+        public string ReplaceText { get { return _replaceText; } }
+
+        public FindReplaceDialogHelper(FindType findType, string findText, Regex regEx, string replaceText, int left, int top, int startLineIndex)
+        {
+            FindType = findType;
+            // A null find or replace text is stored as an empty string so the length below cannot throw.
+            _findText = findText ?? string.Empty;
+            _replaceText = replaceText ?? string.Empty;
+            _regEx = regEx;
+            _findTextLength = _findText.Length;
+            WindowPositionLeft = left;
+            WindowPositionTop = top;
+            StartLineIndex = startLineIndex;
+        }
+
+        public bool Find(Subtitle subtitle, Subtitle originalSubtitle, int startIndex)
+        {
+            return FindNext(subtitle, originalSubtitle, startIndex, 0, Configuration.Settings.General.AllowEditOfOriginalSubtitle);
+        }
+
+        public bool Find(TextBox textBox, int startIndex)
+        {
+            return FindNext(textBox.Text, startIndex);
+        }
+
+        /// <summary>
+        /// Returns the index of the first match in <paramref name="text"/> at or after
+        /// <paramref name="startIndex"/>, or -1 when there is none.
+        /// </summary>
+        private int FindPositionInText(string text, int startIndex)
+        {
+            if (startIndex >= text.Length && !(FindType == FindType.RegEx && startIndex == 0))
+            {
+                return -1;
+            }
+
+            switch (FindType)
+            {
+                case FindType.Normal:
+                    return text.IndexOf(_findText, startIndex, StringComparison.OrdinalIgnoreCase);
+                case FindType.CaseSensitive:
+                    return text.IndexOf(_findText, startIndex, StringComparison.Ordinal);
+                case FindType.RegEx:
+                    return FindRegExPosition(text, startIndex);
+            }
+            return -1;
+        }
+
+        /// <summary>
+        /// Runs the regular expression from <paramref name="startIndex"/> and stores the matched length.
+        /// Returns the index of the group named by Utilities.GetRegExGroup when that group matched,
+        /// otherwise the index of the whole match, or -1 when nothing matches.
+        /// </summary>
+        private int FindRegExPosition(string text, int startIndex)
+        {
+            Match match = _regEx.Match(text, startIndex);
+            if (!match.Success)
+            {
+                return -1;
+            }
+
+            string groupName = Utilities.GetRegExGroup(_findText);
+            if (groupName != null && match.Groups[groupName] != null && match.Groups[groupName].Success)
+            {
+                _findTextLength = match.Groups[groupName].Length;
+                return match.Groups[groupName].Index;
+            }
+            _findTextLength = match.Length;
+            return match.Index;
+        }
+
+        /// <summary>
+        /// Finds the next match in the subtitle, beginning at line <paramref name="startIndex"/> and
+        /// character <paramref name="position"/>. Each line's text is searched first, then the matching
+        /// original line when <paramref name="allowEditOfOriginalSubtitle"/> is set. On success
+        /// SelectedIndex, SelectedPosition and MatchInOriginal describe the match.
+        /// </summary>
+        public bool FindNext(Subtitle subtitle, Subtitle originalSubtitle, int startIndex, int position, bool allowEditOfOriginalSubtitle)
+        {
+            Success = false;
+            int index = 0;
+            if (position < 0)
+            {
+                position = 0;
+            }
+
+            foreach (Paragraph p in subtitle.Paragraphs)
+            {
+                if (index >= startIndex)
+                {
+                    int pos;
+                    if (!MatchInOriginal)
+                    {
+                        pos = FindPositionInText(p.Text, position);
+                        if (pos >= 0)
+                        {
+                            MatchInOriginal = false;
+                            SelectedIndex = index;
+                            SelectedPosition = pos;
+                            Success = true;
+                            return true;
+                        }
+                        position = 0;
+                    }
+                    MatchInOriginal = false;
+
+                    if (originalSubtitle != null && allowEditOfOriginalSubtitle)
+                    {
+                        Paragraph o = Utilities.GetOriginalParagraph(index, p, originalSubtitle.Paragraphs);
+                        if (o != null)
+                        {
+                            pos = FindPositionInText(o.Text, position);
+                            if (pos >= 0)
+                            {
+                                MatchInOriginal = true;
+                                SelectedIndex = index;
+                                SelectedPosition = pos;
+                                Success = true;
+                                return true;
+                            }
+                        }
+                    }
+
+                    // The start position only applies to the first line searched; without this reset a
+                    // search resumed in an original line would skip the start of the following lines.
+                    position = 0;
+                }
+                index++;
+            }
+            return false;
+        }
+
+        public static ContextMenu GetRegExContextMenu(TextBox textBox)
+        {
+            return BuildRegExContextMenu(delegate(string text) { textBox.SelectedText = text; });
+        }
+
+        public static ContextMenu GetRegExContextMenu(ComboBox comboBox)
+        {
+            return BuildRegExContextMenu(delegate(string text) { comboBox.SelectedText = text; });
+        }
+
+        /// <summary>
+        /// Builds the regular expression snippet menu; clicking an item passes its snippet to
+        /// <paramref name="insert"/>.
+        /// </summary>
+        private static ContextMenu BuildRegExContextMenu(Action<string> insert)
+        {
+            var cm = new ContextMenu();
+            var l = Configuration.Settings.Language.RegularExpressionContextMenu;
+            cm.MenuItems.Add(l.WordBoundary, delegate { insert("\\b"); });
+            cm.MenuItems.Add(l.NonWordBoundary, delegate { insert("\\B"); });
+            cm.MenuItems.Add(l.NewLine, delegate { insert("\\r\\n"); });
+            cm.MenuItems.Add(l.AnyDigit, delegate { insert("\\d"); });
+            cm.MenuItems.Add(l.NonDigit, delegate { insert("\\D"); });
+            cm.MenuItems.Add(l.AnyCharacter, delegate { insert("."); });
+            cm.MenuItems.Add(l.AnyWhitespace, delegate { insert("\\s"); });
+            cm.MenuItems.Add(l.NonSpaceCharacter, delegate { insert("\\S"); });
+            cm.MenuItems.Add(l.ZeroOrMore, delegate { insert("*"); });
+            cm.MenuItems.Add(l.OneOrMore, delegate { insert("+"); });
+            cm.MenuItems.Add(l.InCharacterGroup, delegate { insert("[test]"); });
+            cm.MenuItems.Add(l.NotInCharacterGroup, delegate { insert("[^test]"); });
+            return cm;
+        }
+
+        public static ContextMenu GetReplaceTextContextMenu(TextBox textBox)
+        {
+            var cm = new ContextMenu();
+            cm.MenuItems.Add(Configuration.Settings.Language.RegularExpressionContextMenu.NewLineShort, delegate { textBox.SelectedText = "\\n"; });
+            return cm;
+        }
+
+        /// <summary>
+        /// Finds the next match in <paramref name="text"/> after character index
+        /// <paramref name="startIndex"/>. On success SelectedIndex holds the match index.
+        /// </summary>
+        public bool FindNext(string text, int startIndex)
+        {
+            Success = false;
+            startIndex++;
+            int pos = startIndex < text.Length ? FindPositionInText(text, startIndex) : -1;
+            if (pos < 0)
+            {
+                return false;
+            }
+
+            SelectedIndex = pos;
+            Success = true; // was only set for regular expression matches before
+            return true;
+        }
+
+        /// <summary>
+        /// Counts the matches of the find text in all lines of <paramref name="subtitle"/>.
+        /// <paramref name="wholeWord"/> is only used by the non regular expression find types.
+        /// </summary>
+        public int FindCount(Subtitle subtitle, bool wholeWord)
+        {
+            var count = 0;
+            // validate pattern if find type is regex
+            if (FindType == FindType.RegEx)
+            {
+                if (!Utilities.IsValidRegex(FindText))
+                {
+                    MessageBox.Show(Configuration.Settings.Language.General.RegularExpressionIsNotValid);
+                    return count;
+                }
+                _regEx = new Regex(_findText);
+            }
+
+            // count matches; lines shorter than the pattern are not skipped, a regex can still match them
+            foreach (var p in subtitle.Paragraphs)
+            {
+                switch (FindType)
+                {
+                    case FindType.Normal:
+                        count += GetWordCount(p.Text, _findText, wholeWord, StringComparison.OrdinalIgnoreCase);
+                        break;
+                    case FindType.CaseSensitive:
+                        count += GetWordCount(p.Text, _findText, wholeWord, StringComparison.Ordinal);
+                        break;
+                    case FindType.RegEx:
+                        count += _regEx.Matches(p.Text).Count;
+                        break;
+                }
+            }
+            return count;
+        }
+
+        /// <summary>
+        /// Counts the non-overlapping occurrences of <paramref name="pattern"/> in <paramref name="text"/>.
+        /// </summary>
+        private int GetWordCount(string text, string pattern, bool matchWholeWord, StringComparison comparison)
+        {
+            // IndexOf finds an empty pattern without ever advancing, which made the loop below endless.
+            if (string.IsNullOrEmpty(pattern))
+            {
+                return 0;
+            }
+
+            var count = 0;
+            var idx = text.IndexOf(pattern, comparison);
+            while (idx >= 0)
+            {
+                var end = idx + pattern.Length;
+                var startOk = idx == 0 || StartChars.Contains(text[idx - 1]);
+                var endOk = end == text.Length || EndChars.Contains(text[end]);
+                if (!matchWholeWord || (startOk && endOk))
+                {
+                    count++;
+                }
+                idx = text.IndexOf(pattern, end, comparison);
+            }
+            return count;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/SubtitleEdit.csproj b/src/SubtitleEdit.csproj
index 5ef3a5ece..980d7305f 100644
--- a/src/SubtitleEdit.csproj
+++ b/src/SubtitleEdit.csproj
@@ -805,6 +805,7 @@
 Code
+