using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;

namespace Nikse.SubtitleEdit.Core.Dictionaries
{
    /// <summary>
    /// Holds the OCR "replace lists" read from an XML file (plus a sibling "_User" XML file)
    /// and applies them to single words or whole lines.
    /// </summary>
    public class OcrFixReplaceList
    {
        // Italic markup that is peeled off before matching and re-attached afterwards.
        private const string ItalicStartTag = "<i>";
        private const string ItalicEndTag = "</i>";

        // Skeleton documents used when a list file is missing or unreadable. They must be valid XML:
        // XmlDocument.LoadXml throws XmlException on an empty string.
        private const string EmptyReplaceListXml =
            "<ReplaceList><WholeWords/><PartialLines/><BeginLines/><EndLines/><WholeLines/></ReplaceList>";
        private const string EmptyUserReplaceListXml =
            "<ReplaceList><WholeWords/><PartialLines/><BeginLines/><EndLines/><WholeLines/>" +
            "<RemovedWholeWords/><RemovedPartialLines/><RemovedBeginLines/><RemovedEndLines/><RemovedWholeLines/></ReplaceList>";

        private static readonly Regex RegExQuestion = new Regex(@"\S\?[A-ZÆØÅÄÖÉÈÀÙÂÊÎÔÛËÏa-zæøåäöéèàùâêîôûëï]", RegexOptions.Compiled);
        private static readonly Regex RegExIandZero = new Regex(@"[a-zæøåöääöéèàùâêîôûëï][I1]", RegexOptions.Compiled);
        private static readonly Regex RegExTime1 = new Regex(@"[a-zæøåöääöéèàùâêîôûëï]0", RegexOptions.Compiled);
        private static readonly Regex RegExTime2 = new Regex(@"0[a-zæøåöääöéèàùâêîôûëï]", RegexOptions.Compiled);
        // Uppercase 'C' was missing from the character class, so e.g. "#FFCC00" was not seen as hex.
        private static readonly Regex HexNumber = new Regex(@"^#?[\dABCDEFabcdef]+$", RegexOptions.Compiled);
        private static readonly Regex StartEndEndsWithNumber = new Regex(@"^\d+.+\d$", RegexOptions.Compiled);
        private static readonly Regex LetterBeforeCurrencySign = new Regex("[A-ZÆØÅÄÖÉÈÀÙÂÊÎÔÛËÏa-zæøåäöéèàùâêîôûëï]¤", RegexOptions.Compiled);

        private static readonly char[] DigitsTwoToNine = { '2', '3', '4', '5', '6', '7', '8', '9' };
        private static readonly char[] DigitsOneToNine = { '1', '2', '3', '4', '5', '6', '7', '8', '9' };
        private static readonly string[] SentenceEnders = { ". ", "! ", "? " };
        private static readonly char[] LeadingRepeatableChars = { '-', '.', '"' };
        private static readonly char[] TrailingRepeatableChars = { '"', '.', ',', '?', '!', ')' };

        public readonly Dictionary<string, string> WordReplaceList;
        public readonly Dictionary<string, string> PartialLineWordBoundaryReplaceList;
        private readonly Dictionary<string, string> _partialLineAlwaysReplaceList;
        private readonly Dictionary<string, string> _beginLineReplaceList;
        private readonly Dictionary<string, string> _endLineReplaceList;
        private readonly Dictionary<string, string> _wholeLineReplaceList;
        private readonly Dictionary<string, string> _partialWordReplaceListAlways;
        private readonly Dictionary<string, string> _partialWordReplaceList;
        private readonly Dictionary<string, string> _regExList;
        private readonly string _replaceListXmlFileName;

        /// <summary>
        /// Loads all replace lists from <paramref name="replaceListXmlFileName"/> and merges the
        /// user's additions/removals from the sibling "_User" file.
        /// </summary>
        /// <param name="replaceListXmlFileName">Full path of the built-in replace list XML file.</param>
        public OcrFixReplaceList(string replaceListXmlFileName)
        {
            _replaceListXmlFileName = replaceListXmlFileName;

            var doc = LoadXmlReplaceListDocument();
            var userDoc = LoadXmlReplaceListUserDocument();

            WordReplaceList = LoadReplaceList(doc, "WholeWords");
            _partialWordReplaceListAlways = LoadReplaceList(doc, "PartialWordsAlways");
            _partialWordReplaceList = LoadReplaceList(doc, "PartialWords");
            PartialLineWordBoundaryReplaceList = LoadReplaceList(doc, "PartialLines");
            _partialLineAlwaysReplaceList = LoadReplaceList(doc, "PartialAlwaysLines");
            _beginLineReplaceList = LoadReplaceList(doc, "BeginLines");
            _endLineReplaceList = LoadReplaceList(doc, "EndLines");
            _wholeLineReplaceList = LoadReplaceList(doc, "WholeLines");
            _regExList = LoadRegExList(doc, "RegularExpressions");

            ApplyUserChanges(WordReplaceList, userDoc, "WholeWords");
            ApplyUserChanges(PartialLineWordBoundaryReplaceList, userDoc, "PartialLines");

            // AddToWholeLineList persists to the user file, so those entries must be read back here.
            foreach (var userLine in LoadReplaceList(userDoc, "WholeLines"))
            {
                if (!_wholeLineReplaceList.ContainsKey(userLine.Key))
                {
                    _wholeLineReplaceList.Add(userLine.Key, userLine.Value);
                }
            }
        }

        /// <summary>
        /// Creates a replace list for the file "&lt;languageId&gt;_OCRFixReplaceList.xml" in the dictionaries folder.
        /// </summary>
        public static OcrFixReplaceList FromLanguageId(string languageId)
        {
            return new OcrFixReplaceList(Configuration.DictionariesFolder + languageId + "_OCRFixReplaceList.xml");
        }

        /// <summary>
        /// Removes every key listed under "Removed" + <paramref name="name"/> in the user document from
        /// <paramref name="target"/>, then adds the user's own entries from section <paramref name="name"/>
        /// (existing keys win).
        /// </summary>
        private static void ApplyUserChanges(Dictionary<string, string> target, XmlDocument userDoc, string name)
        {
            foreach (var removed in LoadReplaceList(userDoc, "Removed" + name))
            {
                target.Remove(removed.Key);
            }

            foreach (var added in LoadReplaceList(userDoc, name))
            {
                if (!target.ContainsKey(added.Key))
                {
                    target.Add(added.Key, added.Value);
                }
            }
        }

        /// <summary>Reads the "from"/"to" pairs of section <paramref name="name"/>; entries where from equals to are skipped.</summary>
        private static Dictionary<string, string> LoadReplaceList(XmlDocument doc, string name)
        {
            return LoadAttributePairs(doc, name, "from", "to", false);
        }

        /// <summary>Reads the "find"/"replaceWith" pairs of section <paramref name="name"/>; invalid patterns are skipped.</summary>
        private static Dictionary<string, string> LoadRegExList(XmlDocument doc, string name)
        {
            return LoadAttributePairs(doc, name, "find", "replaceWith", true);
        }

        /// <summary>
        /// Shared loader for both list kinds. Returns an empty dictionary when the document has no root
        /// or the section does not exist. The first occurrence of a duplicate key wins.
        /// </summary>
        private static Dictionary<string, string> LoadAttributePairs(XmlDocument doc, string sectionName, string keyAttribute, string valueAttribute, bool isRegex)
        {
            var list = new Dictionary<string, string>();
            if (doc == null || doc.DocumentElement == null)
            {
                return list;
            }

            XmlNode section = doc.DocumentElement.SelectSingleNode(sectionName);
            if (section == null)
            {
                return list;
            }

            foreach (XmlNode item in section.ChildNodes)
            {
                if (item.Attributes == null)
                {
                    continue; // comments / text nodes carry no attributes
                }

                XmlAttribute key = item.Attributes[keyAttribute];
                XmlAttribute value = item.Attributes[valueAttribute];
                if (key == null || value == null)
                {
                    continue;
                }

                bool valid = isRegex ? Utilities.IsValidRegex(key.Value) : key.Value != value.Value;
                if (valid && !list.ContainsKey(key.Value))
                {
                    list.Add(key.Value, value.Value);
                }
            }

            return list;
        }

        /// <summary>
        /// Applies the line-level lists to <paramref name="input"/> in this order: whole line, begin of line,
        /// end of line, partial line (word boundary), partial line (always), regular expressions.
        /// </summary>
        /// <param name="input">Subtitle text; may span several lines.</param>
        /// <returns>The corrected text, or null when <paramref name="input"/> is null.</returns>
        public string FixOcrErrorViaLineReplaceList(string input)
        {
            if (input == null)
            {
                return null;
            }

            // Whole line
            string wholeLineReplacement;
            if (_wholeLineReplaceList.TryGetValue(input, out wholeLineReplacement))
            {
                return wholeLineReplacement;
            }

            // Peel off leading italic tag / punctuation so begin-of-line entries can match.
            string newText = input;
            string pre = string.Empty;
            if (newText.StartsWith(ItalicStartTag, StringComparison.Ordinal))
            {
                pre += ItalicStartTag;
                newText = newText.Remove(0, ItalicStartTag.Length);
            }
            while (newText.Length > 1 && @" -""['¶(".Contains(newText[0]))
            {
                pre += newText[0];
                newText = newText.Substring(1);
            }
            if (newText.StartsWith(ItalicStartTag, StringComparison.Ordinal))
            {
                pre += ItalicStartTag;
                newText = newText.Remove(0, ItalicStartTag.Length);
            }

            // Begin of line (also applied after sentence-ending punctuation)
            var sb = new StringBuilder();
            foreach (string line in newText.SplitToLines())
            {
                string s = line;
                foreach (var entry in _beginLineReplaceList)
                {
                    string from = entry.Key;
                    string to = entry.Value;
                    if (!s.Contains(from))
                    {
                        continue;
                    }

                    // Ordinal: the removal below is length-based, so the match must be exact.
                    if (s.StartsWith(from, StringComparison.Ordinal))
                    {
                        s = to + s.Substring(from.Length);
                    }

                    // string.Replace is a no-op when the pattern is absent, so no Contains guard is needed.
                    foreach (string ender in SentenceEnders)
                    {
                        s = s.Replace(ender + from, ender + to);
                    }
                    foreach (string ender in SentenceEnders)
                    {
                        s = s.Replace(ender + Environment.NewLine + from, ender + Environment.NewLine + to);
                    }

                    if (!from.StartsWith("\"", StringComparison.Ordinal) && s.StartsWith("\"" + from, StringComparison.Ordinal))
                    {
                        s = s.Replace("\"" + from, "\"" + to);
                    }
                }
                sb.AppendLine(s);
            }
            newText = pre + sb.ToString().TrimEnd('\r', '\n');

            // End of line (a closing italic tag is set aside while matching)
            string post = string.Empty;
            if (newText.EndsWith(ItalicEndTag, StringComparison.Ordinal))
            {
                newText = newText.Remove(newText.Length - ItalicEndTag.Length);
                post = ItalicEndTag;
            }
            foreach (var entry in _endLineReplaceList)
            {
                if (newText.EndsWith(entry.Key, StringComparison.Ordinal))
                {
                    newText = newText.Remove(newText.Length - entry.Key.Length) + entry.Value;
                }
            }
            newText += post;

            // Partial line, only on word boundaries
            foreach (var entry in PartialLineWordBoundaryReplaceList)
            {
                if (newText.FastIndexOf(entry.Key) >= 0)
                {
                    newText = ReplaceWord(newText, entry.Key, entry.Value);
                }
            }

            // Partial line, always
            foreach (var entry in _partialLineAlwaysReplaceList)
            {
                if (newText.FastIndexOf(entry.Key) >= 0)
                {
                    newText = newText.Replace(entry.Key, entry.Value);
                }
            }

            // Regular expressions
            foreach (var entry in _regExList)
            {
                newText = Regex.Replace(newText, entry.Key, entry.Value, RegexOptions.Multiline);
            }

            return newText;
        }

        /// <summary>
        /// Replaces <paramref name="letter"/> at <paramref name="index"/> in <paramref name="word"/> with
        /// <paramref name="replaceLetters"/>, records the result in <paramref name="list"/> (no duplicates)
        /// and returns it. Returns <paramref name="word"/> untouched when the index is out of range.
        /// </summary>
        private static string AddToGuessList(List<string> list, string word, int index, string letter, string replaceLetters)
        {
            if (string.IsNullOrEmpty(word) || index < 0 || index + letter.Length - 1 >= word.Length)
            {
                return word;
            }

            string s = word.Remove(index, letter.Length);
            if (index >= s.Length)
            {
                s += replaceLetters;
            }
            else
            {
                s = s.Insert(index, replaceLetters);
            }

            if (!list.Contains(s))
            {
                list.Add(s);
            }
            return s;
        }

        /// <summary>
        /// Builds candidate spellings of <paramref name="word"/> by applying each "PartialWords" entry,
        /// working from the first occurrence forward and from the last occurrence backward
        /// (at most 10 steps per direction and entry).
        /// </summary>
        public IEnumerable<string> CreateGuessesFromLetters(string word)
        {
            var list = new List<string>();
            foreach (var entry in _partialWordReplaceList)
            {
                string letter = entry.Key;
                string replacement = entry.Value;

                string s = word;
                int i = 0;
                while (s.Contains(letter) && i < 10)
                {
                    int index = s.FastIndexOf(letter);
                    s = AddToGuessList(list, s, index, letter, replacement);
                    AddToGuessList(list, word, index, letter, replacement);
                    i++;
                }

                s = word;
                i = 0;
                while (s.Contains(letter) && i < 10)
                {
                    int index = s.LastIndexOf(letter, StringComparison.Ordinal);
                    s = AddToGuessList(list, s, index, letter, replacement);
                    AddToGuessList(list, word, index, letter, replacement);
                    i++;
                }
            }
            return list;
        }

        /// <summary>
        /// Fixes a single word: optional hard-coded character fixes, the "always" partial-word list,
        /// the whole-word list, and (when enabled) the I/1/0/l heuristics followed by a second
        /// whole-word lookup.
        /// </summary>
        /// <returns>The corrected word with its original leading/trailing punctuation re-attached.</returns>
        public string FixCommonWordErrors(string word)
        {
            bool useHardcodedRules = Configuration.Settings.Tools.OcrFixUseHardcodedRules;
            if (useHardcodedRules)
            {
                word = ApplyHardcodedCharacterFixes(word);
            }

            word = ApplyAlwaysReplaceList(word);

            string pre;
            string post;
            word = SplitOffPunctuation(word, out pre, out post);
            if (word.Length == 0)
            {
                return pre + word + post;
            }

            if (word.Contains('?'))
            {
                // "what?Yes" => "what? Yes"
                var match = RegExQuestion.Match(word);
                if (match.Success)
                {
                    word = word.Insert(match.Index + 2, " ");
                }
            }

            string replaced;
            if (TryReplaceWholeWord(pre, word, post, out replaced))
            {
                return replaced;
            }

            if (useHardcodedRules)
            {
                // uppercase I or 1 inside lowercase word (will be replaced by lowercase L)
                word = FixIor1InsideLowerCaseWord(word);

                // zero inside lowercase word (will be replaced by lowercase o)
                word = Fix0InsideLowerCaseWord(word);

                // second pass: replacing a zero can expose a new lowercase neighbour for I/1
                word = FixIor1InsideLowerCaseWord(word);

                word = FixLowerCaseLInsideUpperCaseWord(word); // eg. SCARLETTl => SCARLETTI
            }

            // Retry whole-word list with the corrected word
            if (TryReplaceWholeWord(pre, word, post, out replaced))
            {
                return replaced;
            }

            // Re-assemble from the corrected word; returning the text captured before the fixes
            // would silently discard them.
            return pre + word + post;
        }

        /// <summary>Character-level normalisations applied when the hard-coded rules are enabled.</summary>
        private static string ApplyHardcodedCharacterFixes(string word)
        {
            word = word.Replace("\uFB01", "fi"); // "fi" ligature, written as an escape so it survives text normalisation
            word = word.Replace('ν', 'v'); // NOTE: first 'v' is a special unicode character!!!!
            word = word.Replace('’', '\'');
            word = word.Replace('`', '\'');
            word = word.Replace('‘', '\'');
            word = word.Replace('—', '-');
            while (word.Contains("--"))
            {
                word = word.Replace("--", "-");
            }
            word = word.Replace('|', 'l');
            word = word.Replace("vx/", "w");
            if (LetterBeforeCurrencySign.IsMatch(word))
            {
                word = word.Replace('¤', 'o');
            }
            return word;
        }

        /// <summary>Applies every "PartialWordsAlways" entry to <paramref name="word"/>.</summary>
        private string ApplyAlwaysReplaceList(string word)
        {
            foreach (var entry in _partialWordReplaceListAlways)
            {
                word = word.Replace(entry.Key, entry.Value);
            }
            return word;
        }

        /// <summary>
        /// Strips a leading italic tag, line breaks, '-', '.', '"' and one '(' into <paramref name="pre"/>, and
        /// trailing line breaks, '"', '.', ',', '?', '!', ')' and a closing italic tag into <paramref name="post"/>.
        /// Both keep the characters in their original text order, so pre + result + post equals the input.
        /// </summary>
        /// <returns>The bare word.</returns>
        private static string SplitOffPunctuation(string word, out string pre, out string post)
        {
            string newLine = Environment.NewLine;
            pre = string.Empty;
            post = string.Empty;

            if (word.StartsWith(ItalicStartTag, StringComparison.Ordinal))
            {
                pre += ItalicStartTag;
                word = word.Remove(0, ItalicStartTag.Length);
            }
            // Use the real line-break length; it is not 2 on every platform.
            while (word.Length > newLine.Length && word.StartsWith(newLine, StringComparison.Ordinal))
            {
                pre += newLine;
                word = word.Substring(newLine.Length);
            }
            foreach (char c in LeadingRepeatableChars)
            {
                while (word.Length > 1 && word[0] == c)
                {
                    pre += c;
                    word = word.Substring(1);
                }
            }
            if (word.Length > 1 && word[0] == '(')
            {
                pre += "(";
                word = word.Substring(1);
            }
            if (word.StartsWith(ItalicStartTag, StringComparison.Ordinal))
            {
                pre += ItalicStartTag;
                word = word.Remove(0, ItalicStartTag.Length);
            }

            // Pieces are taken from the end, so each new piece belongs in FRONT of what was
            // collected so far; appending would reverse mixed suffixes such as ".\"".
            while (word.Length > newLine.Length && word.EndsWith(newLine, StringComparison.Ordinal))
            {
                post = newLine + post;
                word = word.Substring(0, word.Length - newLine.Length);
            }
            foreach (char c in TrailingRepeatableChars)
            {
                while (word.Length > 1 && word[word.Length - 1] == c)
                {
                    post = c + post;
                    word = word.Substring(0, word.Length - 1);
                }
            }
            if (word.EndsWith(ItalicEndTag, StringComparison.Ordinal))
            {
                post = ItalicEndTag + post;
                word = word.Remove(word.Length - ItalicEndTag.Length);
            }

            return word;
        }

        /// <summary>
        /// Looks the word up in <see cref="WordReplaceList"/>, trying in order: the bare word,
        /// word + post, and pre + word + post. Whatever part of pre/post was not covered by the
        /// matched key is re-attached to the replacement.
        /// </summary>
        private bool TryReplaceWholeWord(string pre, string word, string post, out string result)
        {
            string to;
            if (WordReplaceList.TryGetValue(word, out to))
            {
                result = pre + to + post;
                return true;
            }
            if (post.Length > 0 && WordReplaceList.TryGetValue(word + post, out to))
            {
                result = pre + to;
                return true;
            }
            if (pre.Length > 0 && WordReplaceList.TryGetValue(pre + word + post, out to))
            {
                result = to;
                return true;
            }

            result = null;
            return false;
        }

        /// <summary>
        /// In a word longer than 3 characters that is all upper case apart from 'l', turns every 'l' into 'I'
        /// (eg. SCARLETTl => SCARLETTI). Words containing '&lt;', '&gt;' or an apostrophe are left alone.
        /// </summary>
        public static string FixLowerCaseLInsideUpperCaseWord(string word)
        {
            if (word.Length <= 3)
            {
                return word;
            }

            string withoutL = word.Replace("l", string.Empty);
            if (withoutL.ToUpper() != withoutL)
            {
                return word;
            }

            if (word.Contains('<') || word.Contains('>') || word.Contains('\''))
            {
                return word;
            }

            return word.Replace('l', 'I');
        }

        /// <summary>
        /// Replaces an upper case 'I' or a '1' that directly follows a lower case letter with 'l'
        /// (eg. "heIlo" => "hello"). Numbers, hex values and words containing the digits 2-9 are
        /// returned unchanged, and an 'I' following "Mc" or "Mac" is kept.
        /// </summary>
        public static string FixIor1InsideLowerCaseWord(string word)
        {
            if (StartEndEndsWithNumber.IsMatch(word) ||
                word.Contains(DigitsTwoToNine) ||
                HexNumber.IsMatch(word))
            {
                return word;
            }

            if (word.LastIndexOf('I') <= 0 && word.LastIndexOf('1') <= 0)
            {
                return word;
            }

            var match = RegExIandZero.Match(word);
            while (match.Success)
            {
                int target = match.Index + 1; // position of the 'I' / '1'

                // Fix by default; only names such as "McIntyre" / "MacIntosh" keep their capital I.
                bool isNamePrefix = word[target] == 'I' &&
                                    (HasPrefixAt(word, match.Index - 1, "Mc") || HasPrefixAt(word, match.Index - 2, "Mac"));
                if (!isNamePrefix)
                {
                    word = ReplaceCharAt(word, target, "l");
                }

                match = RegExIandZero.Match(word, match.Index + 1);
            }

            return word;
        }

        /// <summary>
        /// Replaces a zero that touches a lower case letter with 'o'. Numbers, hex values, words containing
        /// the digits 1-9 and words ending in "a.m", "p.m", "am" or "pm" are returned unchanged.
        /// </summary>
        public static string Fix0InsideLowerCaseWord(string word)
        {
            if (StartEndEndsWithNumber.IsMatch(word))
            {
                return word;
            }

            if (word.Contains(DigitsOneToNine) ||
                word.EndsWith("a.m", StringComparison.Ordinal) ||
                word.EndsWith("p.m", StringComparison.Ordinal) ||
                word.EndsWith("am", StringComparison.Ordinal) ||
                word.EndsWith("pm", StringComparison.Ordinal))
            {
                return word;
            }

            if (HexNumber.IsMatch(word))
            {
                return word;
            }

            if (word.LastIndexOf('0') <= 0)
            {
                return word;
            }

            // letter followed by zero: every match is replaced, so restarting from the beginning terminates
            Match match = RegExTime1.Match(word);
            while (match.Success)
            {
                word = ReplaceCharAt(word, match.Index + 1, "o");
                match = RegExTime1.Match(word);
            }

            // zero followed by letter, unless the zero is preceded by a non-zero digit
            const string expectedDigits = "123456789";
            match = RegExTime2.Match(word);
            while (match.Success)
            {
                if (match.Index == 0 || expectedDigits.IndexOf(word[match.Index - 1]) < 0)
                {
                    word = ReplaceCharAt(word, match.Index, "o");
                }
                match = RegExTime2.Match(word, match.Index + 1);
            }

            return word;
        }

        /// <summary>Returns <paramref name="text"/> with the single character at <paramref name="index"/> replaced.</summary>
        private static string ReplaceCharAt(string text, int index, string replacement)
        {
            return text.Remove(index, 1).Insert(index, replacement);
        }

        /// <summary>True when <paramref name="prefix"/> occurs in <paramref name="text"/> exactly at <paramref name="start"/> (ordinal).</summary>
        private static bool HasPrefixAt(string text, int start, string prefix)
        {
            return start >= 0 && string.CompareOrdinal(text, start, prefix, 0, prefix.Length) == 0;
        }

        /// <summary>
        /// Light-weight variant of <see cref="FixCommonWordErrors"/>: applies only the "always" partial-word
        /// list and the whole-word list, without any hard-coded heuristics.
        /// </summary>
        public string FixCommonWordErrorsQuick(string word)
        {
            word = ApplyAlwaysReplaceList(word);

            string pre;
            string post;
            word = SplitOffPunctuation(word, out pre, out post);
            if (word.Length == 0)
            {
                return pre + word + post;
            }

            string replaced;
            if (TryReplaceWholeWord(pre, word, post, out replaced))
            {
                return replaced;
            }

            return pre + word + post;
        }

        /// <summary>
        /// Removes <paramref name="word"/> from the partial-line list (when it contains a space) or from the
        /// whole-word list, persisting the change in the user file.
        /// </summary>
        /// <returns>True when something was removed and saved.</returns>
        public bool RemoveWordOrPartial(string word)
        {
            if (word.Contains(' '))
            {
                if (!DeleteFromList(word, "PartialLines", "LinePart"))
                {
                    return false;
                }
                PartialLineWordBoundaryReplaceList.Remove(word);
                return true;
            }

            if (!DeleteFromList(word, "WholeWords", "Word"))
            {
                return false;
            }
            WordReplaceList.Remove(word);
            return true;
        }

        /// <summary>
        /// Deletes <paramref name="word"/> from the user section <paramref name="replaceListName"/> and/or, when
        /// it is a built-in entry, records it under "Removed" + <paramref name="replaceListName"/> in the user file.
        /// </summary>
        private bool DeleteFromList(string word, string replaceListName, string elementName)
        {
            var builtInList = LoadReplaceList(LoadXmlReplaceListDocument(), replaceListName);
            var userDoc = LoadXmlReplaceListUserDocument();
            var userList = LoadReplaceList(userDoc, replaceListName);

            bool removed = false;

            if (userList.Remove(word))
            {
                XmlNode section = userDoc.DocumentElement.SelectSingleNode(replaceListName);
                if (section != null)
                {
                    // Rebuild the section from the remaining entries.
                    section.RemoveAll();
                    foreach (var entry in userList)
                    {
                        AppendReplaceNode(userDoc, section, elementName, entry.Key, entry.Value);
                    }
                    userDoc.Save(ReplaceListXmlFileNameUser);
                    removed = true;
                }
            }

            if (builtInList.ContainsKey(word))
            {
                XmlNode removedSection = userDoc.DocumentElement.SelectSingleNode("Removed" + replaceListName);
                if (removedSection != null)
                {
                    AppendReplaceNode(userDoc, removedSection, elementName, word, string.Empty);
                    userDoc.Save(ReplaceListXmlFileNameUser);
                    removed = true;
                }
            }

            return removed;
        }

        /// <summary>Appends &lt;elementName from="..." to="..."/&gt; to <paramref name="parent"/>.</summary>
        private static void AppendReplaceNode(XmlDocument doc, XmlNode parent, string elementName, string from, string to)
        {
            XmlElement node = doc.CreateElement(elementName);
            node.SetAttribute("from", from);
            node.SetAttribute("to", to);
            parent.AppendChild(node);
        }

        /// <summary>
        /// Loads <paramref name="fileName"/>; falls back to <paramref name="fallbackXml"/> when the file is
        /// missing or cannot be parsed (deliberately best effort: a broken list must not stop OCR fixing).
        /// </summary>
        private static XmlDocument LoadXmlOrFallback(string fileName, string fallbackXml)
        {
            if (File.Exists(fileName))
            {
                try
                {
                    var loaded = new XmlDocument();
                    loaded.Load(fileName);
                    return loaded;
                }
                catch
                {
                    // unreadable or malformed file - use the empty skeleton below
                }
            }

            var doc = new XmlDocument();
            doc.LoadXml(fallbackXml);
            return doc;
        }

        private XmlDocument LoadXmlReplaceListDocument()
        {
            return LoadXmlOrFallback(_replaceListXmlFileName, EmptyReplaceListXml);
        }

        /// <summary>Path of the user file: same folder and extension as the built-in file, with "_User" appended to the name.</summary>
        private string ReplaceListXmlFileNameUser
        {
            get
            {
                string folder = Path.GetDirectoryName(_replaceListXmlFileName) ?? string.Empty;
                string fileName = Path.GetFileNameWithoutExtension(_replaceListXmlFileName) + "_User" + Path.GetExtension(_replaceListXmlFileName);
                return Path.Combine(folder, fileName);
            }
        }

        private XmlDocument LoadXmlReplaceListUserDocument()
        {
            return LoadXmlOrFallback(ReplaceListXmlFileNameUser, EmptyUserReplaceListXml);
        }

        /// <summary>
        /// Adds a replacement to the partial-line list (when <paramref name="fromWord"/> contains a space) or to
        /// the whole-word list, persisting it in the user file.
        /// </summary>
        /// <returns>False when the user file already has an entry for <paramref name="fromWord"/>.</returns>
        public bool AddWordOrPartial(string fromWord, string toWord)
        {
            if (fromWord.Contains(' '))
            {
                if (!SaveToList(fromWord, toWord, "PartialLines", "LinePart"))
                {
                    return false;
                }
                if (!PartialLineWordBoundaryReplaceList.ContainsKey(fromWord))
                {
                    PartialLineWordBoundaryReplaceList.Add(fromWord, toWord);
                }
                return true;
            }

            if (!SaveToList(fromWord, toWord, "WholeWords", "Word"))
            {
                return false;
            }
            if (!WordReplaceList.ContainsKey(fromWord))
            {
                WordReplaceList.Add(fromWord, toWord);
            }
            return true;
        }

        /// <summary>
        /// Appends the pair to section <paramref name="replaceListName"/> of the user file. Returns false when the
        /// key already exists there; returns true without saving when the section is missing from the file.
        /// </summary>
        private bool SaveToList(string fromWord, string toWord, string replaceListName, string elementName)
        {
            var userDoc = LoadXmlReplaceListUserDocument();
            var userList = LoadReplaceList(userDoc, replaceListName);
            if (userList.ContainsKey(fromWord))
            {
                return false;
            }

            XmlNode section = userDoc.DocumentElement.SelectSingleNode(replaceListName);
            if (section != null)
            {
                AppendReplaceNode(userDoc, section, elementName, fromWord, toWord);
                userDoc.Save(ReplaceListXmlFileNameUser);
            }
            return true;
        }

        /// <summary>
        /// Adds a whole-line replacement to the in-memory list and to the "WholeLines" section of the user file.
        /// </summary>
        public void AddToWholeLineList(string fromLine, string toLine)
        {
            var userDocument = LoadXmlReplaceListUserDocument();
            if (!_wholeLineReplaceList.ContainsKey(fromLine))
            {
                _wholeLineReplaceList.Add(fromLine, toLine);
            }

            XmlNode section = userDocument.DocumentElement.SelectSingleNode("WholeLines");
            if (section == null || LoadReplaceList(userDocument, "WholeLines").ContainsKey(fromLine))
            {
                return;
            }

            AppendReplaceNode(userDocument, section, "Line", fromLine, toLine);
            // Save to the USER file: writing this document to the built-in file name would
            // overwrite the shipped replace list with the user's entries only.
            userDocument.Save(ReplaceListXmlFileNameUser);
        }

        /// <summary>
        /// Replaces every occurrence of <paramref name="word"/> in <paramref name="text"/> that sits on word
        /// boundaries (start/end of text, whitespace, line break or common punctuation) with
        /// <paramref name="newWord"/>.
        /// </summary>
        /// <returns>
        /// The text with replacements applied; <paramref name="text"/> unchanged when <paramref name="word"/> is
        /// null or not present; an empty string when <paramref name="text"/> is null.
        /// </returns>
        public static string ReplaceWord(string text, string word, string newWord)
        {
            if (text == null)
            {
                return string.Empty;
            }
            if (word == null || !text.Contains(word))
            {
                return text; // nothing to replace - keep the input instead of returning ""
            }

            const string startChars = @" ¡¿<>-""”“()[]'‘`´¶♪¿¡.…—!?,:;/";
            string boundaryChars = startChars + Environment.NewLine;

            var sb = new StringBuilder(text.Length);
            int appendFrom = 0;
            for (int i = 0; i < text.Length; i++)
            {
                // Ordinal compare at offset: avoids allocating a substring per character.
                if (i >= appendFrom && string.CompareOrdinal(text, i, word, 0, word.Length) == 0)
                {
                    bool startOk = i == 0 || boundaryChars.Contains(text[i - 1]) || word.StartsWith(' ');
                    if (startOk)
                    {
                        int end = i + word.Length;
                        // NOTE(review): the last test looks at newWord, not word - kept as found; confirm intent.
                        bool endOk = end == text.Length || boundaryChars.Contains(text[end]) || newWord.EndsWith(' ');
                        if (endOk)
                        {
                            sb.Append(newWord);
                            appendFrom = end;
                        }
                    }
                }

                if (i >= appendFrom)
                {
                    sb.Append(text[i]);
                }
            }

            return sb.ToString();
        }
    }
}