diff --git a/libse/Dictionaries/OcrFixReplaceList.cs b/libse/Dictionaries/OcrFixReplaceList.cs
index 5efc4b8b8..e3e4f0a56 100644
--- a/libse/Dictionaries/OcrFixReplaceList.cs
+++ b/libse/Dictionaries/OcrFixReplaceList.cs
@@ -17,8 +17,8 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
         private static readonly Regex HexNumber = new Regex(@"^#?[\dABDEFabcdef]+$", RegexOptions.Compiled);
         private static readonly Regex StartEndEndsWithNumber = new Regex(@"^\d+.+\d$", RegexOptions.Compiled);
 
-        public Dictionary WordReplaceList;
-        public Dictionary PartialLineWordBoundaryReplaceList;
+        public readonly Dictionary WordReplaceList;
+        public readonly Dictionary PartialLineWordBoundaryReplaceList;
         private readonly Dictionary _partialLineAlwaysReplaceList;
         private readonly Dictionary _beginLineReplaceList;
         private readonly Dictionary _endLineReplaceList;
@@ -85,21 +85,16 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
         private static Dictionary LoadReplaceList(XmlDocument doc, string name)
         {
             var list = new Dictionary();
-            if (doc.DocumentElement != null)
+            if (!IsValidXmlDocument(doc, name))
+                return list;
+            foreach (XmlNode item in doc.DocumentElement.SelectSingleNode(name).ChildNodes)
             {
-                XmlNode node = doc.DocumentElement.SelectSingleNode(name);
-                if (node != null)
+                if (HasValidAttributes(item, false))
                 {
-                    foreach (XmlNode item in node.ChildNodes)
-                    {
-                        if (item.Attributes != null && item.Attributes["to"] != null && item.Attributes["from"] != null)
-                        {
-                            string to = item.Attributes["to"].InnerText;
-                            string from = item.Attributes["from"].InnerText;
-                            if (!list.ContainsKey(from))
-                                list.Add(from, to);
-                        }
-                    }
+                    string to = item.Attributes["to"].Value;
+                    string from = item.Attributes["from"].Value;
+                    if (!list.ContainsKey(from))
+                        list.Add(from, to);
                 }
             }
             return list;
@@ -108,26 +103,49 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
         private static Dictionary LoadRegExList(XmlDocument doc, string name)
         {
             var list = new Dictionary();
-            if (doc.DocumentElement != null)
+            if (!IsValidXmlDocument(doc, name))
+                return list;
+            foreach (XmlNode item in doc.DocumentElement.SelectSingleNode(name).ChildNodes)
             {
-                XmlNode node = doc.DocumentElement.SelectSingleNode(name);
-                if (node != null)
+                if (HasValidAttributes(item, true))
                 {
-                    foreach (XmlNode item in node.ChildNodes)
-                    {
-                        if (item.Attributes != null && item.Attributes["replaceWith"] != null && item.Attributes["find"] != null)
-                        {
-                            string to = item.Attributes["replaceWith"].InnerText;
-                            string from = item.Attributes["find"].InnerText;
-                            if (!list.ContainsKey(from))
-                                list.Add(from, to);
-                        }
-                    }
+                    string to = item.Attributes["replaceWith"].Value;
+                    string from = item.Attributes["find"].Value;
+                    if (!list.ContainsKey(from))
+                        list.Add(from, to);
                 }
             }
             return list;
         }
 
+        private static bool IsValidXmlDocument(XmlDocument doc, string elementName)
+        {
+            if (doc.DocumentElement == null || doc.DocumentElement.SelectSingleNode(elementName) == null)
+                return false;
+            return true;
+        }
+
+        private static bool HasValidAttributes(XmlNode node, bool isRegex)
+        {
+            if (node == null || node.Attributes == null)
+                return false;
+            if (isRegex)
+            {
+                if (node.Attributes["find"] != null && node.Attributes["replaceWith"] != null)
+                {
+                    return Utilities.IsValidRegex(node.Attributes["find"].Value);
+                }
+            }
+            else
+            {
+                if (node.Attributes["from"] != null && node.Attributes["to"] != null)
+                {
+                    return (node.Attributes["from"].Value != node.Attributes["to"].Value);
+                }
+            }
+            return false;
+        }
+
         public string FixOcrErrorViaLineReplaceList(string input)
         {
             // Whole fromLine