From 03e1d962a5e1eaab31f0b4caac80b06417be6ee6 Mon Sep 17 00:00:00 2001 From: niksedk Date: Sun, 18 Dec 2022 08:56:32 +0100 Subject: [PATCH] Minor refact --- src/libse/Dictionaries/OcrFixReplaceList.cs | 73 ++++++------ src/ui/Logic/Ocr/OcrFixEngine.cs | 126 ++++++++++---------- 2 files changed, 103 insertions(+), 96 deletions(-) diff --git a/src/libse/Dictionaries/OcrFixReplaceList.cs b/src/libse/Dictionaries/OcrFixReplaceList.cs index a5bf7c3e0..6e6b6e47f 100644 --- a/src/libse/Dictionaries/OcrFixReplaceList.cs +++ b/src/libse/Dictionaries/OcrFixReplaceList.cs @@ -246,7 +246,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries public string FixOcrErrorViaLineReplaceList(string input) { // Whole fromLine - foreach (string from in _wholeLineReplaceList.Keys) + foreach (var from in _wholeLineReplaceList.Keys) { if (input == from) { @@ -254,8 +254,8 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries } } - string newText = input; - string pre = string.Empty; + var newText = input; + var pre = string.Empty; if (newText.StartsWith("", StringComparison.Ordinal)) { pre += ""; @@ -275,14 +275,14 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries // begin fromLine var lines = newText.SplitToLines(); var sb = new StringBuilder(input.Length + 2); - foreach (string l in lines) + foreach (var l in lines) { - string s = l; + var s = l; foreach (string from in _beginLineReplaceList.Keys) { if (s.FastIndexOf(from) >= 0) { - string with = _beginLineReplaceList[from]; + var with = _beginLineReplaceList[from]; if (s.StartsWith(from, StringComparison.Ordinal)) { s = s.Remove(0, from.Length).Insert(0, with); @@ -300,14 +300,14 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries } newText = pre + sb.ToString().TrimEnd(Utilities.NewLineChars); - string post = string.Empty; + var post = string.Empty; if (newText.EndsWith("", StringComparison.Ordinal)) { newText = newText.Remove(newText.Length - 4, 4); post = ""; } - foreach (string from in _endLineReplaceList.Keys) + foreach (var from in _endLineReplaceList.Keys) { if (newText.EndsWith(from, StringComparison.Ordinal)) { @@ -317,7 +317,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries } newText += post; - foreach (string from in PartialLineWordBoundaryReplaceList.Keys) + foreach (var from in PartialLineWordBoundaryReplaceList.Keys) { if (newText.FastIndexOf(from) >= 0) { @@ -325,7 +325,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries } } - foreach (string from in _partialLineAlwaysReplaceList.Keys) + foreach (var from in _partialLineAlwaysReplaceList.Keys) { if (newText.FastIndexOf(from) >= 0) { @@ -336,7 +336,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries if (_replaceRegExes == null || _regExList.Count != _replaceRegExes.Count) { _replaceRegExes = new List(); - foreach (string findWhat in _regExList.Keys) + foreach (var findWhat in _regExList.Keys) { var regex = new Regex(findWhat, RegexOptions.Multiline | RegexOptions.Compiled); _replaceRegExes.Add(regex); @@ -345,8 +345,8 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries } else { - int i = 0; - foreach (string findWhat in _regExList.Keys) + var i = 0; + foreach (var findWhat in _regExList.Keys) { var regex = _replaceRegExes[i]; newText = regex.Replace(newText, _regExList[findWhat]); @@ -376,10 +376,10 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries { var list = new List(); var previousGuesses = new List(); - foreach (string letter in _partialWordReplaceList.Keys) + foreach (var letter in _partialWordReplaceList.Keys) { var indexes = new List(); - for (int i = 0; i <= word.Length - letter.Length; i++) + for (var i = 0; i <= word.Length - letter.Length; i++) { if (word.Substring(i).StartsWith(letter, StringComparison.Ordinal)) { @@ -403,7 +403,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries if (!_partialWordReplaceList[letter].Contains(' ')) { var multiGuess = word; - for (int i = indexes.Count - 1; i >= 0; i--) + for (var i = indexes.Count - 1; i >= 0; i--) { var idx = indexes[i]; multiGuess = multiGuess.Remove(idx, letter.Length).Insert(idx, _partialWordReplaceList[letter]); @@ -420,7 +420,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries if (indexes.Count > 0) { - for (int i = indexes.Count - 1; i >= 0; i--) + for (var i = indexes.Count - 1; i >= 0; i--) { var idx = indexes[i]; if (idx > 1 && idx < word.Length - 2) @@ -433,7 +433,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries foreach (var previousGuess in previousGuesses) { - for (int i = 0; i < previousGuess.Length - letter.Length; i++) + for (var i = 0; i < previousGuess.Length - letter.Length; i++) { if (previousGuess.Substring(i).StartsWith(letter, StringComparison.Ordinal)) { @@ -498,13 +498,13 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries } //always replace list - foreach (string letter in _partialWordAlwaysReplaceList.Keys) + foreach (var letter in _partialWordAlwaysReplaceList.Keys) { word = word.Replace(letter, _partialWordAlwaysReplaceList[letter]); } - string pre = string.Empty; - string post = string.Empty; + var pre = string.Empty; + var post = string.Empty; if (word.StartsWith("", StringComparison.Ordinal)) { @@ -563,7 +563,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries word = word.Remove(word.Length - 4, 4); } - string preWordPost = pre + word + post; + var preWordPost = pre + word + post; if (word.Length == 0) { return preWordPost; @@ -663,6 +663,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries word = word.Replace('l', 'I'); } } + return word; } @@ -691,7 +692,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries { if (word[match.Index + 1] == 'I' || word[match.Index + 1] == '1') { - bool doFix = word[match.Index + 1] != 'I' && match.Index >= 1 && word.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal); + var doFix = word[match.Index + 1] != 'I' && match.Index >= 1 && word.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal); if (word[match.Index + 1] == 'I' && match.Index >= 2 && word.Substring(match.Index - 2).StartsWith("Mac", StringComparison.Ordinal)) { doFix = false; @@ -699,7 +700,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries if (doFix) { - string oldText = word; + var oldText = word; word = word.Substring(0, match.Index + 1) + "l"; if (match.Index + 2 < oldText.Length) { @@ -737,12 +738,12 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries if (word.LastIndexOf('0') > 0) { - Match match = RegExTime1.Match(word); + var match = RegExTime1.Match(word); while (match.Success) { if (word[match.Index + 1] == '0') { - string oldText = word; + var oldText = word; word = word.Substring(0, match.Index + 1) + "o"; if (match.Index + 2 < oldText.Length) { @@ -760,7 +761,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries { if (match.Index == 0 || !expectedDigits.Contains(word[match.Index - 1])) { - string oldText = word; + var oldText = word; word = word.Substring(0, match.Index) + "o"; if (match.Index + 1 < oldText.Length) { @@ -779,13 +780,13 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries var word = input; //always replace list - foreach (string letter in _partialWordAlwaysReplaceList.Keys) + foreach (var letter in _partialWordAlwaysReplaceList.Keys) { word = word.Replace(letter, _partialWordAlwaysReplaceList[letter]); } - string pre = string.Empty; - string post = string.Empty; + var pre = string.Empty; + var post = string.Empty; if (word.StartsWith("", StringComparison.Ordinal)) { @@ -864,7 +865,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries word = word.Remove(word.Length - 4, 4); } - string preWordPost = pre + word + post; + var preWordPost = pre + word + post; if (word.Length == 0) { return preWordPost; @@ -1048,6 +1049,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries } return false; } + if (SaveWordToWordList(fromWord, toWord)) { if (!WordReplaceList.ContainsKey(fromWord)) @@ -1056,6 +1058,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries } return true; } + return false; } @@ -1152,12 +1155,12 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries if (text.Contains(word)) { const string separatorChars = @" ¡¿<>-""”“()[]'‘`´¶♪¿¡.…—!?,:;/"; - int appendFrom = 0; - for (int i = 0; i < text.Length; i++) + var appendFrom = 0; + for (var i = 0; i < text.Length; i++) { if (text[i] == word[0] && i >= appendFrom && text.Substring(i).StartsWith(word, StringComparison.Ordinal)) { - bool startOk = i == 0; + var startOk = i == 0; if (!startOk) { var prevChar = text[i - 1]; @@ -1169,7 +1172,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries } if (startOk) { - bool endOk = i + word.Length == text.Length; + var endOk = i + word.Length == text.Length; if (!endOk) { var nextChar = text[i + word.Length]; diff --git a/src/ui/Logic/Ocr/OcrFixEngine.cs b/src/ui/Logic/Ocr/OcrFixEngine.cs index a1f8c9e3a..2bd1f6cbf 100644 --- a/src/ui/Logic/Ocr/OcrFixEngine.cs +++ b/src/ui/Logic/Ocr/OcrFixEngine.cs @@ -158,7 +158,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr private void LoadSpellingDictionaries(string threeLetterIsoLanguageName, string hunspellName) { - string dictionaryFolder = Utilities.DictionaryFolder; + var dictionaryFolder = Utilities.DictionaryFolder; if (!Directory.Exists(dictionaryFolder)) { return; @@ -207,51 +207,54 @@ namespace Nikse.SubtitleEdit.Logic.Ocr foreach (var culture in Iso639Dash2LanguageCode.List) { - if (culture.ThreeLetterCode == threeLetterIsoLanguageName) + if (culture.ThreeLetterCode != threeLetterIsoLanguageName) { - string dictionaryFileName = null; - if (!string.IsNullOrEmpty(hunspellName) && hunspellName.StartsWith(culture.TwoLetterCode, StringComparison.OrdinalIgnoreCase) && File.Exists(Path.Combine(dictionaryFolder, hunspellName + ".dic"))) - { - dictionaryFileName = Path.Combine(dictionaryFolder, hunspellName + ".dic"); - LoadSpellingDictionariesViaDictionaryFileName(threeLetterIsoLanguageName, dictionaryFileName, true); - return; - } - foreach (string dic in Directory.GetFiles(dictionaryFolder, "*.dic")) - { - string name = Path.GetFileNameWithoutExtension(dic); - if (!string.IsNullOrEmpty(name) && !name.StartsWith("hyph", StringComparison.Ordinal)) - { - try - { - name = name.Replace('_', '-'); - if (name.Length > 5) - { - name = name.Substring(0, 5); - } - - var ci = CultureInfo.GetCultureInfo(name); - if (ci.GetThreeLetterIsoLanguageName() == threeLetterIsoLanguageName || - ci.GetThreeLetterIsoLanguageName().Equals(threeLetterIsoLanguageName, StringComparison.OrdinalIgnoreCase)) - { - dictionaryFileName = dic; - break; - } - } - catch (Exception exception) - { - System.Diagnostics.Debug.WriteLine(exception.Message); - } - } - } - - if (dictionaryFileName == null) - { - return; - } + continue; + } + string dictionaryFileName = null; + if (!string.IsNullOrEmpty(hunspellName) && hunspellName.StartsWith(culture.TwoLetterCode, StringComparison.OrdinalIgnoreCase) && File.Exists(Path.Combine(dictionaryFolder, hunspellName + ".dic"))) + { + dictionaryFileName = Path.Combine(dictionaryFolder, hunspellName + ".dic"); LoadSpellingDictionariesViaDictionaryFileName(threeLetterIsoLanguageName, dictionaryFileName, true); return; } + + foreach (var dic in Directory.GetFiles(dictionaryFolder, "*.dic")) + { + var name = Path.GetFileNameWithoutExtension(dic); + if (!string.IsNullOrEmpty(name) && !name.StartsWith("hyph", StringComparison.Ordinal)) + { + try + { + name = name.Replace('_', '-'); + if (name.Length > 5) + { + name = name.Substring(0, 5); + } + + var ci = CultureInfo.GetCultureInfo(name); + if (ci.GetThreeLetterIsoLanguageName() == threeLetterIsoLanguageName || + ci.GetThreeLetterIsoLanguageName().Equals(threeLetterIsoLanguageName, StringComparison.OrdinalIgnoreCase)) + { + dictionaryFileName = dic; + break; + } + } + catch (Exception exception) + { + System.Diagnostics.Debug.WriteLine(exception.Message); + } + } + } + + if (dictionaryFileName == null) + { + return; + } + + LoadSpellingDictionariesViaDictionaryFileName(threeLetterIsoLanguageName, dictionaryFileName, true); + return; } string dicFileName = null; @@ -484,10 +487,10 @@ namespace Nikse.SubtitleEdit.Logic.Ocr text = text.Replace(".", "."); text = text.TrimStart(); - int len = text.Length; - for (int i = 0; i < len; i++) + var len = text.Length; + for (var i = 0; i < len; i++) { - char ch = text[i]; + var ch = text[i]; switch (ch) { case 'fi': @@ -695,7 +698,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr } } - int start = text.IndexOf(tag, StringComparison.Ordinal); + var start = text.IndexOf(tag, StringComparison.Ordinal); while (start > 0) { lastLine = HtmlUtil.RemoveHtmlTags(text.Substring(0, start)).TrimEnd().TrimEnd('-').TrimEnd(); @@ -709,7 +712,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr if (start > 1) { - string beforeThis = HtmlUtil.RemoveHtmlTags(text.Substring(0, start)); + var beforeThis = HtmlUtil.RemoveHtmlTags(text.Substring(0, start)); endingBeforeThis = beforeThis.EndsWith('.') || beforeThis.EndsWith('!') || beforeThis.EndsWith('?'); } @@ -735,7 +738,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr { if (start > 1) { - string beforeThis = HtmlUtil.RemoveHtmlTags(text.Substring(0, start)); + var beforeThis = HtmlUtil.RemoveHtmlTags(text.Substring(0, start)); endingBeforeThis = beforeThis.EndsWith('.') || beforeThis.EndsWith('!') || beforeThis.EndsWith('?'); } @@ -987,7 +990,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr text = "." + text; } - string pre = string.Empty; + var pre = string.Empty; if (text.StartsWith("- ", StringComparison.Ordinal)) { pre = "- "; @@ -1262,7 +1265,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr var match = RegexUppercaseI.Match(text); while (match.Success) { - bool doFix = !(match.Index >= 1 && text.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal)); + var doFix = !(match.Index >= 1 && text.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal)); if (match.Index >= 2 && text.Substring(match.Index - 2).StartsWith("Mac", StringComparison.Ordinal)) { doFix = false; @@ -1339,7 +1342,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr return line; } - string tempLine = line; + var tempLine = line; const string p = " ¡¿,.!?:;()[]{}+-$£\"„”“#&%…—♪\r\n"; var trimChars = p.ToArray(); bool hasAllUpperWord = false; @@ -1771,9 +1774,9 @@ namespace Nikse.SubtitleEdit.Logic.Ocr private static string GetWordWithDominatedCasing(string word) { - int lowercase = 0; - int uppercase = 0; - for (int i = 0; i < word.Length; i++) + var lowercase = 0; + var uppercase = 0; + for (var i = 0; i < word.Length; i++) { var ch = word[i]; if (char.IsLower(ch)) @@ -1953,18 +1956,18 @@ namespace Nikse.SubtitleEdit.Logic.Ocr { if (s.Length > 10 && s.Contains('/')) { - string[] ar = s.Split('/'); + var ar = s.Split('/'); if (ar.Length == 2) { if (ar[0].Length > 3 && ar[1].Length > 3) { - string a = ar[0]; + var a = ar[0]; if (a == a.ToUpperInvariant()) { a = a[0] + a.Substring(1).ToLowerInvariant(); } - string b = ar[0]; + var b = ar[0]; if (b == b.ToUpperInvariant()) { b = b[0] + b.Substring(1).ToLowerInvariant(); @@ -2044,22 +2047,22 @@ namespace Nikse.SubtitleEdit.Logic.Ocr return 0; } - int minLength = 2; + var minLength = 2; if (Configuration.Settings.Tools.CheckOneLetterWords) { minLength = 1; } - int wordsNotFound = 0; + var wordsNotFound = 0; var words = HtmlUtil.RemoveOpenCloseTags(line, HtmlUtil.TagItalic).Split(" \r\n\t".ToCharArray(), StringSplitOptions.RemoveEmptyEntries); for (int i = 0; i < words.Length; i++) { - string word = words[i].Trim(SpellCheckWordLists.SplitChars.ToArray()); + var word = words[i].Trim(SpellCheckWordLists.SplitChars.ToArray()); if (word.Length >= minLength) { if (!IsWordKnownOrNumber(word, line)) { - bool correct = word.Length > 1 && _hunspell.Spell(word); + var correct = word.Length > 1 && _hunspell.Spell(word); if (!correct) { correct = word.Length > 2 && _hunspell.Spell(word.Trim('\'')); @@ -2085,6 +2088,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr } } } + return wordsNotFound; } @@ -2095,12 +2099,12 @@ namespace Nikse.SubtitleEdit.Logic.Ocr _hunspell.Dispose(); _hunspell = null; } + if (_spellCheck != null) { _spellCheck.Dispose(); _spellCheck = null; } } - } }