From 4b5ab0d75f0d32afd046e33367efccf93fe3787c Mon Sep 17 00:00:00 2001
From: niksedk <nikse.dk@gmail.com>
Date: Wed, 28 Sep 2022 22:29:07 +0200
Subject: [PATCH] Fix for `do use *_se.xml words in OCR` Somewhat related to
 #6292

---
 src/libse/Common/Utilities.cs               | 24 -------------
 src/libse/SpellCheck/SpellCheckWordLists.cs |  6 +++-
 src/ui/Logic/Ocr/OcrFixEngine.cs            | 38 ++++++++-------------
 3 files changed, 19 insertions(+), 49 deletions(-)
diff --git a/src/libse/Common/Utilities.cs b/src/libse/Common/Utilities.cs
index b99f2f385..e35e587c9 100644
--- a/src/libse/Common/Utilities.cs
+++ b/src/libse/Common/Utilities.cs
@@ -1084,30 +1084,6 @@ namespace Nikse.SubtitleEdit.Core.Common
             return userWordListXmlFileName;
         }
 
-        public static string LoadUserWordList(HashSet<string> userWordList, string languageName)
-        {
-            userWordList.Clear();
-            var userWordDictionary = new XmlDocument();
-            string userWordListXmlFileName = DictionaryFolder + languageName + "_user.xml";
-            if (File.Exists(userWordListXmlFileName))
-            {
-                userWordDictionary.Load(userWordListXmlFileName);
-                var nodes = userWordDictionary.DocumentElement?.SelectNodes("word");
-                if (nodes != null)
-                {
-                    foreach (XmlNode node in nodes)
-                    {
-                        string s = node.InnerText.ToLowerInvariant();
-                        if (!userWordList.Contains(s))
-                        {
-                            userWordList.Add(s);
-                        }
-                    }
-                }
-            }
-            return userWordListXmlFileName;
-        }
-
         public static readonly string UppercaseLetters = Configuration.Settings.General.UppercaseLetters.ToUpperInvariant() + "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ";
         public static readonly string LowercaseLetters = Configuration.Settings.General.UppercaseLetters.ToLowerInvariant() + "αβγδεζηθικλμνξοπρσςτυφχψωήάόέ";
         public static readonly string LowercaseLettersWithNumbers = LowercaseLetters + "0123456789";
diff --git a/src/libse/SpellCheck/SpellCheckWordLists.cs b/src/libse/SpellCheck/SpellCheckWordLists.cs
index b315c7304..fb00f648d 100644
--- a/src/libse/SpellCheck/SpellCheckWordLists.cs
+++ b/src/libse/SpellCheck/SpellCheckWordLists.cs
@@ -31,7 +31,6 @@ namespace Nikse.SubtitleEdit.Core.SpellCheck
         private readonly HashSet<string> _namesListWithApostrophe = new HashSet<string>();
         private readonly HashSet<string> _wordsWithDashesOrPeriods = new HashSet<string>();
         private readonly HashSet<string> _userWordList = new HashSet<string>();
-        private readonly HashSet<string> _seWordList = new HashSet<string>();
         private readonly HashSet<string> _userPhraseList = new HashSet<string>();
         private readonly string _dictionaryFolder;
         private HashSet<string> _skipAllList = new HashSet<string>();
@@ -247,6 +246,11 @@ namespace Nikse.SubtitleEdit.Core.SpellCheck
             Utilities.RemoveFromUserDictionary(word, _languageName);
         }
 
+        /// <summary>
+        /// Returns the internal user word set itself (the live instance, not a copy) - presumably also holding the *_se.xml words; verify against the loader.
+        /// </summary>
+        public HashSet<string> GetSeAndUserWords() => _userWordList;
+
         public void RemoveName(string word)
         {
             if (word == null || word.Length <= 1 || !_names.Contains(word))
diff --git a/src/ui/Logic/Ocr/OcrFixEngine.cs b/src/ui/Logic/Ocr/OcrFixEngine.cs
index 8532cb119..aa0bb6e45 100644
--- a/src/ui/Logic/Ocr/OcrFixEngine.cs
+++ b/src/ui/Logic/Ocr/OcrFixEngine.cs
@@ -16,7 +16,6 @@ using System.Linq;
 using System.Text;
 using System.Text.RegularExpressions;
 using System.Windows.Forms;
-using Nikse.SubtitleEdit.Core.SubtitleFormats;
 
 namespace Nikse.SubtitleEdit.Logic.Ocr
 {
@@ -62,7 +61,6 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
             Aggressive
         }
 
-        private string _userWordListXmlFileName;
         private string _fiveLetterWordListLanguageName;
 
         private readonly OcrFixReplaceList _ocrFixReplaceList;
@@ -73,7 +71,6 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
         private HashSet<string> _nameMultiWordList = new HashSet<string>(); // case sensitive phrases
         private List<string> _nameMultiWordListAndWordsWithPeriods;
         private HashSet<string> _abbreviationList;
-        private HashSet<string> _userWordList = new HashSet<string>();
         private HashSet<string> _wordSkipList = new HashSet<string>();
         private readonly HashSet<string> _wordSpellOkList = new HashSet<string>();
         private string[] _wordSplitList;
@@ -365,17 +362,6 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
                 }
             }
 
-            // Load user words
-            _userWordList = new HashSet<string>();
-            _userWordListXmlFileName = Utilities.LoadUserWordList(_userWordList, _fiveLetterWordListLanguageName);
-            foreach (var name in _userWordList)
-            {
-                if (name.EndsWith('.'))
-                {
-                    _abbreviationList.Add(name);
-                }
-            }
-
             // Load Hunspell spell checker
             try
             {
@@ -416,6 +402,14 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
                     _changeAllDictionary = _spellCheckWordLists.GetUseAlwaysList();
                 }
             }
+
+            // "?." evaluates to null when no word lists are loaded, and foreach over null throws - fall back to an empty sequence.
+            var seAndUserWords = _spellCheckWordLists?.GetSeAndUserWords() ?? Enumerable.Empty<string>();
+            foreach (var abbreviation in seAndUserWords.Where(candidate => candidate.EndsWith('.')))
+            {
+                // Words ending with a period are also registered as abbreviations.
+                _abbreviationList.Add(abbreviation);
+            }
         }
 
         private static string[] LoadWordSplitList(string threeLetterIsoLanguageName, NameList nameList)
@@ -1449,7 +1443,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
                         correct = !Configuration.Settings.Tools.CheckOneLetterWords; // hunspell allows too many single letter words
                     }
 
-                    if (!correct && _userWordList.Contains(word))
+                    if (!correct && _spellCheckWordLists.HasUserWord(word))
                     {
                         correct = true;
                     }
@@ -1479,7 +1473,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
                         var trimmed = word.Trim('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', ',', '،', '؟', '»');
                         if (trimmed != word)
                         {
-                            if (_userWordList.Contains(trimmed))
+                            if (_spellCheckWordLists.HasUserWord(trimmed))
                             {
                                 correct = true;
                             }
@@ -1830,11 +1824,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
                     Abort = true;
                     break;
                 case OcrSpellCheck.Action.AddToUserDictionary:
-                    if (_userWordListXmlFileName != null)
-                    {
-                        Utilities.AddToUserDictionary(_spellCheck.Word.Trim().ToLowerInvariant(), _fiveLetterWordListLanguageName);
-                        _userWordList.Add(_spellCheck.Word.Trim().ToLowerInvariant());
-                    }
+                    _spellCheckWordLists.AddUserWord(_spellCheck.Word.Trim().ToLowerInvariant());
                     result.Word = _spellCheck.Word;
                     result.Fixed = true;
                     result.Line = line;
@@ -1973,7 +1963,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
             {
                 if (!DoSpell(s) &&
                     !_nameList.Contains(s) &&
-                    !_userWordList.Contains(s) &&
+                    !_spellCheckWordLists.HasUserWord(s) &&
                     !IsWordKnownOrNumber(s, word))
                 {
                     if (s.Length > 10 && s.Contains('/'))
@@ -2033,12 +2023,12 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
                 return true;
             }
 
-            if (_userWordList.Contains(word.ToLowerInvariant()))
+            if (_spellCheckWordLists.HasUserWord(word.ToLowerInvariant()))
             {
                 return true;
             }
 
-            if (_userWordList.Contains(word.Trim('\'').ToLowerInvariant()))
+            if (_spellCheckWordLists.HasUserWord(word.Trim('\'').ToLowerInvariant()))
             {
                 return true;
             }