Refactor "Spell check Form"

This commit is contained in:
Nikolaj Olsson 2016-02-02 20:40:47 +01:00
parent 3cdff0d18b
commit f75f35b103
8 changed files with 6017 additions and 5938 deletions

View File

@ -0,0 +1,7 @@
namespace Nikse.SubtitleEdit.Core.Interfaces
{
public interface IDoSpell
{
bool DoSpell(string word);
}
}

View File

@ -180,7 +180,10 @@
<Compile Include="RegistryUtil.cs" /> <Compile Include="RegistryUtil.cs" />
<Compile Include="RichTextToPlainText.cs" /> <Compile Include="RichTextToPlainText.cs" />
<Compile Include="Settings.cs" /> <Compile Include="Settings.cs" />
<Compile Include="SpellCheckWord.cs" /> <Compile Include="Interfaces\IDoSpell.cs" />
<Compile Include="SpellCheck\SpellCheckWord.cs" />
<Compile Include="SpellCheck\SpellCheckWordLists.cs" />
<Compile Include="SpellCheck\UndoObject.cs" />
<Compile Include="SsaStyle.cs" /> <Compile Include="SsaStyle.cs" />
<Compile Include="StringExtensions.cs" /> <Compile Include="StringExtensions.cs" />
<Compile Include="StripableText.cs" /> <Compile Include="StripableText.cs" />

View File

@ -1,4 +1,4 @@
namespace Nikse.SubtitleEdit.Core namespace Nikse.SubtitleEdit.Core.SpellCheck
{ {
public class SpellCheckWord public class SpellCheckWord
{ {

View File

@ -0,0 +1,328 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Xml;
using Nikse.SubtitleEdit.Core.Dictionaries;
using Nikse.SubtitleEdit.Core.Interfaces;
namespace Nikse.SubtitleEdit.Core.SpellCheck
{
public class SpellCheckWordLists
{
private static readonly char[] SplitChars2 = { ' ', '.', ',', '?', '!', ':', ';', '"', '“', '”', '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n', '¿', '¡', '…', '—', '', '♪', '♫', '„', '“' };
public static readonly string SplitChars = " -.,?!:;\"“”()[]{}|<>/+\r\n¿¡…—♪♫„“";
private readonly NamesList _namesList;
private readonly HashSet<string> _namesEtcList;
private readonly List<string> _namesEtcListUppercase = new List<string>();
private readonly List<string> _namesEtcListWithApostrophe = new List<string>();
private readonly List<string> _wordsWithDashesOrPeriods;
private readonly List<string> _userWordList;
private readonly List<string> _userPhraseList;
private readonly string _languageName;
private readonly IDoSpell _doSpell;
public SpellCheckWordLists(string dictionaryFolder, string languageName, IDoSpell doSpell)
{
if (languageName == null)
throw new NullReferenceException("languageName");
if (doSpell == null)
throw new NullReferenceException("doSpell");
_languageName = languageName;
_doSpell = doSpell;
_namesList = new NamesList(Configuration.DictionariesFolder, languageName, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
_namesEtcList = _namesList.GetNames();
var namesEtcMultiWordList = _namesList.GetMultiNames();
foreach (string namesItem in _namesEtcList)
_namesEtcListUppercase.Add(namesItem.ToUpper());
if (languageName.StartsWith("en_", StringComparison.OrdinalIgnoreCase))
{
foreach (string namesItem in _namesEtcList)
{
if (!namesItem.EndsWith('s'))
{
_namesEtcListWithApostrophe.Add(namesItem + "'s");
_namesEtcListWithApostrophe.Add(namesItem + "s");
}
else if (!namesItem.EndsWith('\''))
{
_namesEtcListWithApostrophe.Add(namesItem + "'");
}
}
}
_userWordList = new List<string>();
_userPhraseList = new List<string>();
if (File.Exists(dictionaryFolder + languageName + "_user.xml"))
{
var userWordDictionary = new XmlDocument();
userWordDictionary.Load(dictionaryFolder + languageName + "_user.xml");
if (userWordDictionary.DocumentElement != null)
{
var xmlNodeList = userWordDictionary.DocumentElement.SelectNodes("word");
if (xmlNodeList != null)
{
foreach (XmlNode node in xmlNodeList)
{
string word = node.InnerText.Trim().ToLower();
if (word.Contains(' '))
_userPhraseList.Add(word);
else
_userWordList.Add(word);
}
}
}
}
// Add names/userdic with "." or " " or "-"
_wordsWithDashesOrPeriods = new List<string>();
_wordsWithDashesOrPeriods.AddRange(namesEtcMultiWordList);
foreach (string name in _namesEtcList)
{
if (name.Contains(new[] { '.', '-' }))
_wordsWithDashesOrPeriods.Add(name);
}
foreach (string word in _userWordList)
{
if (word.Contains(new[] { '.', '-' }))
_wordsWithDashesOrPeriods.Add(word);
}
_wordsWithDashesOrPeriods.AddRange(_userPhraseList);
}
public void RemoveUserWord(string word)
{
_userWordList.Remove(word);
_userPhraseList.Remove(word);
Utilities.RemoveFromUserDictionary(word, _languageName);
}
public void RemoveName(string word)
{
if (word == null || word.Length <= 1 || !_namesEtcList.Contains(word))
return;
_namesEtcList.Remove(word);
_namesEtcListUppercase.Remove(word.ToUpper());
if (_languageName.StartsWith("en_", StringComparison.Ordinal) && !word.EndsWith('s'))
{
_namesEtcList.Remove(word + "s");
_namesEtcListUppercase.Remove(word.ToUpper() + "S");
}
if (!word.EndsWith('s'))
{
_namesEtcListWithApostrophe.Remove(word + "'s");
_namesEtcListUppercase.Remove(word.ToUpper() + "'S");
}
if (!word.EndsWith('\''))
_namesEtcListWithApostrophe.Remove(word + "'");
_namesList.Remove(word);
}
public string ReplaceKnownWordsOrNamesWithBlanks(string s)
{
var replaceIds = new List<string>();
var replaceNames = new List<string>();
GetTextWithoutUserWordsAndNames(replaceIds, replaceNames, s);
foreach (string name in replaceNames)
{
int start = s.IndexOf(name, StringComparison.Ordinal);
while (start >= 0)
{
bool startOk = start == 0 || SplitChars.Contains(s[start - 1]);
if (startOk)
{
int end = start + name.Length;
bool endOk = end >= s.Length || SplitChars.Contains(s[end]);
if (endOk)
s = s.Remove(start, name.Length).Insert(start, string.Empty.PadLeft(name.Length));
}
if (start + 1 < s.Length)
start = s.IndexOf(name, start + 1, StringComparison.Ordinal);
else
start = -1;
}
}
return s;
}
public string ReplaceHtmlTagsWithBlanks(string s)
{
int start = s.IndexOf('<');
while (start >= 0)
{
int end = s.IndexOf('>', start + 1);
if (end < start)
break;
int l = end - start + 1;
s = s.Remove(start, l).Insert(start, string.Empty.PadLeft(l));
end++;
if (end >= s.Length)
break;
start = s.IndexOf('<', end);
}
return s;
}
public bool IsWordInUserPhrases(int index, List<SpellCheckWord> words)
{
string current = words[index].Text;
string prev = "-";
if (index > 0)
prev = words[index - 1].Text;
string next = "-";
if (index < words.Count - 1)
next = words[index + 1].Text;
foreach (string userPhrase in _userPhraseList)
{
if (userPhrase == current + " " + next)
return true;
if (userPhrase == prev + " " + current)
return true;
}
return false;
}
/// <summary>
/// Removes words with dash'es that are correct, so spell check can ignore the combination (do not split correct words with dash'es)
/// </summary>
private void GetTextWithoutUserWordsAndNames(List<string> replaceIds, List<string> replaceNames, string text)
{
string[] wordsWithDash = text.Split(SplitChars2, StringSplitOptions.RemoveEmptyEntries);
foreach (string w in wordsWithDash)
{
if (w.Contains('-') && _doSpell.DoSpell(w) && !_wordsWithDashesOrPeriods.Contains(w))
_wordsWithDashesOrPeriods.Add(w);
}
if (text.Contains(new[] { '.', '-' }))
{
int i = 0;
foreach (string wordWithDashesOrPeriods in _wordsWithDashesOrPeriods)
{
bool found = true;
int startSearchIndex = 0;
while (found)
{
int indexStart = text.IndexOf(wordWithDashesOrPeriods, startSearchIndex, StringComparison.Ordinal);
if (indexStart >= 0)
{
int endIndexPlus = indexStart + wordWithDashesOrPeriods.Length;
bool startOk = indexStart == 0 || (@" (['""" + Environment.NewLine).Contains(text[indexStart - 1]);
bool endOk = endIndexPlus == text.Length;
if (!endOk && endIndexPlus < text.Length && @",!?:;. ])<'""".Contains(text[endIndexPlus]))
endOk = true;
if (startOk && endOk)
{
i++;
string id = string.Format("_@{0}_", i);
replaceIds.Add(id);
replaceNames.Add(wordWithDashesOrPeriods);
text = text.Remove(indexStart, wordWithDashesOrPeriods.Length).Insert(indexStart, id);
}
else
{
startSearchIndex = indexStart + 1;
}
}
else
{
found = false;
}
}
}
}
}
public bool AddName(string word)
{
if (word == null || word.Length <= 1 || _namesEtcList.Contains(word))
return false;
_namesEtcList.Add(word);
_namesEtcListUppercase.Add(word.ToUpper());
if (_languageName.StartsWith("en_", StringComparison.Ordinal) && !word.EndsWith('s'))
{
_namesEtcList.Add(word + "s");
_namesEtcListUppercase.Add(word.ToUpper() + "S");
}
if (!word.EndsWith('s'))
{
_namesEtcListWithApostrophe.Add(word + "'s");
_namesEtcListUppercase.Add(word.ToUpper() + "'S");
}
if (!word.EndsWith('\''))
_namesEtcListWithApostrophe.Add(word + "'");
var namesList = new NamesList(Configuration.DictionariesFolder, _languageName, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
namesList.Add(word);
return true;
}
public bool AddUserWord(string word)
{
if (word == null)
return false;
word = word.Trim().ToLower();
if (word.Length <= 1 || _userWordList.IndexOf(word) >= 0)
return false;
if (word.Contains(' '))
_userPhraseList.Add(word);
else
_userWordList.Add(word);
Utilities.AddToUserDictionary(word, _languageName);
return true;
}
public bool HasName(string word)
{
return _namesEtcList.Contains(word) || (word.StartsWith('\'') || word.EndsWith('\'')) && _namesEtcList.Contains(word.Trim('\''));
}
public bool HasNameExtended(string word, string text)
{
return _namesEtcListUppercase.Contains(word) || _namesEtcListWithApostrophe.Contains(word) || _namesList.IsInNamesEtcMultiWordList(text, word);
}
public bool HasUserWord(string word)
{
string s = word.ToLower();
return _userWordList.Contains(s) || (s.StartsWith('\'') || s.EndsWith('\'')) && _userWordList.Contains(s.Trim('\''));
}
public static List<SpellCheckWord> Split(string s)
{
var list = new List<SpellCheckWord>();
var sb = new StringBuilder();
for (int i = 0; i < s.Length; i++)
{
if (SplitChars.Contains(s[i]))
{
if (sb.Length > 0)
list.Add(new SpellCheckWord { Text = sb.ToString(), Index = i - sb.Length });
sb.Clear();
}
else
{
sb.Append(s[i]);
}
}
if (sb.Length > 0)
list.Add(new SpellCheckWord { Text = sb.ToString(), Index = s.Length - 1 - sb.Length });
return list;
}
}
}

View File

@ -0,0 +1,19 @@
using Nikse.SubtitleEdit.Core.Enums;
namespace Nikse.SubtitleEdit.Core.SpellCheck
{
public class UndoObject
{
public int CurrentIndex { get; set; }
public string UndoText { get; set; }
public string UndoWord { get; set; }
public string CurrentWord { get; set; }
public SpellCheckAction Action { get; set; }
public Subtitle Subtitle { get; set; }
public int NoOfSkippedWords { get; set; }
public int NoOfChangedWords { get; set; }
public int NoOfCorrectWords { get; set; }
public int NoOfNamesEtc { get; set; }
public int NoOfAddedWords { get; set; }
}
}

View File

@ -24,6 +24,7 @@ using System.Net;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Windows.Forms; using System.Windows.Forms;
using Nikse.SubtitleEdit.Core.SpellCheck;
namespace Nikse.SubtitleEdit.Forms namespace Nikse.SubtitleEdit.Forms
{ {
@ -5638,7 +5639,7 @@ namespace Nikse.SubtitleEdit.Forms
int index = -1; int index = -1;
for (int i = 0; i < text.Length; i++) for (int i = 0; i < text.Length; i++)
{ {
if (Forms.SpellCheck.SplitChars.Contains(text[i])) if (SpellCheckWordLists.SplitChars.Contains(text[i]))
{ {
if (sb.Length > 0) if (sb.Length > 0)
{ {

View File

@ -1,5 +1,4 @@
using Nikse.SubtitleEdit.Core; using Nikse.SubtitleEdit.Core;
using Nikse.SubtitleEdit.Core.Dictionaries;
using Nikse.SubtitleEdit.Core.Enums; using Nikse.SubtitleEdit.Core.Enums;
using Nikse.SubtitleEdit.Logic; using Nikse.SubtitleEdit.Logic;
using Nikse.SubtitleEdit.Logic.SpellCheck; using Nikse.SubtitleEdit.Logic.SpellCheck;
@ -7,30 +6,15 @@ using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Drawing; using System.Drawing;
using System.IO; using System.IO;
using System.Text;
using System.Windows.Forms; using System.Windows.Forms;
using System.Xml; using Nikse.SubtitleEdit.Core.Interfaces;
using Nikse.SubtitleEdit.Core.SpellCheck;
namespace Nikse.SubtitleEdit.Forms namespace Nikse.SubtitleEdit.Forms
{ {
public sealed partial class SpellCheck : Form public sealed partial class SpellCheck : Form, IDoSpell
{ {
private class UndoObject
{
public int CurrentIndex { get; set; }
public string UndoText { get; set; }
public string UndoWord { get; set; }
public string CurrentWord { get; set; }
public SpellCheckAction Action { get; set; }
public Subtitle Subtitle { get; set; }
public int NoOfSkippedWords { get; set; }
public int NoOfChangedWords { get; set; }
public int NoOfCorrectWords { get; set; }
public int NoOfNamesEtc { get; set; }
public int NoOfAddedWords { get; set; }
}
private List<UndoObject> _undoList = new List<UndoObject>(); private List<UndoObject> _undoList = new List<UndoObject>();
private SpellCheckAction _action = SpellCheckAction.Skip; private SpellCheckAction _action = SpellCheckAction.Skip;
private List<string> _suggestions; private List<string> _suggestions;
private string _currentAction; private string _currentAction;
@ -39,22 +23,9 @@ namespace Nikse.SubtitleEdit.Forms
public string ChangeWholeText { get { return textBoxWholeText.Text; } } public string ChangeWholeText { get { return textBoxWholeText.Text; } }
public bool AutoFixNames { get { return checkBoxAutoChangeNames.Checked; } } public bool AutoFixNames { get { return checkBoxAutoChangeNames.Checked; } }
public int CurrentLineIndex private SpellCheckWordLists _spellCheckWordLists;
{
get { return _currentIndex; }
}
private NamesList _namesList;
private HashSet<string> _namesEtcList = new HashSet<string>();
private HashSet<string> _namesEtcMultiWordList = new HashSet<string>();
private List<string> _namesEtcListUppercase = new List<string>();
private List<string> _namesEtcListWithApostrophe = new List<string>();
private List<string> _skipAllList = new List<string>(); private List<string> _skipAllList = new List<string>();
private List<string> _wordsWithDashesOrPeriods = new List<string>();
private Dictionary<string, string> _changeAllDictionary = new Dictionary<string, string>(); private Dictionary<string, string> _changeAllDictionary = new Dictionary<string, string>();
private List<string> _userWordList = new List<string>();
private List<string> _userPhraseList = new List<string>();
private string _prefix = string.Empty; private string _prefix = string.Empty;
private string _postfix = string.Empty; private string _postfix = string.Empty;
private Hunspell _hunspell; private Hunspell _hunspell;
@ -66,7 +37,6 @@ namespace Nikse.SubtitleEdit.Forms
private int _wordsIndex; private int _wordsIndex;
private Subtitle _subtitle; private Subtitle _subtitle;
private string _originalWord; private string _originalWord;
private int _noOfSkippedWords; private int _noOfSkippedWords;
private int _noOfChangedWords; private int _noOfChangedWords;
private int _noOfCorrectWords; private int _noOfCorrectWords;
@ -75,13 +45,9 @@ namespace Nikse.SubtitleEdit.Forms
private bool _firstChange = true; private bool _firstChange = true;
private string _languageName; private string _languageName;
private Main _mainWindow; private Main _mainWindow;
private string _currentDictionary; private string _currentDictionary;
public const string SplitChars = " -.,?!:;\"“”()[]{}|<>/+\r\n¿¡…—♪♫„“"; private static readonly char[] ExpectedChars = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '%', '&', '@', '$', '*', '=', '£', '#', '_', '½', '^' };
private static readonly char[] ExpectedChars = new[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '%', '&', '@', '$', '*', '=', '£', '#', '_', '½', '^' };
private static readonly char[] SplitChars2 = new[] { ' ', '.', ',', '?', '!', ':', ';', '"', '“', '”', '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n', '¿', '¡', '…', '—', '', '♪', '♫', '„', '“' };
public class SuggestionParameter public class SuggestionParameter
{ {
@ -283,7 +249,7 @@ namespace Nikse.SubtitleEdit.Forms
Configuration.Settings.Save(); Configuration.Settings.Save();
_languageName = LanguageString; _languageName = LanguageString;
string dictionary = Utilities.DictionaryFolder + _languageName; string dictionary = Utilities.DictionaryFolder + _languageName;
LoadDictionaries(Utilities.DictionaryFolder, dictionary); LoadDictionaries(Utilities.DictionaryFolder, dictionary, _languageName);
_wordsIndex--; _wordsIndex--;
PrepareNextWord(); PrepareNextWord();
} }
@ -445,38 +411,11 @@ namespace Nikse.SubtitleEdit.Forms
_skipAllList.Add(ChangeWord.ToUpper().Trim('\'')); _skipAllList.Add(ChangeWord.ToUpper().Trim('\''));
break; break;
case SpellCheckAction.AddToDictionary: case SpellCheckAction.AddToDictionary:
if (_userWordList.IndexOf(ChangeWord) < 0) if (_spellCheckWordLists.AddUserWord(ChangeWord))
{
_noOfAddedWords++; _noOfAddedWords++;
string s = ChangeWord.Trim().ToLower();
if (s.Contains(' '))
_userPhraseList.Add(s);
else
_userWordList.Add(s);
Utilities.AddToUserDictionary(s, _languageName);
}
break; break;
case SpellCheckAction.AddToNamesEtc: case SpellCheckAction.AddToNamesEtc:
if (ChangeWord.Length > 1 && !_namesEtcList.Contains(ChangeWord)) _spellCheckWordLists.AddName(ChangeWord);
{
_namesEtcList.Add(ChangeWord);
_namesEtcListUppercase.Add(ChangeWord.ToUpper());
if (_languageName.StartsWith("en_", StringComparison.Ordinal) && !ChangeWord.EndsWith('s'))
{
_namesEtcList.Add(ChangeWord + "s");
_namesEtcListUppercase.Add(ChangeWord.ToUpper() + "S");
}
if (!ChangeWord.EndsWith('s'))
{
_namesEtcListWithApostrophe.Add(ChangeWord + "'s");
_namesEtcListUppercase.Add(ChangeWord.ToUpper() + "'S");
}
if (!ChangeWord.EndsWith('\''))
_namesEtcListWithApostrophe.Add(ChangeWord + "'");
var namesList = new NamesList(Configuration.DictionariesFolder, _languageName, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
namesList.Add(ChangeWord);
}
break; break;
case SpellCheckAction.ChangeWholeText: case SpellCheckAction.ChangeWholeText:
_mainWindow.ShowStatus(string.Format(Configuration.Settings.Language.Main.SpellCheckChangedXToY, _currentParagraph.Text.Replace(Environment.NewLine, " "), ChangeWholeText.Replace(Environment.NewLine, " "))); _mainWindow.ShowStatus(string.Format(Configuration.Settings.Language.Main.SpellCheckChangedXToY, _currentParagraph.Text.Replace(Environment.NewLine, " "), ChangeWholeText.Replace(Environment.NewLine, " ")));
@ -503,25 +442,6 @@ namespace Nikse.SubtitleEdit.Forms
ShowActionInfo(_currentAction, textBoxWord.Text); ShowActionInfo(_currentAction, textBoxWord.Text);
} }
public static bool IsWordInUserPhrases(List<string> userPhraseList, int index, List<SpellCheckWord> words)
{
string current = words[index].Text;
string prev = "-";
if (index > 0)
prev = words[index - 1].Text;
string next = "-";
if (index < words.Count - 1)
next = words[index + 1].Text;
foreach (string userPhrase in userPhraseList)
{
if (userPhrase == current + " " + next)
return true;
if (userPhrase == prev + " " + current)
return true;
}
return false;
}
private void PrepareNextWord() private void PrepareNextWord()
{ {
while (true) while (true)
@ -580,8 +500,7 @@ namespace Nikse.SubtitleEdit.Forms
_currentWord = _currentWord.Substring(1); _currentWord = _currentWord.Substring(1);
} }
} }
if (_namesEtcList.Contains(_currentWord) if (_spellCheckWordLists.HasName(_currentWord))
|| (_currentWord.StartsWith('\'') || _currentWord.EndsWith('\'')) && _namesEtcList.Contains(_currentWord.Trim('\'')))
{ {
_noOfNamesEtc++; _noOfNamesEtc++;
} }
@ -590,8 +509,7 @@ namespace Nikse.SubtitleEdit.Forms
{ {
_noOfSkippedWords++; _noOfSkippedWords++;
} }
else if (_userWordList.Contains(_currentWord.ToLower()) else if (_spellCheckWordLists.HasUserWord(_currentWord))
|| (_currentWord.StartsWith('\'') || _currentWord.EndsWith('\'')) && _userWordList.Contains(_currentWord.Trim('\'').ToLower()))
{ {
_noOfCorrectWords++; _noOfCorrectWords++;
} }
@ -605,13 +523,11 @@ namespace Nikse.SubtitleEdit.Forms
_noOfChangedWords++; _noOfChangedWords++;
_mainWindow.CorrectWord(_changeAllDictionary[_currentWord], _currentParagraph, _currentWord.Trim('\''), ref _firstChange, -1); _mainWindow.CorrectWord(_changeAllDictionary[_currentWord], _currentParagraph, _currentWord.Trim('\''), ref _firstChange, -1);
} }
else if (_namesEtcListUppercase.Contains(_currentWord) else if (_spellCheckWordLists.HasNameExtended(_currentWord, _currentParagraph.Text)) // TODO: Verify this!
|| _namesEtcListWithApostrophe.Contains(_currentWord)
|| _namesList.IsInNamesEtcMultiWordList(_currentParagraph.Text, _currentWord)) // TODO: Verify this!
{ {
_noOfNamesEtc++; _noOfNamesEtc++;
} }
else if (IsWordInUserPhrases(_userPhraseList, _wordsIndex, _words)) else if (_spellCheckWordLists.IsWordInUserPhrases(_wordsIndex, _words))
{ {
_noOfCorrectWords++; _noOfCorrectWords++;
} }
@ -619,7 +535,7 @@ namespace Nikse.SubtitleEdit.Forms
{ {
bool correct; bool correct;
if (_prefix == "'" && _currentWord.Length >= 1 && (DoSpell(_prefix + _currentWord) || _userWordList.Contains(_prefix + _currentWord))) if (_prefix == "'" && _currentWord.Length >= 1 && (DoSpell(_prefix + _currentWord) || _spellCheckWordLists.HasUserWord(_prefix + _currentWord)))
{ {
correct = true; correct = true;
} }
@ -714,7 +630,7 @@ namespace Nikse.SubtitleEdit.Forms
DoAction(SpellCheckAction.ChangeAll); DoAction(SpellCheckAction.ChangeAll);
return; return;
} }
if (AutoFixNames && _currentWord.Length > 1 && _namesEtcList.Contains(char.ToUpper(_currentWord[0]) + _currentWord.Substring(1))) if (AutoFixNames && _currentWord.Length > 1 && _spellCheckWordLists.HasName(char.ToUpper(_currentWord[0]) + _currentWord.Substring(1)))
{ {
ChangeWord = char.ToUpper(_currentWord[0]) + _currentWord.Substring(1); ChangeWord = char.ToUpper(_currentWord[0]) + _currentWord.Substring(1);
DoAction(SpellCheckAction.ChangeAll); DoAction(SpellCheckAction.ChangeAll);
@ -752,131 +668,12 @@ namespace Nikse.SubtitleEdit.Forms
} }
} }
private static List<SpellCheckWord> Split(string s)
{
var list = new List<SpellCheckWord>();
var sb = new StringBuilder();
for (int i = 0; i < s.Length; i++)
{
if (SplitChars.Contains(s[i]))
{
if (sb.Length > 0)
list.Add(new SpellCheckWord { Text = sb.ToString(), Index = i - sb.Length });
sb.Clear();
}
else
{
sb.Append(s[i]);
}
}
if (sb.Length > 0)
list.Add(new SpellCheckWord { Text = sb.ToString(), Index = s.Length - 1 - sb.Length });
return list;
}
private void SetWords(string s) private void SetWords(string s)
{ {
s = ReplaceHtmlTagsWithBlanks(s); s = _spellCheckWordLists.ReplaceHtmlTagsWithBlanks(s);
s = ReplaceKnownWordsOrNamesWithBlanks(s); s = _spellCheckWordLists.ReplaceKnownWordsOrNamesWithBlanks(s);
_words = Split(s); _words = SpellCheckWordLists.Split(s);
}
private string ReplaceKnownWordsOrNamesWithBlanks(string s)
{
var replaceIds = new List<string>();
var replaceNames = new List<string>();
GetTextWithoutUserWordsAndNames(replaceIds, replaceNames, s);
foreach (string name in replaceNames)
{
int start = s.IndexOf(name, StringComparison.Ordinal);
while (start >= 0)
{
bool startOk = start == 0 || SplitChars.Contains(s[start - 1]);
if (startOk)
{
int end = start + name.Length;
bool endOk = end >= s.Length || SplitChars.Contains(s[end]);
if (endOk)
s = s.Remove(start, name.Length).Insert(start, string.Empty.PadLeft(name.Length));
}
if (start + 1 < s.Length)
start = s.IndexOf(name, start + 1, StringComparison.Ordinal);
else
start = -1;
}
}
return s;
}
private static string ReplaceHtmlTagsWithBlanks(string s)
{
int start = s.IndexOf('<');
while (start >= 0)
{
int end = s.IndexOf('>', start + 1);
if (end < start)
break;
int l = end - start + 1;
s = s.Remove(start, l).Insert(start, string.Empty.PadLeft(l));
end++;
if (end >= s.Length)
break;
start = s.IndexOf('<', end);
}
return s;
}
/// <summary>
/// Removes words with dash'es that are correct, so spell check can ignore the combination (do not split correct words with dash'es)
/// </summary>
private void GetTextWithoutUserWordsAndNames(List<string> replaceIds, List<string> replaceNames, string text)
{
string[] wordsWithDash = text.Split(SplitChars2, StringSplitOptions.RemoveEmptyEntries);
foreach (string w in wordsWithDash)
{
if (w.Contains('-') && DoSpell(w) && !_wordsWithDashesOrPeriods.Contains(w))
_wordsWithDashesOrPeriods.Add(w);
}
if (text.Contains(new[] { '.', '-' }))
{
int i = 0;
foreach (string wordWithDashesOrPeriods in _wordsWithDashesOrPeriods)
{
bool found = true;
int startSearchIndex = 0;
while (found)
{
int indexStart = text.IndexOf(wordWithDashesOrPeriods, startSearchIndex, StringComparison.Ordinal);
if (indexStart >= 0)
{
int endIndexPlus = indexStart + wordWithDashesOrPeriods.Length;
bool startOk = indexStart == 0 || (@" (['""" + Environment.NewLine).Contains(text[indexStart - 1]);
bool endOk = endIndexPlus == text.Length;
if (!endOk && endIndexPlus < text.Length && @",!?:;. ])<'""".Contains(text[endIndexPlus]))
endOk = true;
if (startOk && endOk)
{
i++;
string id = string.Format("_@{0}_", i);
replaceIds.Add(id);
replaceNames.Add(wordWithDashesOrPeriods);
text = text.Remove(indexStart, wordWithDashesOrPeriods.Length).Insert(indexStart, id);
}
else
{
startSearchIndex = indexStart + 1;
}
}
else
{
found = false;
}
}
}
}
} }
private void ShowEndStatusMessage(string completedMessage) private void ShowEndStatusMessage(string completedMessage)
@ -935,9 +732,6 @@ namespace Nikse.SubtitleEdit.Forms
LanguageStructure.Main mainLanguage = Configuration.Settings.Language.Main; LanguageStructure.Main mainLanguage = Configuration.Settings.Language.Main;
_mainWindow = mainWindow; _mainWindow = mainWindow;
_namesEtcListUppercase = new List<string>();
_namesEtcListWithApostrophe = new List<string>();
_skipAllList = new List<string>(); _skipAllList = new List<string>();
_noOfSkippedWords = 0; _noOfSkippedWords = 0;
@ -972,7 +766,7 @@ namespace Nikse.SubtitleEdit.Forms
_languageName = LanguageAutoDetect.AutoDetectLanguageName(_languageName, subtitle); _languageName = LanguageAutoDetect.AutoDetectLanguageName(_languageName, subtitle);
string dictionary = Utilities.DictionaryFolder + _languageName; string dictionary = Utilities.DictionaryFolder + _languageName;
LoadDictionaries(dictionaryFolder, dictionary); LoadDictionaries(dictionaryFolder, dictionary, _languageName);
_currentIndex = 0; _currentIndex = 0;
if (startLine >= 0 && startLine < _subtitle.Paragraphs.Count) if (startLine >= 0 && startLine < _subtitle.Paragraphs.Count)
@ -984,64 +778,11 @@ namespace Nikse.SubtitleEdit.Forms
PrepareNextWord(); PrepareNextWord();
} }
private void LoadDictionaries(string dictionaryFolder, string dictionary) private void LoadDictionaries(string dictionaryFolder, string dictionary, string languageName)
{ {
_changeAllDictionary = new Dictionary<string, string>(); _changeAllDictionary = new Dictionary<string, string>();
_skipAllList = new List<string>(); _skipAllList = new List<string>();
_namesList = new NamesList(Configuration.DictionariesFolder, _languageName, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl); _spellCheckWordLists = new SpellCheckWordLists(dictionaryFolder, languageName, this);
_namesEtcList = _namesList.GetNames();
_namesEtcMultiWordList = _namesList.GetMultiNames();
foreach (string namesItem in _namesEtcList)
_namesEtcListUppercase.Add(namesItem.ToUpper());
if (_languageName.StartsWith("en_", StringComparison.OrdinalIgnoreCase))
{
foreach (string namesItem in _namesEtcList)
{
if (!namesItem.EndsWith('s'))
{
_namesEtcListWithApostrophe.Add(namesItem + "'s");
_namesEtcListWithApostrophe.Add(namesItem + "s");
}
else if (!namesItem.EndsWith('\''))
{
_namesEtcListWithApostrophe.Add(namesItem + "'");
}
}
}
_userWordList = new List<string>();
_userPhraseList = new List<string>();
if (File.Exists(dictionaryFolder + _languageName + "_user.xml"))
{
var userWordDictionary = new XmlDocument();
userWordDictionary.Load(dictionaryFolder + _languageName + "_user.xml");
foreach (XmlNode node in userWordDictionary.DocumentElement.SelectNodes("word"))
{
string word = node.InnerText.Trim().ToLower();
if (word.Contains(' '))
_userPhraseList.Add(word);
else
_userWordList.Add(word);
}
}
// Add names/userdic with "." or " " or "-"
_wordsWithDashesOrPeriods = new List<string>();
_wordsWithDashesOrPeriods.AddRange(_namesEtcMultiWordList);
foreach (string name in _namesEtcList)
{
if (name.Contains(new[] { '.', '-' }))
_wordsWithDashesOrPeriods.Add(name);
}
foreach (string word in _userWordList)
{
if (word.Contains(new[] { '.', '-' }))
_wordsWithDashesOrPeriods.Add(word);
}
_wordsWithDashesOrPeriods.AddRange(_userPhraseList);
_changeAllDictionary = new Dictionary<string, string>(); _changeAllDictionary = new Dictionary<string, string>();
LoadHunspell(dictionary); LoadHunspell(dictionary);
} }
@ -1199,30 +940,10 @@ namespace Nikse.SubtitleEdit.Forms
_skipAllList.Remove(undo.UndoWord.ToUpper().Trim('\'')); _skipAllList.Remove(undo.UndoWord.ToUpper().Trim('\''));
break; break;
case SpellCheckAction.AddToDictionary: case SpellCheckAction.AddToDictionary:
_userWordList.Remove(undo.UndoWord); _spellCheckWordLists.RemoveUserWord(undo.UndoWord);
_userPhraseList.Remove(undo.UndoWord);
Utilities.RemoveFromUserDictionary(undo.UndoWord, _languageName);
break; break;
case SpellCheckAction.AddToNamesEtc: case SpellCheckAction.AddToNamesEtc:
if (undo.UndoWord.Length > 1 && _namesEtcList.Contains(undo.UndoWord)) _spellCheckWordLists.RemoveName(undo.UndoWord);
{
_namesEtcList.Remove(undo.UndoWord);
_namesEtcListUppercase.Remove(undo.UndoWord.ToUpper());
if (_languageName.StartsWith("en_", StringComparison.Ordinal) && !undo.UndoWord.EndsWith('s'))
{
_namesEtcList.Remove(undo.UndoWord + "s");
_namesEtcListUppercase.Remove(undo.UndoWord.ToUpper() + "S");
}
if (!undo.UndoWord.EndsWith('s'))
{
_namesEtcListWithApostrophe.Remove(undo.UndoWord + "'s");
_namesEtcListUppercase.Remove(undo.UndoWord.ToUpper() + "'S");
}
if (!undo.UndoWord.EndsWith('\''))
_namesEtcListWithApostrophe.Remove(undo.UndoWord + "'");
_namesList.Remove(undo.UndoWord);
}
break; break;
case SpellCheckAction.ChangeWholeText: case SpellCheckAction.ChangeWholeText:
_subtitle = _mainWindow.UndoFromSpellCheck(undo.Subtitle); _subtitle = _mainWindow.UndoFromSpellCheck(undo.Subtitle);