mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-25 12:44:46 +01:00
Refactor "Spell check Form"
This commit is contained in:
parent
3cdff0d18b
commit
f75f35b103
7
libse/Interfaces/IDoSpell.cs
Normal file
7
libse/Interfaces/IDoSpell.cs
Normal file
@ -0,0 +1,7 @@
|
||||
namespace Nikse.SubtitleEdit.Core.Interfaces
|
||||
{
|
||||
public interface IDoSpell
|
||||
{
|
||||
bool DoSpell(string word);
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -180,7 +180,10 @@
|
||||
<Compile Include="RegistryUtil.cs" />
|
||||
<Compile Include="RichTextToPlainText.cs" />
|
||||
<Compile Include="Settings.cs" />
|
||||
<Compile Include="SpellCheckWord.cs" />
|
||||
<Compile Include="Interfaces\IDoSpell.cs" />
|
||||
<Compile Include="SpellCheck\SpellCheckWord.cs" />
|
||||
<Compile Include="SpellCheck\SpellCheckWordLists.cs" />
|
||||
<Compile Include="SpellCheck\UndoObject.cs" />
|
||||
<Compile Include="SsaStyle.cs" />
|
||||
<Compile Include="StringExtensions.cs" />
|
||||
<Compile Include="StripableText.cs" />
|
||||
|
@ -1,8 +1,8 @@
|
||||
namespace Nikse.SubtitleEdit.Core
|
||||
{
|
||||
public class SpellCheckWord
|
||||
{
|
||||
public int Index { get; set; }
|
||||
public string Text { get; set; }
|
||||
}
|
||||
}
|
||||
namespace Nikse.SubtitleEdit.Core.SpellCheck
|
||||
{
|
||||
public class SpellCheckWord
|
||||
{
|
||||
public int Index { get; set; }
|
||||
public string Text { get; set; }
|
||||
}
|
||||
}
|
328
libse/SpellCheck/SpellCheckWordLists.cs
Normal file
328
libse/SpellCheck/SpellCheckWordLists.cs
Normal file
@ -0,0 +1,328 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Xml;
|
||||
using Nikse.SubtitleEdit.Core.Dictionaries;
|
||||
using Nikse.SubtitleEdit.Core.Interfaces;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.SpellCheck
|
||||
{
|
||||
|
||||
public class SpellCheckWordLists
|
||||
{
|
||||
|
||||
private static readonly char[] SplitChars2 = { ' ', '.', ',', '?', '!', ':', ';', '"', '“', '”', '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n', '¿', '¡', '…', '—', '–', '♪', '♫', '„', '“' };
|
||||
public static readonly string SplitChars = " -.,?!:;\"“”()[]{}|<>/+\r\n¿¡…—–♪♫„“";
|
||||
|
||||
private readonly NamesList _namesList;
|
||||
private readonly HashSet<string> _namesEtcList;
|
||||
private readonly List<string> _namesEtcListUppercase = new List<string>();
|
||||
private readonly List<string> _namesEtcListWithApostrophe = new List<string>();
|
||||
private readonly List<string> _wordsWithDashesOrPeriods;
|
||||
private readonly List<string> _userWordList;
|
||||
private readonly List<string> _userPhraseList;
|
||||
private readonly string _languageName;
|
||||
private readonly IDoSpell _doSpell;
|
||||
|
||||
public SpellCheckWordLists(string dictionaryFolder, string languageName, IDoSpell doSpell)
|
||||
{
|
||||
if (languageName == null)
|
||||
throw new NullReferenceException("languageName");
|
||||
if (doSpell == null)
|
||||
throw new NullReferenceException("doSpell");
|
||||
|
||||
_languageName = languageName;
|
||||
_doSpell = doSpell;
|
||||
_namesList = new NamesList(Configuration.DictionariesFolder, languageName, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
|
||||
_namesEtcList = _namesList.GetNames();
|
||||
var namesEtcMultiWordList = _namesList.GetMultiNames();
|
||||
|
||||
foreach (string namesItem in _namesEtcList)
|
||||
_namesEtcListUppercase.Add(namesItem.ToUpper());
|
||||
|
||||
if (languageName.StartsWith("en_", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
foreach (string namesItem in _namesEtcList)
|
||||
{
|
||||
if (!namesItem.EndsWith('s'))
|
||||
{
|
||||
_namesEtcListWithApostrophe.Add(namesItem + "'s");
|
||||
_namesEtcListWithApostrophe.Add(namesItem + "’s");
|
||||
}
|
||||
else if (!namesItem.EndsWith('\''))
|
||||
{
|
||||
_namesEtcListWithApostrophe.Add(namesItem + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_userWordList = new List<string>();
|
||||
_userPhraseList = new List<string>();
|
||||
if (File.Exists(dictionaryFolder + languageName + "_user.xml"))
|
||||
{
|
||||
var userWordDictionary = new XmlDocument();
|
||||
userWordDictionary.Load(dictionaryFolder + languageName + "_user.xml");
|
||||
if (userWordDictionary.DocumentElement != null)
|
||||
{
|
||||
var xmlNodeList = userWordDictionary.DocumentElement.SelectNodes("word");
|
||||
if (xmlNodeList != null)
|
||||
{
|
||||
foreach (XmlNode node in xmlNodeList)
|
||||
{
|
||||
string word = node.InnerText.Trim().ToLower();
|
||||
if (word.Contains(' '))
|
||||
_userPhraseList.Add(word);
|
||||
else
|
||||
_userWordList.Add(word);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add names/userdic with "." or " " or "-"
|
||||
_wordsWithDashesOrPeriods = new List<string>();
|
||||
_wordsWithDashesOrPeriods.AddRange(namesEtcMultiWordList);
|
||||
foreach (string name in _namesEtcList)
|
||||
{
|
||||
if (name.Contains(new[] { '.', '-' }))
|
||||
_wordsWithDashesOrPeriods.Add(name);
|
||||
}
|
||||
foreach (string word in _userWordList)
|
||||
{
|
||||
if (word.Contains(new[] { '.', '-' }))
|
||||
_wordsWithDashesOrPeriods.Add(word);
|
||||
}
|
||||
_wordsWithDashesOrPeriods.AddRange(_userPhraseList);
|
||||
}
|
||||
|
||||
|
||||
public void RemoveUserWord(string word)
|
||||
{
|
||||
_userWordList.Remove(word);
|
||||
_userPhraseList.Remove(word);
|
||||
Utilities.RemoveFromUserDictionary(word, _languageName);
|
||||
}
|
||||
|
||||
public void RemoveName(string word)
|
||||
{
|
||||
if (word == null || word.Length <= 1 || !_namesEtcList.Contains(word))
|
||||
return;
|
||||
|
||||
_namesEtcList.Remove(word);
|
||||
_namesEtcListUppercase.Remove(word.ToUpper());
|
||||
if (_languageName.StartsWith("en_", StringComparison.Ordinal) && !word.EndsWith('s'))
|
||||
{
|
||||
_namesEtcList.Remove(word + "s");
|
||||
_namesEtcListUppercase.Remove(word.ToUpper() + "S");
|
||||
}
|
||||
if (!word.EndsWith('s'))
|
||||
{
|
||||
_namesEtcListWithApostrophe.Remove(word + "'s");
|
||||
_namesEtcListUppercase.Remove(word.ToUpper() + "'S");
|
||||
}
|
||||
if (!word.EndsWith('\''))
|
||||
_namesEtcListWithApostrophe.Remove(word + "'");
|
||||
|
||||
_namesList.Remove(word);
|
||||
}
|
||||
|
||||
public string ReplaceKnownWordsOrNamesWithBlanks(string s)
|
||||
{
|
||||
var replaceIds = new List<string>();
|
||||
var replaceNames = new List<string>();
|
||||
GetTextWithoutUserWordsAndNames(replaceIds, replaceNames, s);
|
||||
foreach (string name in replaceNames)
|
||||
{
|
||||
int start = s.IndexOf(name, StringComparison.Ordinal);
|
||||
while (start >= 0)
|
||||
{
|
||||
bool startOk = start == 0 || SplitChars.Contains(s[start - 1]);
|
||||
if (startOk)
|
||||
{
|
||||
int end = start + name.Length;
|
||||
bool endOk = end >= s.Length || SplitChars.Contains(s[end]);
|
||||
if (endOk)
|
||||
s = s.Remove(start, name.Length).Insert(start, string.Empty.PadLeft(name.Length));
|
||||
}
|
||||
|
||||
if (start + 1 < s.Length)
|
||||
start = s.IndexOf(name, start + 1, StringComparison.Ordinal);
|
||||
else
|
||||
start = -1;
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
public string ReplaceHtmlTagsWithBlanks(string s)
|
||||
{
|
||||
int start = s.IndexOf('<');
|
||||
while (start >= 0)
|
||||
{
|
||||
int end = s.IndexOf('>', start + 1);
|
||||
if (end < start)
|
||||
break;
|
||||
int l = end - start + 1;
|
||||
s = s.Remove(start, l).Insert(start, string.Empty.PadLeft(l));
|
||||
end++;
|
||||
if (end >= s.Length)
|
||||
break;
|
||||
start = s.IndexOf('<', end);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
public bool IsWordInUserPhrases(int index, List<SpellCheckWord> words)
|
||||
{
|
||||
string current = words[index].Text;
|
||||
string prev = "-";
|
||||
if (index > 0)
|
||||
prev = words[index - 1].Text;
|
||||
string next = "-";
|
||||
if (index < words.Count - 1)
|
||||
next = words[index + 1].Text;
|
||||
foreach (string userPhrase in _userPhraseList)
|
||||
{
|
||||
if (userPhrase == current + " " + next)
|
||||
return true;
|
||||
if (userPhrase == prev + " " + current)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Removes words with dash'es that are correct, so spell check can ignore the combination (do not split correct words with dash'es)
|
||||
/// </summary>
|
||||
private void GetTextWithoutUserWordsAndNames(List<string> replaceIds, List<string> replaceNames, string text)
|
||||
{
|
||||
string[] wordsWithDash = text.Split(SplitChars2, StringSplitOptions.RemoveEmptyEntries);
|
||||
foreach (string w in wordsWithDash)
|
||||
{
|
||||
if (w.Contains('-') && _doSpell.DoSpell(w) && !_wordsWithDashesOrPeriods.Contains(w))
|
||||
_wordsWithDashesOrPeriods.Add(w);
|
||||
}
|
||||
|
||||
if (text.Contains(new[] { '.', '-' }))
|
||||
{
|
||||
int i = 0;
|
||||
foreach (string wordWithDashesOrPeriods in _wordsWithDashesOrPeriods)
|
||||
{
|
||||
bool found = true;
|
||||
int startSearchIndex = 0;
|
||||
while (found)
|
||||
{
|
||||
int indexStart = text.IndexOf(wordWithDashesOrPeriods, startSearchIndex, StringComparison.Ordinal);
|
||||
|
||||
if (indexStart >= 0)
|
||||
{
|
||||
int endIndexPlus = indexStart + wordWithDashesOrPeriods.Length;
|
||||
bool startOk = indexStart == 0 || (@" (['""" + Environment.NewLine).Contains(text[indexStart - 1]);
|
||||
bool endOk = endIndexPlus == text.Length;
|
||||
if (!endOk && endIndexPlus < text.Length && @",!?:;. ])<'""".Contains(text[endIndexPlus]))
|
||||
endOk = true;
|
||||
if (startOk && endOk)
|
||||
{
|
||||
i++;
|
||||
string id = string.Format("_@{0}_", i);
|
||||
replaceIds.Add(id);
|
||||
replaceNames.Add(wordWithDashesOrPeriods);
|
||||
text = text.Remove(indexStart, wordWithDashesOrPeriods.Length).Insert(indexStart, id);
|
||||
}
|
||||
else
|
||||
{
|
||||
startSearchIndex = indexStart + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
found = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public bool AddName(string word)
|
||||
{
|
||||
if (word == null || word.Length <= 1 || _namesEtcList.Contains(word))
|
||||
return false;
|
||||
|
||||
_namesEtcList.Add(word);
|
||||
_namesEtcListUppercase.Add(word.ToUpper());
|
||||
if (_languageName.StartsWith("en_", StringComparison.Ordinal) && !word.EndsWith('s'))
|
||||
{
|
||||
_namesEtcList.Add(word + "s");
|
||||
_namesEtcListUppercase.Add(word.ToUpper() + "S");
|
||||
}
|
||||
if (!word.EndsWith('s'))
|
||||
{
|
||||
_namesEtcListWithApostrophe.Add(word + "'s");
|
||||
_namesEtcListUppercase.Add(word.ToUpper() + "'S");
|
||||
}
|
||||
if (!word.EndsWith('\''))
|
||||
_namesEtcListWithApostrophe.Add(word + "'");
|
||||
|
||||
var namesList = new NamesList(Configuration.DictionariesFolder, _languageName, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
|
||||
namesList.Add(word);
|
||||
return true;
|
||||
}
|
||||
|
||||
public bool AddUserWord(string word)
|
||||
{
|
||||
if (word == null)
|
||||
return false;
|
||||
|
||||
word = word.Trim().ToLower();
|
||||
if (word.Length <= 1 || _userWordList.IndexOf(word) >= 0)
|
||||
return false;
|
||||
|
||||
if (word.Contains(' '))
|
||||
_userPhraseList.Add(word);
|
||||
else
|
||||
_userWordList.Add(word);
|
||||
Utilities.AddToUserDictionary(word, _languageName);
|
||||
return true;
|
||||
}
|
||||
|
||||
public bool HasName(string word)
|
||||
{
|
||||
return _namesEtcList.Contains(word) || (word.StartsWith('\'') || word.EndsWith('\'')) && _namesEtcList.Contains(word.Trim('\''));
|
||||
}
|
||||
|
||||
public bool HasNameExtended(string word, string text)
|
||||
{
|
||||
return _namesEtcListUppercase.Contains(word) || _namesEtcListWithApostrophe.Contains(word) || _namesList.IsInNamesEtcMultiWordList(text, word);
|
||||
}
|
||||
|
||||
public bool HasUserWord(string word)
|
||||
{
|
||||
string s = word.ToLower();
|
||||
return _userWordList.Contains(s) || (s.StartsWith('\'') || s.EndsWith('\'')) && _userWordList.Contains(s.Trim('\''));
|
||||
}
|
||||
|
||||
public static List<SpellCheckWord> Split(string s)
|
||||
{
|
||||
var list = new List<SpellCheckWord>();
|
||||
var sb = new StringBuilder();
|
||||
for (int i = 0; i < s.Length; i++)
|
||||
{
|
||||
if (SplitChars.Contains(s[i]))
|
||||
{
|
||||
if (sb.Length > 0)
|
||||
list.Add(new SpellCheckWord { Text = sb.ToString(), Index = i - sb.Length });
|
||||
sb.Clear();
|
||||
}
|
||||
else
|
||||
{
|
||||
sb.Append(s[i]);
|
||||
}
|
||||
}
|
||||
if (sb.Length > 0)
|
||||
list.Add(new SpellCheckWord { Text = sb.ToString(), Index = s.Length - 1 - sb.Length });
|
||||
return list;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
19
libse/SpellCheck/UndoObject.cs
Normal file
19
libse/SpellCheck/UndoObject.cs
Normal file
@ -0,0 +1,19 @@
|
||||
using Nikse.SubtitleEdit.Core.Enums;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.SpellCheck
|
||||
{
|
||||
public class UndoObject
|
||||
{
|
||||
public int CurrentIndex { get; set; }
|
||||
public string UndoText { get; set; }
|
||||
public string UndoWord { get; set; }
|
||||
public string CurrentWord { get; set; }
|
||||
public SpellCheckAction Action { get; set; }
|
||||
public Subtitle Subtitle { get; set; }
|
||||
public int NoOfSkippedWords { get; set; }
|
||||
public int NoOfChangedWords { get; set; }
|
||||
public int NoOfCorrectWords { get; set; }
|
||||
public int NoOfNamesEtc { get; set; }
|
||||
public int NoOfAddedWords { get; set; }
|
||||
}
|
||||
}
|
@ -24,6 +24,7 @@ using System.Net;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Windows.Forms;
|
||||
using Nikse.SubtitleEdit.Core.SpellCheck;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
@ -5638,7 +5639,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
int index = -1;
|
||||
for (int i = 0; i < text.Length; i++)
|
||||
{
|
||||
if (Forms.SpellCheck.SplitChars.Contains(text[i]))
|
||||
if (SpellCheckWordLists.SplitChars.Contains(text[i]))
|
||||
{
|
||||
if (sb.Length > 0)
|
||||
{
|
||||
|
@ -1,5 +1,4 @@
|
||||
using Nikse.SubtitleEdit.Core;
|
||||
using Nikse.SubtitleEdit.Core.Dictionaries;
|
||||
using Nikse.SubtitleEdit.Core.Enums;
|
||||
using Nikse.SubtitleEdit.Logic;
|
||||
using Nikse.SubtitleEdit.Logic.SpellCheck;
|
||||
@ -7,54 +6,26 @@ using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Drawing;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
using System.Windows.Forms;
|
||||
using System.Xml;
|
||||
using Nikse.SubtitleEdit.Core.Interfaces;
|
||||
using Nikse.SubtitleEdit.Core.SpellCheck;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
public sealed partial class SpellCheck : Form
|
||||
{
|
||||
private class UndoObject
|
||||
{
|
||||
public int CurrentIndex { get; set; }
|
||||
public string UndoText { get; set; }
|
||||
public string UndoWord { get; set; }
|
||||
public string CurrentWord { get; set; }
|
||||
public SpellCheckAction Action { get; set; }
|
||||
public Subtitle Subtitle { get; set; }
|
||||
public int NoOfSkippedWords { get; set; }
|
||||
public int NoOfChangedWords { get; set; }
|
||||
public int NoOfCorrectWords { get; set; }
|
||||
public int NoOfNamesEtc { get; set; }
|
||||
public int NoOfAddedWords { get; set; }
|
||||
}
|
||||
public sealed partial class SpellCheck : Form, IDoSpell
|
||||
{
|
||||
private List<UndoObject> _undoList = new List<UndoObject>();
|
||||
|
||||
private SpellCheckAction _action = SpellCheckAction.Skip;
|
||||
private List<string> _suggestions;
|
||||
private string _currentAction;
|
||||
public SpellCheckAction Action { get { return _action; } set { _action = value; } }
|
||||
public string ChangeWord { get { return textBoxWord.Text; } set { textBoxWord.Text = value; } }
|
||||
public string ChangeWholeText { get { return textBoxWholeText.Text; } }
|
||||
public bool AutoFixNames { get { return checkBoxAutoChangeNames.Checked; } }
|
||||
public bool AutoFixNames { get { return checkBoxAutoChangeNames.Checked; } }
|
||||
|
||||
public int CurrentLineIndex
|
||||
{
|
||||
get { return _currentIndex; }
|
||||
}
|
||||
|
||||
private NamesList _namesList;
|
||||
private HashSet<string> _namesEtcList = new HashSet<string>();
|
||||
private HashSet<string> _namesEtcMultiWordList = new HashSet<string>();
|
||||
private List<string> _namesEtcListUppercase = new List<string>();
|
||||
private List<string> _namesEtcListWithApostrophe = new List<string>();
|
||||
private SpellCheckWordLists _spellCheckWordLists;
|
||||
private List<string> _skipAllList = new List<string>();
|
||||
private List<string> _wordsWithDashesOrPeriods = new List<string>();
|
||||
|
||||
private Dictionary<string, string> _changeAllDictionary = new Dictionary<string, string>();
|
||||
private List<string> _userWordList = new List<string>();
|
||||
private List<string> _userPhraseList = new List<string>();
|
||||
private string _prefix = string.Empty;
|
||||
private string _postfix = string.Empty;
|
||||
private Hunspell _hunspell;
|
||||
@ -66,7 +37,6 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
private int _wordsIndex;
|
||||
private Subtitle _subtitle;
|
||||
private string _originalWord;
|
||||
|
||||
private int _noOfSkippedWords;
|
||||
private int _noOfChangedWords;
|
||||
private int _noOfCorrectWords;
|
||||
@ -75,13 +45,9 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
private bool _firstChange = true;
|
||||
private string _languageName;
|
||||
private Main _mainWindow;
|
||||
|
||||
private string _currentDictionary;
|
||||
|
||||
public const string SplitChars = " -.,?!:;\"“”()[]{}|<>/+\r\n¿¡…—–♪♫„“";
|
||||
|
||||
private static readonly char[] ExpectedChars = new[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '%', '&', '@', '$', '*', '=', '£', '#', '_', '½', '^' };
|
||||
private static readonly char[] SplitChars2 = new[] { ' ', '.', ',', '?', '!', ':', ';', '"', '“', '”', '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n', '¿', '¡', '…', '—', '–', '♪', '♫', '„', '“' };
|
||||
private static readonly char[] ExpectedChars = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '%', '&', '@', '$', '*', '=', '£', '#', '_', '½', '^' };
|
||||
|
||||
public class SuggestionParameter
|
||||
{
|
||||
@ -283,7 +249,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
Configuration.Settings.Save();
|
||||
_languageName = LanguageString;
|
||||
string dictionary = Utilities.DictionaryFolder + _languageName;
|
||||
LoadDictionaries(Utilities.DictionaryFolder, dictionary);
|
||||
LoadDictionaries(Utilities.DictionaryFolder, dictionary, _languageName);
|
||||
_wordsIndex--;
|
||||
PrepareNextWord();
|
||||
}
|
||||
@ -445,38 +411,11 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
_skipAllList.Add(ChangeWord.ToUpper().Trim('\''));
|
||||
break;
|
||||
case SpellCheckAction.AddToDictionary:
|
||||
if (_userWordList.IndexOf(ChangeWord) < 0)
|
||||
{
|
||||
if (_spellCheckWordLists.AddUserWord(ChangeWord))
|
||||
_noOfAddedWords++;
|
||||
string s = ChangeWord.Trim().ToLower();
|
||||
if (s.Contains(' '))
|
||||
_userPhraseList.Add(s);
|
||||
else
|
||||
_userWordList.Add(s);
|
||||
Utilities.AddToUserDictionary(s, _languageName);
|
||||
}
|
||||
break;
|
||||
case SpellCheckAction.AddToNamesEtc:
|
||||
if (ChangeWord.Length > 1 && !_namesEtcList.Contains(ChangeWord))
|
||||
{
|
||||
_namesEtcList.Add(ChangeWord);
|
||||
_namesEtcListUppercase.Add(ChangeWord.ToUpper());
|
||||
if (_languageName.StartsWith("en_", StringComparison.Ordinal) && !ChangeWord.EndsWith('s'))
|
||||
{
|
||||
_namesEtcList.Add(ChangeWord + "s");
|
||||
_namesEtcListUppercase.Add(ChangeWord.ToUpper() + "S");
|
||||
}
|
||||
if (!ChangeWord.EndsWith('s'))
|
||||
{
|
||||
_namesEtcListWithApostrophe.Add(ChangeWord + "'s");
|
||||
_namesEtcListUppercase.Add(ChangeWord.ToUpper() + "'S");
|
||||
}
|
||||
if (!ChangeWord.EndsWith('\''))
|
||||
_namesEtcListWithApostrophe.Add(ChangeWord + "'");
|
||||
|
||||
var namesList = new NamesList(Configuration.DictionariesFolder, _languageName, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
|
||||
namesList.Add(ChangeWord);
|
||||
}
|
||||
_spellCheckWordLists.AddName(ChangeWord);
|
||||
break;
|
||||
case SpellCheckAction.ChangeWholeText:
|
||||
_mainWindow.ShowStatus(string.Format(Configuration.Settings.Language.Main.SpellCheckChangedXToY, _currentParagraph.Text.Replace(Environment.NewLine, " "), ChangeWholeText.Replace(Environment.NewLine, " ")));
|
||||
@ -503,25 +442,6 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
ShowActionInfo(_currentAction, textBoxWord.Text);
|
||||
}
|
||||
|
||||
public static bool IsWordInUserPhrases(List<string> userPhraseList, int index, List<SpellCheckWord> words)
|
||||
{
|
||||
string current = words[index].Text;
|
||||
string prev = "-";
|
||||
if (index > 0)
|
||||
prev = words[index - 1].Text;
|
||||
string next = "-";
|
||||
if (index < words.Count - 1)
|
||||
next = words[index + 1].Text;
|
||||
foreach (string userPhrase in userPhraseList)
|
||||
{
|
||||
if (userPhrase == current + " " + next)
|
||||
return true;
|
||||
if (userPhrase == prev + " " + current)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private void PrepareNextWord()
|
||||
{
|
||||
while (true)
|
||||
@ -580,8 +500,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
_currentWord = _currentWord.Substring(1);
|
||||
}
|
||||
}
|
||||
if (_namesEtcList.Contains(_currentWord)
|
||||
|| (_currentWord.StartsWith('\'') || _currentWord.EndsWith('\'')) && _namesEtcList.Contains(_currentWord.Trim('\'')))
|
||||
if (_spellCheckWordLists.HasName(_currentWord))
|
||||
{
|
||||
_noOfNamesEtc++;
|
||||
}
|
||||
@ -590,8 +509,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
_noOfSkippedWords++;
|
||||
}
|
||||
else if (_userWordList.Contains(_currentWord.ToLower())
|
||||
|| (_currentWord.StartsWith('\'') || _currentWord.EndsWith('\'')) && _userWordList.Contains(_currentWord.Trim('\'').ToLower()))
|
||||
else if (_spellCheckWordLists.HasUserWord(_currentWord))
|
||||
{
|
||||
_noOfCorrectWords++;
|
||||
}
|
||||
@ -605,13 +523,11 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
_noOfChangedWords++;
|
||||
_mainWindow.CorrectWord(_changeAllDictionary[_currentWord], _currentParagraph, _currentWord.Trim('\''), ref _firstChange, -1);
|
||||
}
|
||||
else if (_namesEtcListUppercase.Contains(_currentWord)
|
||||
|| _namesEtcListWithApostrophe.Contains(_currentWord)
|
||||
|| _namesList.IsInNamesEtcMultiWordList(_currentParagraph.Text, _currentWord)) // TODO: Verify this!
|
||||
else if (_spellCheckWordLists.HasNameExtended(_currentWord, _currentParagraph.Text)) // TODO: Verify this!
|
||||
{
|
||||
_noOfNamesEtc++;
|
||||
}
|
||||
else if (IsWordInUserPhrases(_userPhraseList, _wordsIndex, _words))
|
||||
else if (_spellCheckWordLists.IsWordInUserPhrases(_wordsIndex, _words))
|
||||
{
|
||||
_noOfCorrectWords++;
|
||||
}
|
||||
@ -619,7 +535,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
bool correct;
|
||||
|
||||
if (_prefix == "'" && _currentWord.Length >= 1 && (DoSpell(_prefix + _currentWord) || _userWordList.Contains(_prefix + _currentWord)))
|
||||
if (_prefix == "'" && _currentWord.Length >= 1 && (DoSpell(_prefix + _currentWord) || _spellCheckWordLists.HasUserWord(_prefix + _currentWord)))
|
||||
{
|
||||
correct = true;
|
||||
}
|
||||
@ -714,7 +630,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
DoAction(SpellCheckAction.ChangeAll);
|
||||
return;
|
||||
}
|
||||
if (AutoFixNames && _currentWord.Length > 1 && _namesEtcList.Contains(char.ToUpper(_currentWord[0]) + _currentWord.Substring(1)))
|
||||
if (AutoFixNames && _currentWord.Length > 1 && _spellCheckWordLists.HasName(char.ToUpper(_currentWord[0]) + _currentWord.Substring(1)))
|
||||
{
|
||||
ChangeWord = char.ToUpper(_currentWord[0]) + _currentWord.Substring(1);
|
||||
DoAction(SpellCheckAction.ChangeAll);
|
||||
@ -752,131 +668,12 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
}
|
||||
}
|
||||
|
||||
private static List<SpellCheckWord> Split(string s)
|
||||
{
|
||||
var list = new List<SpellCheckWord>();
|
||||
var sb = new StringBuilder();
|
||||
for (int i = 0; i < s.Length; i++)
|
||||
{
|
||||
if (SplitChars.Contains(s[i]))
|
||||
{
|
||||
if (sb.Length > 0)
|
||||
list.Add(new SpellCheckWord { Text = sb.ToString(), Index = i - sb.Length });
|
||||
sb.Clear();
|
||||
}
|
||||
else
|
||||
{
|
||||
sb.Append(s[i]);
|
||||
}
|
||||
}
|
||||
if (sb.Length > 0)
|
||||
list.Add(new SpellCheckWord { Text = sb.ToString(), Index = s.Length - 1 - sb.Length });
|
||||
return list;
|
||||
}
|
||||
|
||||
|
||||
private void SetWords(string s)
|
||||
{
|
||||
s = ReplaceHtmlTagsWithBlanks(s);
|
||||
s = ReplaceKnownWordsOrNamesWithBlanks(s);
|
||||
_words = Split(s);
|
||||
}
|
||||
|
||||
private string ReplaceKnownWordsOrNamesWithBlanks(string s)
|
||||
{
|
||||
var replaceIds = new List<string>();
|
||||
var replaceNames = new List<string>();
|
||||
GetTextWithoutUserWordsAndNames(replaceIds, replaceNames, s);
|
||||
foreach (string name in replaceNames)
|
||||
{
|
||||
int start = s.IndexOf(name, StringComparison.Ordinal);
|
||||
while (start >= 0)
|
||||
{
|
||||
bool startOk = start == 0 || SplitChars.Contains(s[start - 1]);
|
||||
if (startOk)
|
||||
{
|
||||
int end = start + name.Length;
|
||||
bool endOk = end >= s.Length || SplitChars.Contains(s[end]);
|
||||
if (endOk)
|
||||
s = s.Remove(start, name.Length).Insert(start, string.Empty.PadLeft(name.Length));
|
||||
}
|
||||
|
||||
if (start + 1 < s.Length)
|
||||
start = s.IndexOf(name, start + 1, StringComparison.Ordinal);
|
||||
else
|
||||
start = -1;
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
private static string ReplaceHtmlTagsWithBlanks(string s)
|
||||
{
|
||||
int start = s.IndexOf('<');
|
||||
while (start >= 0)
|
||||
{
|
||||
int end = s.IndexOf('>', start + 1);
|
||||
if (end < start)
|
||||
break;
|
||||
int l = end - start + 1;
|
||||
s = s.Remove(start, l).Insert(start, string.Empty.PadLeft(l));
|
||||
end++;
|
||||
if (end >= s.Length)
|
||||
break;
|
||||
start = s.IndexOf('<', end);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Removes words with dash'es that are correct, so spell check can ignore the combination (do not split correct words with dash'es)
|
||||
/// </summary>
|
||||
private void GetTextWithoutUserWordsAndNames(List<string> replaceIds, List<string> replaceNames, string text)
|
||||
{
|
||||
string[] wordsWithDash = text.Split(SplitChars2, StringSplitOptions.RemoveEmptyEntries);
|
||||
foreach (string w in wordsWithDash)
|
||||
{
|
||||
if (w.Contains('-') && DoSpell(w) && !_wordsWithDashesOrPeriods.Contains(w))
|
||||
_wordsWithDashesOrPeriods.Add(w);
|
||||
}
|
||||
|
||||
if (text.Contains(new[] { '.', '-' }))
|
||||
{
|
||||
int i = 0;
|
||||
foreach (string wordWithDashesOrPeriods in _wordsWithDashesOrPeriods)
|
||||
{
|
||||
bool found = true;
|
||||
int startSearchIndex = 0;
|
||||
while (found)
|
||||
{
|
||||
int indexStart = text.IndexOf(wordWithDashesOrPeriods, startSearchIndex, StringComparison.Ordinal);
|
||||
|
||||
if (indexStart >= 0)
|
||||
{
|
||||
int endIndexPlus = indexStart + wordWithDashesOrPeriods.Length;
|
||||
bool startOk = indexStart == 0 || (@" (['""" + Environment.NewLine).Contains(text[indexStart - 1]);
|
||||
bool endOk = endIndexPlus == text.Length;
|
||||
if (!endOk && endIndexPlus < text.Length && @",!?:;. ])<'""".Contains(text[endIndexPlus]))
|
||||
endOk = true;
|
||||
if (startOk && endOk)
|
||||
{
|
||||
i++;
|
||||
string id = string.Format("_@{0}_", i);
|
||||
replaceIds.Add(id);
|
||||
replaceNames.Add(wordWithDashesOrPeriods);
|
||||
text = text.Remove(indexStart, wordWithDashesOrPeriods.Length).Insert(indexStart, id);
|
||||
}
|
||||
else
|
||||
{
|
||||
startSearchIndex = indexStart + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
found = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
s = _spellCheckWordLists.ReplaceHtmlTagsWithBlanks(s);
|
||||
s = _spellCheckWordLists.ReplaceKnownWordsOrNamesWithBlanks(s);
|
||||
_words = SpellCheckWordLists.Split(s);
|
||||
}
|
||||
|
||||
private void ShowEndStatusMessage(string completedMessage)
|
||||
@ -935,9 +732,6 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
LanguageStructure.Main mainLanguage = Configuration.Settings.Language.Main;
|
||||
_mainWindow = mainWindow;
|
||||
|
||||
_namesEtcListUppercase = new List<string>();
|
||||
_namesEtcListWithApostrophe = new List<string>();
|
||||
|
||||
_skipAllList = new List<string>();
|
||||
|
||||
_noOfSkippedWords = 0;
|
||||
@ -972,7 +766,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
_languageName = LanguageAutoDetect.AutoDetectLanguageName(_languageName, subtitle);
|
||||
string dictionary = Utilities.DictionaryFolder + _languageName;
|
||||
|
||||
LoadDictionaries(dictionaryFolder, dictionary);
|
||||
LoadDictionaries(dictionaryFolder, dictionary, _languageName);
|
||||
|
||||
_currentIndex = 0;
|
||||
if (startLine >= 0 && startLine < _subtitle.Paragraphs.Count)
|
||||
@ -984,64 +778,11 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
PrepareNextWord();
|
||||
}
|
||||
|
||||
private void LoadDictionaries(string dictionaryFolder, string dictionary)
|
||||
private void LoadDictionaries(string dictionaryFolder, string dictionary, string languageName)
|
||||
{
|
||||
_changeAllDictionary = new Dictionary<string, string>();
|
||||
_skipAllList = new List<string>();
|
||||
_namesList = new NamesList(Configuration.DictionariesFolder, _languageName, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
|
||||
_namesEtcList = _namesList.GetNames();
|
||||
_namesEtcMultiWordList = _namesList.GetMultiNames();
|
||||
|
||||
foreach (string namesItem in _namesEtcList)
|
||||
_namesEtcListUppercase.Add(namesItem.ToUpper());
|
||||
|
||||
if (_languageName.StartsWith("en_", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
foreach (string namesItem in _namesEtcList)
|
||||
{
|
||||
if (!namesItem.EndsWith('s'))
|
||||
{
|
||||
_namesEtcListWithApostrophe.Add(namesItem + "'s");
|
||||
_namesEtcListWithApostrophe.Add(namesItem + "’s");
|
||||
}
|
||||
else if (!namesItem.EndsWith('\''))
|
||||
{
|
||||
_namesEtcListWithApostrophe.Add(namesItem + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_userWordList = new List<string>();
|
||||
_userPhraseList = new List<string>();
|
||||
if (File.Exists(dictionaryFolder + _languageName + "_user.xml"))
|
||||
{
|
||||
var userWordDictionary = new XmlDocument();
|
||||
userWordDictionary.Load(dictionaryFolder + _languageName + "_user.xml");
|
||||
foreach (XmlNode node in userWordDictionary.DocumentElement.SelectNodes("word"))
|
||||
{
|
||||
string word = node.InnerText.Trim().ToLower();
|
||||
if (word.Contains(' '))
|
||||
_userPhraseList.Add(word);
|
||||
else
|
||||
_userWordList.Add(word);
|
||||
}
|
||||
}
|
||||
|
||||
// Add names/userdic with "." or " " or "-"
|
||||
_wordsWithDashesOrPeriods = new List<string>();
|
||||
_wordsWithDashesOrPeriods.AddRange(_namesEtcMultiWordList);
|
||||
foreach (string name in _namesEtcList)
|
||||
{
|
||||
if (name.Contains(new[] { '.', '-' }))
|
||||
_wordsWithDashesOrPeriods.Add(name);
|
||||
}
|
||||
foreach (string word in _userWordList)
|
||||
{
|
||||
if (word.Contains(new[] { '.', '-' }))
|
||||
_wordsWithDashesOrPeriods.Add(word);
|
||||
}
|
||||
_wordsWithDashesOrPeriods.AddRange(_userPhraseList);
|
||||
|
||||
_spellCheckWordLists = new SpellCheckWordLists(dictionaryFolder, languageName, this);
|
||||
_changeAllDictionary = new Dictionary<string, string>();
|
||||
LoadHunspell(dictionary);
|
||||
}
|
||||
@ -1199,30 +940,10 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
_skipAllList.Remove(undo.UndoWord.ToUpper().Trim('\''));
|
||||
break;
|
||||
case SpellCheckAction.AddToDictionary:
|
||||
_userWordList.Remove(undo.UndoWord);
|
||||
_userPhraseList.Remove(undo.UndoWord);
|
||||
Utilities.RemoveFromUserDictionary(undo.UndoWord, _languageName);
|
||||
_spellCheckWordLists.RemoveUserWord(undo.UndoWord);
|
||||
break;
|
||||
case SpellCheckAction.AddToNamesEtc:
|
||||
if (undo.UndoWord.Length > 1 && _namesEtcList.Contains(undo.UndoWord))
|
||||
{
|
||||
_namesEtcList.Remove(undo.UndoWord);
|
||||
_namesEtcListUppercase.Remove(undo.UndoWord.ToUpper());
|
||||
if (_languageName.StartsWith("en_", StringComparison.Ordinal) && !undo.UndoWord.EndsWith('s'))
|
||||
{
|
||||
_namesEtcList.Remove(undo.UndoWord + "s");
|
||||
_namesEtcListUppercase.Remove(undo.UndoWord.ToUpper() + "S");
|
||||
}
|
||||
if (!undo.UndoWord.EndsWith('s'))
|
||||
{
|
||||
_namesEtcListWithApostrophe.Remove(undo.UndoWord + "'s");
|
||||
_namesEtcListUppercase.Remove(undo.UndoWord.ToUpper() + "'S");
|
||||
}
|
||||
if (!undo.UndoWord.EndsWith('\''))
|
||||
_namesEtcListWithApostrophe.Remove(undo.UndoWord + "'");
|
||||
|
||||
_namesList.Remove(undo.UndoWord);
|
||||
}
|
||||
_spellCheckWordLists.RemoveName(undo.UndoWord);
|
||||
break;
|
||||
case SpellCheckAction.ChangeWholeText:
|
||||
_subtitle = _mainWindow.UndoFromSpellCheck(undo.Subtitle);
|
||||
|
Loading…
Reference in New Issue
Block a user