2016-02-02 20:40:47 +01:00
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.IO;
|
2018-12-23 19:14:41 +01:00
|
|
|
|
using System.Linq;
|
2016-02-02 20:40:47 +01:00
|
|
|
|
using System.Text;
|
|
|
|
|
using System.Xml;
|
|
|
|
|
using Nikse.SubtitleEdit.Core.Dictionaries;
|
|
|
|
|
using Nikse.SubtitleEdit.Core.Interfaces;
|
|
|
|
|
|
|
|
|
|
namespace Nikse.SubtitleEdit.Core.SpellCheck
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
public class SpellCheckWordLists
|
|
|
|
|
{
|
|
|
|
|
|
2019-09-14 18:06:26 +02:00
|
|
|
|
/// <summary>
/// Characters treated as word separators by <see cref="Split"/> and by the whole-word checks in
/// <see cref="ReplaceKnownWordsOrNamesWithBlanks"/>.
/// </summary>
public static readonly HashSet<char> SplitChars = new HashSet<char>
{
    // Punctuation, quotes, brackets and line breaks.
    ' ', '-', '.', ',', '?', '!', ':', ';', '\\', '"', '“', '”',
    '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n',
    // Non-ASCII punctuation and music symbols.
    '¿', '¡', '…', '—', '–', '♪', '♫', '„', '«', '»', '‹', '›', '؛', '،', '؟',
    // Unicode space, separator and formatting code points.
    '\u00A0', '\u1680', '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005', '\u2006', '\u2007',
    '\u2008', '\u2009', '\u200A', '\u200B', '\u200E', '\u200F', '\u2028', '\u2029', '\u202A', '\u202B',
    '\u202C', '\u202D', '\u202E', '\u202F', '\u3000', '\uFEFF'
};

// The two characters that mark a word as a "word with dashes or periods".
private static readonly char[] PeriodAndDash = { '.', '-' };

// Separators used by GetTextWithoutUserWordsAndNames; '-' is deliberately absent so dashed words stay in one piece.
private static readonly char[] SplitChars2 =
{
    ' ', '.', ',', '?', '!', ':', ';', '"', '“', '”', '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n',
    '¿', '¡', '…', '—', '–', '♪', '♫', '„', '«', '»', '‹', '›', '؛', '،', '؟'
};

// Values supplied through the constructor.
private readonly string _dictionaryFolder;
private readonly string _languageName;
private readonly IDoSpell _doSpell;

// Name data: the NameList itself, its single-word names, upper-case variants and possessive variants.
private readonly NameList _nameList;
private readonly HashSet<string> _names;
private readonly HashSet<string> _namesListUppercase = new HashSet<string>();
private readonly HashSet<string> _namesListWithApostrophe = new HashSet<string>();

// Known words that must not be split at '.' or '-' (names, user words/phrases, dashed words the spell checker accepted).
private readonly HashSet<string> _wordsWithDashesOrPeriods = new HashSet<string>();

// User dictionary entries, stored trimmed and lower-cased; entries containing a space go to the phrase list.
private readonly HashSet<string> _userWordList = new HashSet<string>();
private readonly HashSet<string> _userPhraseList = new HashSet<string>();

// "Use always" replacements (from -> to), loaded from and saved to "{language}_UseAlways.xml".
private readonly Dictionary<string, string> _useAlwaysList = new Dictionary<string, string>();

// NOTE(review): no method in this class adds entries to this set - confirm whether it is still needed.
private HashSet<string> _skipAllList = new HashSet<string>();
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds all word lists for one language: names (with upper-case and possessive variants), the user
/// dictionary words/phrases, the optional "use always" list, and the set of known words that contain
/// a period or a dash.
/// </summary>
/// <param name="dictionaryFolder">Folder containing "{languageName}_user.xml" and the use-always file.</param>
/// <param name="languageName">Dictionary language name; names starting with "en_" (any casing) get English possessive variants.</param>
/// <param name="doSpell">Spell checker, consulted later for words that contain a dash.</param>
/// <exception cref="NullReferenceException">Thrown when any argument is null (type kept so existing callers are unaffected).</exception>
public SpellCheckWordLists(string dictionaryFolder, string languageName, IDoSpell doSpell)
{
    _dictionaryFolder = dictionaryFolder ?? throw new NullReferenceException(nameof(dictionaryFolder));
    _languageName = languageName ?? throw new NullReferenceException(nameof(languageName));
    _doSpell = doSpell ?? throw new NullReferenceException(nameof(doSpell));

    _nameList = new NameList(Configuration.DictionariesDirectory, languageName, Configuration.Settings.WordLists.UseOnlineNames, Configuration.Settings.WordLists.NamesUrl);
    _names = _nameList.GetNames();
    var namesMultiWordList = _nameList.GetMultiNames();

    if (Configuration.Settings.Tools.RememberUseAlwaysList)
    {
        LoadUseAlwaysList();
    }

    bool isEnglish = languageName.StartsWith("en_", StringComparison.OrdinalIgnoreCase);
    foreach (string namesItem in _names)
    {
        _namesListUppercase.Add(namesItem.ToUpperInvariant());
        if (!isEnglish)
        {
            continue;
        }

        if (!namesItem.EndsWith('s'))
        {
            // Both the ASCII and the typographic apostrophe are accepted.
            _namesListWithApostrophe.Add(namesItem + "'s");
            _namesListWithApostrophe.Add(namesItem + "’s");
        }
        else
        {
            // A name ending in 's' cannot end in an apostrophe, so no further check is needed here.
            _namesListWithApostrophe.Add(namesItem + "'");
        }
    }

    // Path.Combine (as GetUseAlwaysListFileName already does) also works when the folder has no trailing separator.
    string userDictionaryFileName = Path.Combine(dictionaryFolder, languageName + "_user.xml");
    if (File.Exists(userDictionaryFileName))
    {
        var userWordDictionary = new XmlDocument();
        userWordDictionary.Load(userDictionaryFileName);
        var xmlNodeList = userWordDictionary.DocumentElement?.SelectNodes("word");
        if (xmlNodeList != null)
        {
            foreach (XmlNode node in xmlNodeList)
            {
                string word = node.InnerText.Trim().ToLowerInvariant();
                if (word.Length == 0)
                {
                    // A blank <word/> element must not become the entry "" (AddUserWord rejects it as well).
                    continue;
                }

                if (word.Contains(' '))
                {
                    _userPhraseList.Add(word);
                }
                else
                {
                    _userWordList.Add(word);
                }
            }
        }
    }

    // Add names/user dictionary entries with "." or "-" (same order as before: multi-word names, names, words, phrases).
    AddWordsWithPeriodOrDash(namesMultiWordList);
    AddWordsWithPeriodOrDash(_names);
    AddWordsWithPeriodOrDash(_userWordList);
    AddWordsWithPeriodOrDash(_userPhraseList);
}

// Registers every entry of <paramref name="words"/> that contains a period or a dash as a known un-splittable word.
private void AddWordsWithPeriodOrDash(IEnumerable<string> words)
{
    foreach (string word in words)
    {
        if (word.Contains(PeriodAndDash))
        {
            _wordsWithDashesOrPeriods.Add(word);
        }
    }
}
|
|
|
|
|
|
2018-12-23 19:14:41 +01:00
|
|
|
|
/// <summary>
/// Returns a copy of the current "use always" replacements; changes to the returned dictionary
/// do not affect this instance.
/// </summary>
public Dictionary<string, string> GetUseAlwaysList() => new Dictionary<string, string>(_useAlwaysList);
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Reads the saved "use always" pairs for the current language into memory.
/// Does nothing when the setting is switched off or when no file has been saved yet.
/// </summary>
private void LoadUseAlwaysList()
{
    if (!Configuration.Settings.Tools.RememberUseAlwaysList)
    {
        return;
    }

    var fileName = GetUseAlwaysListFileName();
    if (!File.Exists(fileName))
    {
        // Nothing to load. (An empty document used to be built here, but it was never read or saved.)
        return;
    }

    var xmlDoc = new XmlDocument();
    xmlDoc.Load(fileName);
    var pairs = xmlDoc.DocumentElement?.SelectNodes("Pair");
    if (pairs == null)
    {
        return;
    }

    foreach (XmlNode item in pairs)
    {
        var fromAttribute = item.Attributes?["from"];
        var toAttribute = item.Attributes?["to"];
        if (fromAttribute == null || toAttribute == null)
        {
            continue; // incomplete pair
        }

        // The first occurrence of a key wins; later duplicates in the file are ignored.
        if (!_useAlwaysList.ContainsKey(fromAttribute.Value))
        {
            _useAlwaysList.Add(fromAttribute.Value, toAttribute.Value);
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Full path of the use-always file for the current language: "{languageName}_UseAlways.xml" inside the dictionary folder.
/// </summary>
private string GetUseAlwaysListFileName()
{
    string fileName = _languageName + "_UseAlways.xml";
    return Path.Combine(_dictionaryFolder, fileName);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Adds a "use always" replacement and saves the list (handled by <see cref="SaveUseAlwaysList"/>).
/// </summary>
/// <param name="newKey">Text to replace.</param>
/// <param name="newValue">Replacement text.</param>
public void UseAlwaysListAdd(string newKey, string newValue)
{
    SaveUseAlwaysList(newKey: newKey, newValue: newValue);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Removes a "use always" replacement and saves the list (handled by <see cref="SaveUseAlwaysList"/>).
/// </summary>
/// <param name="key">Key of the replacement to remove.</param>
public void UseAlwaysListRemove(string key)
{
    SaveUseAlwaysList(oldKey: key);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Applies an optional addition and/or removal to the in-memory "use always" list and writes the whole
/// list to the use-always file. Does nothing (not even in memory) when the setting is switched off.
/// </summary>
/// <param name="newKey">Key to add; ignored when null, when <paramref name="newValue"/> is null, or when the trimmed key already exists.</param>
/// <param name="newValue">Value stored (trimmed) for <paramref name="newKey"/>.</param>
/// <param name="oldKey">Key to remove (trimmed); ignored when null or not present.</param>
private void SaveUseAlwaysList(string newKey = null, string newValue = null, string oldKey = null)
{
    if (!Configuration.Settings.Tools.RememberUseAlwaysList)
    {
        return;
    }

    if (newKey != null && newValue != null)
    {
        string key = newKey.Trim();
        if (!_useAlwaysList.ContainsKey(key))
        {
            _useAlwaysList.Add(key, newValue.Trim());
        }
    }

    if (oldKey != null)
    {
        // Dictionary.Remove simply returns false for a missing key, so no ContainsKey check is needed.
        _useAlwaysList.Remove(oldKey.Trim());
    }

    // The previous re-sorting of _skipAllList was dropped: ordering a HashSet has no effect
    // and that set is unrelated to the use-always list.

    var xmlDoc = new XmlDocument();
    xmlDoc.LoadXml("<UseAlways></UseAlways>");
    foreach (KeyValuePair<string, string> kvp in _useAlwaysList)
    {
        XmlElement pair = xmlDoc.CreateElement("Pair");
        pair.SetAttribute("from", kvp.Key);
        pair.SetAttribute("to", kvp.Value);
        xmlDoc.DocumentElement?.AppendChild(pair);
    }

    xmlDoc.Save(GetUseAlwaysListFileName());
}
|
2016-02-02 20:40:47 +01:00
|
|
|
|
|
|
|
|
|
/// <summary>
/// Removes a word or phrase from the in-memory user lists and from the user dictionary of the current language.
/// </summary>
/// <param name="word">Word or phrase to remove; null is ignored, as in <see cref="AddUserWord"/>.</param>
public void RemoveUserWord(string word)
{
    if (word == null)
    {
        return;
    }

    // The in-memory lists only hold trimmed, lower-cased entries (see the constructor and AddUserWord),
    // so the lookup key must be normalized the same way or the entry is never found.
    string key = word.Trim().ToLowerInvariant();
    _userWordList.Remove(key);
    _userPhraseList.Remove(key);

    // The persisted dictionary receives the word exactly as the caller supplied it.
    Utilities.RemoveFromUserDictionary(word, _languageName);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Removes a name and the variants derived from it (upper-case, plural, possessive) from the in-memory
/// lists, then removes it from the name list.
/// </summary>
/// <param name="word">Name to remove; ignored when null, shorter than two characters, or not a known name.</param>
public void RemoveName(string word)
{
    if (word == null || word.Length <= 1 || !_names.Contains(word))
    {
        return;
    }

    string upper = word.ToUpperInvariant();
    _names.Remove(word);
    _namesListUppercase.Remove(upper);

    if (!word.EndsWith('s'))
    {
        // Case-insensitive, matching the language check in the constructor.
        if (_languageName.StartsWith("en_", StringComparison.OrdinalIgnoreCase))
        {
            _names.Remove(word + "s");
            _namesListUppercase.Remove(upper + "S");
        }

        _namesListWithApostrophe.Remove(word + "'s");
        // The constructor also registers the typographic-apostrophe variant, so it has to go too.
        _namesListWithApostrophe.Remove(word + "’s");
        _namesListUppercase.Remove(upper + "'S");
    }

    if (!word.EndsWith('\''))
    {
        _namesListWithApostrophe.Remove(word + "'");
    }

    // Otherwise ReplaceKnownWordsOrNamesWithBlanks would keep treating the removed name as a known word.
    _wordsWithDashesOrPeriods.Remove(word);

    _nameList.Remove(word);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Overwrites every whole-word occurrence of a known word containing a dash or period with spaces,
/// keeping the length of the text (and therefore all character positions) unchanged.
/// </summary>
/// <param name="s">Text to process.</param>
/// <returns>The text with the known words blanked out.</returns>
public string ReplaceKnownWordsOrNamesWithBlanks(string s)
{
    var ids = new List<string>();
    var knownWords = new List<string>();
    GetTextWithoutUserWordsAndNames(ids, knownWords, s);

    // True when position i is outside the text or holds a separator/control character.
    bool IsBoundary(int i) => i < 0 || i >= s.Length || SplitChars.Contains(s[i]) || char.IsControl(s[i]);

    foreach (string known in knownWords)
    {
        string blanks = new string(' ', known.Length);
        int pos = s.IndexOf(known, StringComparison.Ordinal);
        while (pos >= 0)
        {
            int after = pos + known.Length;
            if (IsBoundary(pos - 1) && IsBoundary(after))
            {
                s = s.Substring(0, pos) + blanks + s.Substring(after);
            }

            // Continue one character further; stop when that would start at or past the end of the text.
            pos = pos + 1 < s.Length
                ? s.IndexOf(known, pos + 1, StringComparison.Ordinal)
                : -1;
        }
    }

    return s;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Overwrites every "&lt;...&gt;" span with spaces so the text keeps its length and character positions.
/// A '&lt;' without a following '&gt;' ends the processing and is left as it is.
/// </summary>
/// <param name="s">Text that may contain tags.</param>
/// <returns>The text with each tag replaced by an equal number of spaces.</returns>
public string ReplaceHtmlTagsWithBlanks(string s)
{
    char[] buffer = s.ToCharArray();
    int open = s.IndexOf('<');
    while (open >= 0)
    {
        int close = s.IndexOf('>', open + 1);
        if (close < 0)
        {
            break; // unterminated tag
        }

        for (int i = open; i <= close; i++)
        {
            buffer[i] = ' ';
        }

        int next = close + 1;
        if (next >= s.Length)
        {
            break;
        }

        open = s.IndexOf('<', next);
    }

    return new string(buffer);
}
|
|
|
|
|
|
2018-11-18 07:55:06 +01:00
|
|
|
|
/// <summary>
/// Overwrites every ASS override block ("{\...}") with spaces so the text keeps its length and
/// character positions. A "{\" without a following '}' ends the processing and is left as it is.
/// </summary>
/// <param name="s">Text that may contain ASS override tags.</param>
/// <returns>The text with each override block replaced by an equal number of spaces.</returns>
public string ReplaceAssTagsWithBlanks(string s)
{
    int start = s.IndexOf("{\\", StringComparison.Ordinal);
    if (start < 0)
    {
        return s;
    }

    // The closing brace is always searched for AFTER the opening "{\". The earlier up-front check used
    // the first '}' anywhere in the text, so a stray '}' before the first tag disabled all stripping.
    char[] buffer = s.ToCharArray();
    while (start >= 0)
    {
        int end = s.IndexOf('}', start + 1);
        if (end < 0)
        {
            break; // unterminated tag
        }

        for (int i = start; i <= end; i++)
        {
            buffer[i] = ' ';
        }

        int next = end + 1;
        if (next >= s.Length)
        {
            break;
        }

        start = s.IndexOf("{\\", next, StringComparison.Ordinal);
    }

    return new string(buffer);
}
|
|
|
|
|
|
2016-02-02 20:40:47 +01:00
|
|
|
|
/// <summary>
/// Checks whether the word at <paramref name="index"/> forms a two-word user phrase together with
/// the word before or after it.
/// </summary>
/// <param name="index">Index of the word to check in <paramref name="words"/>.</param>
/// <param name="words">Words of the current text, in order.</param>
/// <returns>True when "current next" or "previous current" is a user phrase.</returns>
public bool IsWordInUserPhrases(int index, List<SpellCheckWord> words)
{
    string current = words[index].Text;
    // "-" stands in for a missing neighbour at either end of the list.
    string prev = index > 0 ? words[index - 1].Text : "-";
    string next = index < words.Count - 1 ? words[index + 1].Text : "-";

    // User phrases are stored lower-cased (see the constructor and AddUserWord), so the candidates are
    // lower-cased as well; a direct set lookup replaces the former scan over every phrase.
    return _userPhraseList.Contains((current + " " + next).ToLowerInvariant())
        || _userPhraseList.Contains((prev + " " + current).ToLowerInvariant());
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Finds the known words containing dashes or periods that occur in <paramref name="text"/> as whole
/// words, so the caller can keep them from being split at the dash or period. Dashed words accepted by
/// the spell checker are first added to the known set.
/// </summary>
/// <param name="replaceIds">Receives one generated placeholder id ("_@1_", "_@2_", ...) per match.</param>
/// <param name="replaceNames">Receives the matched word for each placeholder, in the same order.</param>
/// <param name="text">Text to scan; only this method's local copy is rewritten with the placeholders.</param>
private void GetTextWithoutUserWordsAndNames(List<string> replaceIds, List<string> replaceNames, string text)
{
    foreach (string w in text.Split(SplitChars2, StringSplitOptions.RemoveEmptyEntries))
    {
        // The cheap set lookup comes first, so the spell checker is only asked about unknown dashed words.
        if (w.Contains('-') && !_wordsWithDashesOrPeriods.Contains(w) && _doSpell.DoSpell(w))
        {
            _wordsWithDashesOrPeriods.Add(w);
        }
    }

    if (!text.Contains(PeriodAndDash))
    {
        return;
    }

    // Characters allowed directly before / after a match for it to count as a whole word.
    string allowedBefore = @" (['""" + "\r\n";
    string allowedAfter = @",!?:;. ])<'""";

    int i = 0;
    foreach (string wordWithDashesOrPeriods in _wordsWithDashesOrPeriods)
    {
        int startSearchIndex = 0;
        while (true)
        {
            int indexStart = text.IndexOf(wordWithDashesOrPeriods, startSearchIndex, StringComparison.Ordinal);
            if (indexStart < 0)
            {
                break;
            }

            int endIndexPlus = indexStart + wordWithDashesOrPeriods.Length;
            bool startOk = indexStart == 0 || allowedBefore.Contains(text[indexStart - 1]);
            bool endOk = endIndexPlus == text.Length || allowedAfter.Contains(text[endIndexPlus]);
            if (startOk && endOk)
            {
                i++;
                string id = $"_@{i}_";
                replaceIds.Add(id);
                replaceNames.Add(wordWithDashesOrPeriods);
                // Swapping the match for its id means the next search (from the same index) cannot find it again.
                text = text.Remove(indexStart, wordWithDashesOrPeriods.Length).Insert(indexStart, id);
            }
            else
            {
                // Not a whole word here: skip past this occurrence.
                startSearchIndex = indexStart + 1;
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Adds a name to the in-memory lists, together with its upper-case, plural and possessive variants,
/// and adds it to the name list on disk via a <c>NameList</c>.
/// </summary>
/// <param name="word">Name to add.</param>
/// <returns>False when <paramref name="word"/> is null/empty or already a known name; otherwise true.</returns>
public bool AddName(string word)
{
    if (string.IsNullOrEmpty(word) || _names.Contains(word))
    {
        return false;
    }

    string upper = word.ToUpperInvariant();
    _names.Add(word);
    _namesListUppercase.Add(upper);

    if (!word.EndsWith('s'))
    {
        // Case-insensitive, matching the language check in the constructor.
        if (_languageName.StartsWith("en_", StringComparison.OrdinalIgnoreCase))
        {
            _names.Add(word + "s");
            _namesListUppercase.Add(upper + "S");
        }

        _namesListWithApostrophe.Add(word + "'s");
        // Also accept the typographic apostrophe, as the constructor does for loaded names.
        _namesListWithApostrophe.Add(word + "’s");
        _namesListUppercase.Add(upper + "'S");
    }

    if (!word.EndsWith('\''))
    {
        _namesListWithApostrophe.Add(word + "'");
    }

    // Registered as a known word whether or not it contains a dash or period.
    _wordsWithDashesOrPeriods.Add(word);

    // NOTE(review): a fresh NameList is used here instead of _nameList (which RemoveName uses) - confirm this is intentional.
    var namesList = new NameList(Configuration.DictionariesDirectory, _languageName, Configuration.Settings.WordLists.UseOnlineNames, Configuration.Settings.WordLists.NamesUrl);
    namesList.Add(word);
    return true;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Adds a word or phrase (trimmed and lower-cased) to the in-memory user lists and to the user
/// dictionary of the current language. Entries containing a space are stored as phrases.
/// </summary>
/// <param name="word">Word or phrase to add.</param>
/// <returns>False when <paramref name="word"/> is null, blank or already present; otherwise true.</returns>
public bool AddUserWord(string word)
{
    if (word == null)
    {
        return false;
    }

    word = word.Trim().ToLowerInvariant();
    if (word.Length == 0)
    {
        return false;
    }

    // The duplicate check runs against the list the entry belongs to. Previously only the single-word
    // list was checked, so an existing phrase was re-added and reported as new.
    HashSet<string> target = word.Contains(' ') ? _userPhraseList : _userWordList;
    if (!target.Add(word))
    {
        return false;
    }

    Utilities.AddToUserDictionary(word, _languageName);
    return true;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Checks whether <paramref name="word"/> is a known name, either exactly or after stripping
/// leading/trailing apostrophes.
/// </summary>
/// <param name="word">Word to look up (case-sensitive).</param>
public bool HasName(string word)
{
    if (_names.Contains(word))
    {
        return true;
    }

    bool hasOuterApostrophe = word.StartsWith('\'') || word.EndsWith('\'');
    return hasOuterApostrophe && _names.Contains(word.Trim('\''));
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Checks <paramref name="word"/> against the upper-case name variants, the possessive name variants
/// and, last, the multi-word names of the name list (using <paramref name="text"/> as context).
/// </summary>
/// <param name="word">Word to look up.</param>
/// <param name="text">Text passed on to the multi-word name lookup.</param>
public bool HasNameExtended(string word, string text)
{
    if (_namesListUppercase.Contains(word))
    {
        return true;
    }

    if (_namesListWithApostrophe.Contains(word))
    {
        return true;
    }

    return _nameList.IsInNamesMultiWordList(text, word);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Checks whether <paramref name="word"/> (compared lower-cased) is in the user word list, either
/// exactly or after stripping leading/trailing apostrophes.
/// </summary>
/// <param name="word">Word to look up.</param>
public bool HasUserWord(string word)
{
    string lower = word.ToLowerInvariant();
    if (_userWordList.Contains(lower))
    {
        return true;
    }

    if (!lower.StartsWith('\'') && !lower.EndsWith('\''))
    {
        return false;
    }

    return _userWordList.Contains(lower.Trim('\''));
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Splits a text into words at every character that is in <see cref="SplitChars"/> or is a control
/// character, recording each word together with its start index in the text.
/// </summary>
/// <param name="s">Text to split.</param>
/// <returns>The words in order of appearance; empty when the text contains no word characters.</returns>
public static List<SpellCheckWord> Split(string s)
{
    var words = new List<SpellCheckWord>();
    int wordStart = -1; // -1 while not inside a word

    // The loop runs one step past the last character so a word touching the end of the text is flushed too.
    for (int pos = 0; pos <= s.Length; pos++)
    {
        bool atSeparator = pos == s.Length || SplitChars.Contains(s[pos]) || char.IsControl(s[pos]);
        if (!atSeparator)
        {
            if (wordStart < 0)
            {
                wordStart = pos;
            }

            continue;
        }

        if (wordStart >= 0)
        {
            words.Add(new SpellCheckWord { Text = s.Substring(wordStart, pos - wordStart), Index = wordStart });
            wordStart = -1;
        }
    }

    return words;
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|