mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-27 14:32:35 +01:00
Work on wordlists
This commit is contained in:
parent
df5a89d768
commit
e3bdff09c7
@ -191,6 +191,7 @@
|
||||
<word>cyanotic</word>
|
||||
<word>darmstadtium</word>
|
||||
<word>darndest</word>
|
||||
<word>dealmaking</word>
|
||||
<word>debride</word>
|
||||
<word>debridement</word>
|
||||
<word>decompensating</word>
|
||||
@ -418,6 +419,7 @@
|
||||
<word>mycelial</word>
|
||||
<word>nah</word>
|
||||
<word>namaste</word>
|
||||
<word>narrowminded</word>
|
||||
<word>nasties</word>
|
||||
<word>neighbour</word>
|
||||
<word>neighbourhood</word>
|
||||
@ -499,6 +501,7 @@
|
||||
<word>polenta</word>
|
||||
<word>polonium</word>
|
||||
<word>polysulfide</word>
|
||||
<word>postsurgical</word>
|
||||
<word>postulator</word>
|
||||
<word>potassium</word>
|
||||
<word>praseodymium</word>
|
||||
@ -511,6 +514,7 @@
|
||||
<word>pseudoachondroplasia</word>
|
||||
<word>pupillary</word>
|
||||
<word>purée</word>
|
||||
<word>pushback</word>
|
||||
<word>radium</word>
|
||||
<word>radon</word>
|
||||
<word>ragdoll</word>
|
||||
@ -576,6 +580,7 @@
|
||||
<word>slipspace</word>
|
||||
<word>smartphone</word>
|
||||
<word>smartphones</word>
|
||||
<word>snakelet</word>
|
||||
<word>snuck</word>
|
||||
<word>sociopathic</word>
|
||||
<word>sodium</word>
|
||||
@ -603,6 +608,7 @@
|
||||
<word>stepmom</word>
|
||||
<word>stereotactic</word>
|
||||
<word>sternotomy</word>
|
||||
<word>storyboarded</word>
|
||||
<word>strontium</word>
|
||||
<word>subclavian</word>
|
||||
<word>subdural hematoma</word>
|
||||
@ -708,6 +714,7 @@
|
||||
<word>where'd</word>
|
||||
<word>where're</word>
|
||||
<word>which</word>
|
||||
<word>whodunit</word>
|
||||
<word>why'd</word>
|
||||
<word>why's</word>
|
||||
<word>wizarding</word>
|
||||
|
@ -1437,6 +1437,7 @@
|
||||
<name>Rafferty</name>
|
||||
<name>Raiden</name>
|
||||
<name>Raina</name>
|
||||
<name>Rajan</name>
|
||||
<name>Ramiro</name>
|
||||
<name>Rashad</name>
|
||||
<name>Rayan</name>
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Nikse.SubtitleEdit.Core.Common;
|
||||
|
||||
@ -38,7 +39,13 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
|
||||
var s = input;
|
||||
var check = s;
|
||||
var spaces = new List<int>();
|
||||
for (int i = 0; i < words.Length; i++)
|
||||
|
||||
if (words.Contains(input))
|
||||
{
|
||||
return input;
|
||||
}
|
||||
|
||||
for (var i = 0; i < words.Length; i++)
|
||||
{
|
||||
var w = words[i];
|
||||
if (w.Length >= input.Length)
|
||||
@ -77,5 +84,32 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
|
||||
|
||||
return s.Trim();
|
||||
}
|
||||
|
||||
/// <summary>
/// Loads the word split list for a language from the file
/// "{Configuration.DictionariesDirectory}{threeLetterIsoLanguageName}_WordSplitList.txt".
/// </summary>
/// <param name="threeLetterIsoLanguageName">Language code used as the file name prefix, e.g. "eng".</param>
/// <param name="nameList">Optional name list; names longer than four characters are appended. May be null.</param>
/// <returns>
/// The non-blank lines of the file, plus (for English) a few built-in extra words and
/// (when <paramref name="nameList"/> is given) the longer names, ordered by descending length.
/// An empty array if the file does not exist.
/// </returns>
public static string[] LoadWordSplitList(string threeLetterIsoLanguageName, NameList nameList)
{
    var fileName = $"{Configuration.DictionariesDirectory}{threeLetterIsoLanguageName}_WordSplitList.txt";
    if (!File.Exists(fileName))
    {
        return Array.Empty<string>();
    }

    var wordList = File.ReadAllText(fileName)
        .SplitToLines()
        .Where(p => !string.IsNullOrWhiteSpace(p))
        .ToList();

    // Language code is compared case-insensitively so that e.g. "ENG" also gets the
    // built-in English words (the file lookup above can succeed for either casing on
    // case-insensitive file systems).
    if (string.Equals(threeLetterIsoLanguageName, "eng", StringComparison.OrdinalIgnoreCase))
    {
        // Ignore list
        wordList.AddRange(new[]
        {
            "Andor", "honour", "putain", "whoah", "eastside", "Starpath", "comlink"
        });
    }

    if (nameList != null)
    {
        // Only names with more than four characters are added.
        wordList.AddRange(nameList.GetNames().Where(p => p.Length > 4));
    }

    // Longest entries first.
    return wordList.OrderByDescending(p => p.Length).ToArray();
}
|
||||
}
|
||||
}
|
||||
|
@ -340,14 +340,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
_wordSplitListLanguage = languageName;
|
||||
var threeLetterIsoLanguageName = Iso639Dash2LanguageCode.GetThreeLetterCodeFromTwoLetterCode(twoLetterLanguageName);
|
||||
var fileName = $"{Configuration.DictionariesDirectory}{threeLetterIsoLanguageName}_WordSplitList.txt";
|
||||
if (!File.Exists(fileName))
|
||||
{
|
||||
return Array.Empty<string>();
|
||||
}
|
||||
|
||||
var wordList = File.ReadAllText(fileName).SplitToLines().Where(p => p.Trim().Length > 0).ToList();
|
||||
return wordList.ToArray();
|
||||
return StringWithoutSpaceSplitToWords.LoadWordSplitList(threeLetterIsoLanguageName, null);
|
||||
}
|
||||
|
||||
private void FillSpellCheckDictionaries(string languageName)
|
||||
|
@ -313,7 +313,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
_nameListWithApostrophe = new HashSet<string>();
|
||||
var nameListWithPeriods = new List<string>();
|
||||
_abbreviationList = new HashSet<string>();
|
||||
_wordSplitList = LoadWordSplitList(threeLetterIsoLanguageName, _nameListObj);
|
||||
_wordSplitList = StringWithoutSpaceSplitToWords.LoadWordSplitList(threeLetterIsoLanguageName, _nameListObj);
|
||||
|
||||
var isEnglish = threeLetterIsoLanguageName.Equals("eng", StringComparison.OrdinalIgnoreCase);
|
||||
foreach (var name in _nameList)
|
||||
@ -415,19 +415,6 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Loads the word split list for a language from the file
/// "{Configuration.DictionariesDirectory}{threeLetterIsoLanguageName}_WordSplitList.txt".
/// </summary>
/// <param name="threeLetterIsoLanguageName">Language code used as the file name prefix.</param>
/// <param name="nameList">Optional name list; names longer than four characters are appended. May be null.</param>
/// <returns>
/// The non-blank lines of the file plus the longer names, ordered by descending length.
/// An empty array if the file does not exist.
/// </returns>
private static string[] LoadWordSplitList(string threeLetterIsoLanguageName, NameList nameList)
{
    var fileName = $"{Configuration.DictionariesDirectory}{threeLetterIsoLanguageName}_WordSplitList.txt";
    if (!File.Exists(fileName))
    {
        return Array.Empty<string>();
    }

    var wordList = File.ReadAllText(fileName).SplitToLines().Where(p => p.Trim().Length > 0).ToList();

    // Guard against a missing name list instead of throwing a NullReferenceException.
    if (nameList != null)
    {
        wordList.AddRange(nameList.GetNames().Where(p => p.Length > 4));
    }

    // Longest entries first.
    return wordList.OrderByDescending(p => p.Length).ToArray();
}
|
||||
|
||||
public string SpellCheckDictionaryName
|
||||
{
|
||||
get
|
||||
|
Loading…
Reference in New Issue
Block a user