Minor improvements for the new word split

See https://github.com/SubtitleEdit/subtitleedit/discussions/5616
This commit is contained in:
niksedk 2021-12-20 12:12:14 +01:00
parent e7f6db25ed
commit 6b74c201b6
3 changed files with 22 additions and 12 deletions

View File

@ -33,6 +33,7 @@
<word>autotrophs</word>
<word>axe</word>
<word>backsplashes</word>
<word>badass</word>
<word>barista</word>
<word>barium</word>
<word>behaviour</word>
@ -165,6 +166,7 @@
<word>golconda</word>
<word>gold</word>
<word>gracilis</word>
<word>grandkids</word>
<word>guys'll</word>
<word>hafnium</word>
<word>hashtag</word>
@ -355,6 +357,7 @@
<word>skeevy</word>
<word>snuck</word>
<word>sodium</word>
<word>someplace</word>
<word>something's</word>
<word>spacefaring</word>
<word>spell's</word>
@ -366,6 +369,7 @@
<word>stenosis</word>
<word>stent</word>
<word>stenting</word>
<word>stepmom</word>
<word>stereotactic</word>
<word>sternotomy</word>
<word>strontium</word>

View File

@ -2017,8 +2017,6 @@
<Word from="soldierl" to="soldier!" />
<Word from="somebodypicks" to="somebody picks" />
<Word from="somePIace" to="someplace" />
<Word from="someplace" to="some place" />
<Word from="Someplace" to="Some place" />
<Word from="somethlng" to="something" />
<Word from="somethlng's" to="something's" />
<Word from="somez'/7/ng" to="something" />

View File

@ -1574,10 +1574,20 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if (word.Length > 4)
{
var splitWords = StringWithoutSpaceSplitToWords.SplitWord(_wordSplitList, word);
if (splitWords != word)
if (_threeLetterIsoLanguageName == "eng" &&
word.EndsWith("in", StringComparison.Ordinal) &&
line.Contains(word + "'") &&
DoSpell(word + "g"))
{
guesses.Add(splitWords);
// do not add a word-split guess for English words with a dropped final "g", like "workin'" or "holdin'"
}
else
{
var splitWords = StringWithoutSpaceSplitToWords.SplitWord(_wordSplitList, word);
if (splitWords != word)
{
guesses.Add(splitWords);
}
}
}
@ -1921,13 +1931,11 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
{
foreach (string s in word.Split(' '))
{
if (!DoSpell(s) && !_nameList.Contains(s))
if (!DoSpell(s) &&
!_nameList.Contains(s) &&
!_userWordList.Contains(s) &&
!IsWordKnownOrNumber(s, word))
{
if (IsWordKnownOrNumber(word, word))
{
return true;
}
if (s.Length > 10 && s.Contains('/'))
{
string[] ar = s.Split('/');
@ -1950,7 +1958,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if ((DoSpell(a) || IsWordKnownOrNumber(a, word)) &&
(DoSpell(b) || IsWordKnownOrNumber(b, word)))
{
return true;
continue;
}
}
}