Minor improvements for the new word split

See https://github.com/SubtitleEdit/subtitleedit/discussions/5616
This commit is contained in:
niksedk 2021-12-20 12:12:14 +01:00
parent e7f6db25ed
commit 6b74c201b6
3 changed files with 22 additions and 12 deletions

View File

@ -33,6 +33,7 @@
<word>autotrophs</word> <word>autotrophs</word>
<word>axe</word> <word>axe</word>
<word>backsplashes</word> <word>backsplashes</word>
<word>badass</word>
<word>barista</word> <word>barista</word>
<word>barium</word> <word>barium</word>
<word>behaviour</word> <word>behaviour</word>
@ -165,6 +166,7 @@
<word>golconda</word> <word>golconda</word>
<word>gold</word> <word>gold</word>
<word>gracilis</word> <word>gracilis</word>
<word>grandkids</word>
<word>guys'll</word> <word>guys'll</word>
<word>hafnium</word> <word>hafnium</word>
<word>hashtag</word> <word>hashtag</word>
@ -355,6 +357,7 @@
<word>skeevy</word> <word>skeevy</word>
<word>snuck</word> <word>snuck</word>
<word>sodium</word> <word>sodium</word>
<word>someplace</word>
<word>something's</word> <word>something's</word>
<word>spacefaring</word> <word>spacefaring</word>
<word>spell's</word> <word>spell's</word>
@ -366,6 +369,7 @@
<word>stenosis</word> <word>stenosis</word>
<word>stent</word> <word>stent</word>
<word>stenting</word> <word>stenting</word>
<word>stepmom</word>
<word>stereotactic</word> <word>stereotactic</word>
<word>sternotomy</word> <word>sternotomy</word>
<word>strontium</word> <word>strontium</word>

View File

@ -2017,8 +2017,6 @@
<Word from="soldierl" to="soldier!" /> <Word from="soldierl" to="soldier!" />
<Word from="somebodypicks" to="somebody picks" /> <Word from="somebodypicks" to="somebody picks" />
<Word from="somePIace" to="someplace" /> <Word from="somePIace" to="someplace" />
<Word from="someplace" to="some place" />
<Word from="Someplace" to="Some place" />
<Word from="somethlng" to="something" /> <Word from="somethlng" to="something" />
<Word from="somethlng's" to="something's" /> <Word from="somethlng's" to="something's" />
<Word from="somez'/7/ng" to="something" /> <Word from="somez'/7/ng" to="something" />

View File

@ -1574,10 +1574,20 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if (word.Length > 4) if (word.Length > 4)
{ {
var splitWords = StringWithoutSpaceSplitToWords.SplitWord(_wordSplitList, word); if (_threeLetterIsoLanguageName == "eng" &&
if (splitWords != word) word.EndsWith("in", StringComparison.Ordinal) &&
line.Contains(word + "'") &&
DoSpell(word + "g"))
{ {
guesses.Add(splitWords); // avoid words like "workin'" or "holdin'"
}
else
{
var splitWords = StringWithoutSpaceSplitToWords.SplitWord(_wordSplitList, word);
if (splitWords != word)
{
guesses.Add(splitWords);
}
} }
} }
@ -1921,13 +1931,11 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
{ {
foreach (string s in word.Split(' ')) foreach (string s in word.Split(' '))
{ {
if (!DoSpell(s) && !_nameList.Contains(s)) if (!DoSpell(s) &&
!_nameList.Contains(s) &&
!_userWordList.Contains(s) &&
!IsWordKnownOrNumber(s, word))
{ {
if (IsWordKnownOrNumber(word, word))
{
return true;
}
if (s.Length > 10 && s.Contains('/')) if (s.Length > 10 && s.Contains('/'))
{ {
string[] ar = s.Split('/'); string[] ar = s.Split('/');
@ -1950,7 +1958,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if ((DoSpell(a) || IsWordKnownOrNumber(a, word)) && if ((DoSpell(a) || IsWordKnownOrNumber(a, word)) &&
(DoSpell(b) || IsWordKnownOrNumber(b, word))) (DoSpell(b) || IsWordKnownOrNumber(b, word)))
{ {
return true; continue;
} }
} }
} }