Minor OCR fixes

This commit is contained in:
Nikolaj Olsson 2020-04-16 09:42:27 +02:00
parent fdae007ed0
commit 9e96ad4434
5 changed files with 33 additions and 1 deletion

View File

@ -1304,6 +1304,7 @@
<Word from="Ldid" to="I did" />
<Word from="ldiot" to="Idiot" />
<Word from="L'djump" to="I'd jump" />
<Word from="L'Il" to="I'll" />
<Word from="ldon't" to="I don't" />
<Word from="Ldon't" to="I don't" />
<Word from="leastpeople" to="least people" />
@ -2459,6 +2460,8 @@
<Word from="Whodunit" to="Whodunnit" />
<Word from="whoJuma" to="who Juma" />
<Word from="whoJuma's" to="who Juma's" />
<Word from="whojust" to="who just" />
<Word from="Whojust" to="Who just" />
<Word from="Whyyou" to="Why you" />
<Word from="whyyou" to="why you" />
<Word from="wi//" to="will" />
@ -3060,6 +3063,7 @@
<Beginning from="-l don't" to="-I don't" />
<Beginning from="l don't" to="I don't" />
<Beginning from="L " to="I " />
<Beginning from="L'Il " to="I'll " />
<Beginning from="-L " to="-I " />
<Beginning from="-l " to="-I " />
<Beginning from="- l " to="- I " />

View File

@ -1481,6 +1481,7 @@ This file is case sensitive.
<name>Dashawn</name>
<name>Datsun</name>
<name>Daughtry</name>
<name>Dauphine</name>
<name>Dautry</name>
<name>Dave</name>
<name>Dave Matthews Band</name>
@ -3212,6 +3213,7 @@ This file is case sensitive.
<name>Libreville</name>
<name>Liby</name>
<name>Libya</name>
<name>Licia</name>
<name>Lidocaine</name>
<name>Liebowitz</name>
<name>Liechtenstein</name>

Binary file not shown.

View File

@ -547,26 +547,42 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
/// <summary>
/// Looks up a replacement for <paramref name="word"/> in <c>WordReplaceList</c>, also trying keys
/// that include the surrounding <paramref name="pre"/> and <paramref name="post"/> text.
/// </summary>
/// <remarks>
/// Candidate keys are tried from most to least specific; the first hit wins:
/// <c>pre + word + post</c>, <c>pre + word</c>, <c>word + post</c>, and finally <c>word</c>.
/// Whatever part of <paramref name="pre"/> / <paramref name="post"/> was not part of the matched key
/// is re-attached around the replacement text.
/// </remarks>
/// <param name="pre">Text immediately before the word; may be null or empty.</param>
/// <param name="word">The word to look up.</param>
/// <param name="post">Text immediately after the word; may be null or empty.</param>
/// <param name="result">The replacement text when a key matched; otherwise <c>null</c>.</param>
/// <returns><c>true</c> if a replacement was found; otherwise <c>false</c>.</returns>
private bool GetReplaceWord(string pre, string word, string post, out string result)
{
    string replacement;

    // Nothing around the word: the bare word is the only possible key.
    // TryGetValue leaves result as null on a miss, matching the documented contract.
    if (string.IsNullOrEmpty(pre) && string.IsNullOrEmpty(post))
    {
        return WordReplaceList.TryGetValue(word, out result);
    }

    // Each candidate key is built and hashed once (TryGetValue) instead of
    // twice (ContainsKey followed by the indexer).
    if (WordReplaceList.TryGetValue(pre + word + post, out replacement))
    {
        // The key covered everything, so the replacement stands alone.
        result = replacement;
        return true;
    }

    if (WordReplaceList.TryGetValue(pre + word, out replacement))
    {
        // Key consumed the prefix; put the suffix back.
        result = replacement + post;
        return true;
    }

    if (WordReplaceList.TryGetValue(word + post, out replacement))
    {
        // Key consumed the suffix; put the prefix back.
        result = pre + replacement;
        return true;
    }

    if (WordReplaceList.TryGetValue(word, out replacement))
    {
        // Only the bare word matched; keep both surrounding parts.
        result = pre + replacement + post;
        return true;
    }

    result = null;
    return false;
}

View File

@ -575,6 +575,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
comboBoxLineSplitMinLineHeight.SelectedIndex = 0;
if (comboBoxDictionaries.SelectedIndex == -1)
{
comboBoxDictionaries.SelectedIndex = 0;
}
}
private void FillSpellCheckDictionaries()
@ -4151,7 +4156,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
//OCR fix engine
string textWithOutFixes = line;
if (_ocrFixEngine.IsDictionaryLoaded)
if (_ocrFixEngine != null && _ocrFixEngine.IsDictionaryLoaded)
{
var autoGuessLevel = OcrFixEngine.AutoGuessLevel.None;
if (checkBoxGuessUnknownWords.Checked)
@ -5078,6 +5083,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
return;
}
if (comboBoxDictionaries.SelectedIndex <= 0)
{
_ocrFixEngine = new OcrFixEngine(string.Empty, string.Empty, this, _ocrMethodIndex == _ocrMethodBinaryImageCompare);
}
InitializeTopAlign();
if (_ocrMethodIndex == _ocrMethodTesseract302 || _ocrMethodIndex == _ocrMethodTesseract4)