Minor OCR stuff

This commit is contained in:
Nikolaj Olsson 2020-05-16 12:52:00 +02:00
parent 8a6e1bc199
commit 38a75d048d
5 changed files with 11 additions and 4 deletions

View File

@@ -2762,6 +2762,9 @@
<Word from="wishyou" to="wish you" />
<Word from="wouldnot" to="would not" />
<Word from="yourpeople" to="your people" />
<Word from="AII's" to="All's" />
<Word from="FIog" to="Flog" />
<Word from="SIit" to="Slit" />
</WholeWords>
<PartialWordsAlways>
<!-- Will be replaced always -->

View File

@@ -3261,6 +3261,7 @@ This file is case sensitive.
<name>Ljubljana</name>
<name>Llithyia</name>
<name>Lloyd</name>
<name>Loch Ness</name>
<name>Lochley</name>
<name>Locke</name>
<name>Lockhart</name>

Binary file not shown.

View File

@@ -4237,7 +4237,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_ocrFixEngine.AutoGuessesUsed.Clear();
// Log unkown words guess (found via spelling dictionaries)
// Log unknown words guess (found via spelling dictionaries)
LogUnknownWords();
ColorLineByNumberOfUnknownWords(listViewIndex, wordsNotFound, line);
@@ -5213,7 +5213,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void CleanLogGreaterThanOrEqualTo(ListBox listBox, int start)
{
listBox.BeginUpdate();
for (int i = listBox.Items.Count-1; i> 0; i--)
for (int i = listBox.Items.Count - 1; i >= 0; i--)
{
var text = listBox.Items[i].ToString();
var idx = text.IndexOf(':');

View File

@@ -600,7 +600,6 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
}
text = FixCommonOcrLineErrors(sb.ToString(), lastLine);
text = FixUnknownWordsViaGuessOrPrompt(out _, text, index, null, true, false, logSuggestions, autoGuess);
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
{
text = FixLowercaseIToUppercaseI(text, lastLine);
@@ -622,6 +621,10 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
text = Utilities.RemoveSpaceBetweenNumbers(text);
}
// must be last - counts/logs unknown words
text = FixUnknownWordsViaGuessOrPrompt(out _, text, index, null, true, false, logSuggestions, autoGuess);
return text;
}