Work on OCR

This commit is contained in:
Nikolaj Olsson 2018-11-24 12:04:44 +01:00
parent 6be942076d
commit 8ae0a6e89d
3 changed files with 27 additions and 2 deletions

View File

@@ -1978,6 +1978,9 @@
<Word from="PIaza" to="Plaza" />
<Word from="youll" to="you'll" />
<Word from="Icould" to="I could" />
<Word from="Ispent" to="I spent" />
<Word from="Italked" to="I talked" />
<Word from="itis" to="it is" />
</WholeWords>
<PartialWordsAlways>
<!-- Will be replaced always -->
@@ -2439,7 +2442,18 @@
<Beginning from="I'II " to="I'll " />
<Beginning from="Do l " to="Do I " />
<Beginning from="-] " to="- I " />
<Beginning from="- ] " to="- I " />
<Beginning from="] " to="I " />
<Beginning from="-| " to="- I " />
<Beginning from="- | " to="- I " />
<Beginning from="!/ know" to="I know" />
<Beginning from="!/ promise" to="I promise" />
<Beginning from="!/ think" to="I think" />
<Beginning from="!/ will" to="I will" />
<Beginning from="!/ can" to="I can" />
<Beginning from="!/ see" to="I see" />
<Beginning from="!/ do" to="I do" />
<Beginning from="!/ am" to="I am" />
</BeginLines>
<EndLines>
<Ending from=", sin" to=", sir." />

View File

@@ -1489,7 +1489,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
if (_ocrMethodIndex == _ocrMethodTesseract4 && !_fromMenuItem)
{
var nb = new NikseBitmap(returnBmp);
nb.AddMargin(2);
nb.AddMargin(10);
nb.MakeTwoColor(Configuration.Settings.Tools.OcrBinaryImageCompareRgbThreshold, Color.White, Color.Black);
returnBmp.Dispose();
return nb.GetBitmap();

View File

@@ -1097,9 +1097,20 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
guesses.Add(w);
}
if (!correct && autoFix && word.Length > 3 && char.IsUpper(word[0]) && !_nameList.Contains(word))
{
var rest = word.Substring(1);
if (rest != rest.ToUpperInvariant())
{
var newWord = word[0] + rest.ToLowerInvariant();
if (_nameList.Contains(newWord))
guesses.Add(newWord);
}
}
if (word.Length > 5 && autoGuess == AutoGuessLevel.Aggressive)
{
guesses = (List<string>)_ocrFixReplaceList.CreateGuessesFromLetters(word);
guesses.AddRange((List<string>)_ocrFixReplaceList.CreateGuessesFromLetters(word));
if (word[0] == 'L')
guesses.Add("I" + word.Substring(1));