Work on OCR

This commit is contained in:
Nikolaj Olsson 2020-06-12 19:12:38 +02:00
parent 94754fc3de
commit d52a9994ad
2 changed files with 24 additions and 10 deletions

View File

@ -399,12 +399,22 @@
<Word from="sätterpå" to="sätter på" /> <Word from="sätterpå" to="sätter på" />
<Word from="stårpå" to="står på" /> <Word from="stårpå" to="står på" />
<Word from="tillhörpå" to="tillhör på" /> <Word from="tillhörpå" to="tillhör på" />
<Word from="AIIa" to="Alla" />
<Word from="AIIt" to="Allt" />
<Word from="SIuta" to="Sluta" />
<Word from="ÖIen" to="Ölen" />
<Word from="BIått" to="Blått" />
<Word from="GIöm" to="Glöm" />
<Word from="SIäpp" to="Släpp" />
<Word from="BIås" to="Blås" />
<Word from="BIi" to="Bli" />
</WholeWords> </WholeWords>
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords> <PartialWords>
<!-- Will be used to check words not in dictionary --> <!-- Will be used to check words not in dictionary -->
<!-- If new word(s) exists in spelling dictionary, it(they) is accepted --> <!-- If new word(s) exists in spelling dictionary, it(they) is accepted -->
<WordPart from="¤" to="o" /> <WordPart from="¤" to="o" />
<WordPart from="I" to="l" />
<WordPart from="fi" to="fi" /> <WordPart from="fi" to="fi" />
<WordPart from="â" to="ä" /> <WordPart from="â" to="ä" />
<WordPart from="/" to="l" /> <WordPart from="/" to="l" />
@ -441,12 +451,23 @@
<WordPart from="ärp" to="är p" /> <WordPart from="ärp" to="är p" />
</PartialWords> </PartialWords>
<WholeLines /> <WholeLines />
<PartialLinesAlways /> <PartialLinesAlways>
<!-- Each LinePart maps the text in "from" to the text in "to". -->
<!-- NOTE(review): judging by the element name these are presumably applied to every line unconditionally; confirm against the loader. -->
<!-- NOTE(review): the " l" rules below turn a standalone lowercase L into a capital I. The WholeWords entries in this file look Swedish, where a standalone "i" is normally lowercase; confirm these rules are intended for this language. -->
<LinePart from="Apollo 1 3" to="Apollo 13" />
<LinePart from=",.," to="..." />
<LinePart from=" l " to=" I " />
<LinePart from=" l." to=" I." />
<LinePart from=" l?" to=" I?" />
<LinePart from=" l!" to=" I!" />
<LinePart from=" . " to=". " />
</PartialLinesAlways>
<PartialLines /> <PartialLines />
<BeginLines> <BeginLines>
<!-- Each Beginning maps the text in "from" to the text in "to". -->
<!-- NOTE(review): judging by the element name these presumably match only at the start of a line; confirm against the loader. -->
<Beginning from="Ln " to="In " /> <Beginning from="Ln " to="In " />
<Beginning from="U ppfattat" to="Uppfattat" /> <Beginning from="U ppfattat" to="Uppfattat" />
</BeginLines> </BeginLines>
<EndLines /> <EndLines />
<RegularExpressions>
<!-- Regex-based replacements: text matching "find" is replaced with "replaceWith". -->
<!-- A vertical bar sitting directly between two word characters becomes a capital I. -->
<RegEx find="\b\|\b" replaceWith="I" />
<!-- A standalone lowercase l becomes a capital I. The letter is matched literally: "\l" is not a valid .NET regex escape (unrecognized escapes of word characters are rejected by the parser), so the earlier form "\b\l\b" could not be compiled. -->
<RegEx find="\bl\b" replaceWith="I" />
</RegularExpressions>
</OCRFixReplaceList> </OCRFixReplaceList>

View File

@ -8537,12 +8537,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
} }
Cursor = Cursors.WaitCursor; Cursor = Cursors.WaitCursor;
Bitmap bitmap = GetSubtitleBitmap(subtitleListView1.SelectedItems[0].Index); var bitmap = GetSubtitleBitmap(subtitleListView1.SelectedItems[0].Index);
bool oldPrompt = checkBoxPromptForUnknownWords.Checked;
bool oldCorrect = checkBoxNOcrDrawUnknownLetters.Checked;
checkBoxNOcrDrawUnknownLetters.Checked = false;
string result = OcrViaNOCR(bitmap, subtitleListView1.SelectedItems[0].Index);
checkBoxPromptForUnknownWords.Checked = oldPrompt;
Cursor = Cursors.Default; Cursor = Cursors.Default;
using (var inspect = new VobSubNOcrCharacterInspect()) using (var inspect = new VobSubNOcrCharacterInspect())
{ {
@ -8561,8 +8556,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_nOcrDb.LoadOcrCharacters(); _nOcrDb.LoadOcrCharacters();
Cursor = Cursors.Default; Cursor = Cursors.Default;
} }
checkBoxNOcrDrawUnknownLetters.Checked = oldCorrect;
} }
} }