Minor work on ocr

This commit is contained in:
Nikolaj Olsson 2018-11-20 20:14:32 +01:00
parent 602b057131
commit d8535f5e05
3 changed files with 14 additions and 2 deletions

View File

@ -1999,6 +1999,7 @@
If new word(s) and longer than 5 chars and exists in spelling dictionary, it is (or they are) accepted -->
<WordPart from="IVI" to="M" />
<WordPart from="/" to="l" />
<WordPart from="|" to="I" />
<WordPart from="vv" to="w" />
<WordPart from="m" to="rn" />
<WordPart from="l" to="i" />
@ -2303,6 +2304,8 @@
<LinePart from=" that' s " to=" that's " />
<LinePart from="1 2th " to="12th " />
<LinePart from="-| " to="- I " />
<LinePart from=" | " to=" I " />
<LinePart from=" |." to=" I." />
<LinePart from="-1 am " to="- I am " />
<LinePart from="-1 had " to="- I had " />
<LinePart from="-1 think " to="- I think " />
@ -2430,6 +2433,7 @@
<Beginning from="O kay, " to="Okay, " />
<Beginning from="l didn't" to="I didn't" />
<Beginning from="l don't" to="I don't" />
<Beginning from="-1 " to="- I " />
</BeginLines>
<EndLines>
<Ending from=", sin" to=", sir." />
@ -2461,5 +2465,11 @@
<RegularExpressions>
<RegEx find="([a-z]) Won't " replaceWith="$1 won't " />
<RegEx find=" L([,\r\n :;!?]+)" replaceWith=" I$1" />
<RegEx find="^-1 (\p{L})" replaceWith="- I $1" />
<RegEx find="^-1t (\p{L})" replaceWith="- It $1" />
<RegEx find="^-_\.(\p{L})" replaceWith="- ...$1" />
<RegEx find="^_\.\.(\p{L})" replaceWith="...$1" />
<!-- <RegEx find="(\p{L}{2,})&quot;s " replaceWith="$1's " /> -->
</RegularExpressions>
</OCRFixReplaceList>

View File

@ -1864,6 +1864,7 @@ This file is generated/updated by Multi Translator
<name>Dudek</name>
<name>Dudley</name>
<name>Duffy</name>
<name>Dufresne</name>
<name>Duke</name>
<name>Dukes</name>
<name>Dulles</name>

View File

@ -42,7 +42,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr.Tesseract
return;
}
var job = (ImageJob)j;
var job = (ImageJob)j;
job.Result = _tesseractRunner.Run(job.LanguageCode, job.PsmMode, job.EngineMode, job.FileName, job.Run302);
lock (QueueLock)
{
@ -82,7 +82,8 @@ namespace Nikse.SubtitleEdit.Logic.Ocr.Tesseract
if (job != null && job.Completed < checkTime)
{
_jobQueue.Dequeue();
_callback?.Invoke(job.Index, job);
if (!_abort)
_callback?.Invoke(job.Index, job);
}
}
}