Work on T-ocr #3833

This commit is contained in:
niksedk 2019-11-07 07:49:48 +01:00
parent e71fac823f
commit 984b10e6ee
2 changed files with 9 additions and 2 deletions

View File

@ -637,7 +637,13 @@
<WholeLines />
<PartialLinesAlways />
<PartialLines />
<BeginLines />
<BeginLines>
<Beginning from="-] " to="- I " />
<Beginning from="- ] " to="- I " />
<Beginning from="] " to="I " />
<Beginning from="-| " to="- I " />
<Beginning from="- | " to="- I " />
</BeginLines>
<EndLines />
<RegularExpressions />
</OCRFixReplaceList>

View File

@ -5574,7 +5574,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
string psm = Tesseract3DoOcrViaExe(bitmap, _languageId, "7", _tesseractEngineMode); // 7 = Treat the image as a single text line.
// Sometimes short texts are not recognized at all, or come back as a very short result with no lower-case letters - retrying on an enlarged bitmap seems to help
if (psm == string.Empty && textWithOutFixes == string.Empty)
if (psm == string.Empty && textWithOutFixes == string.Empty ||
psm.Length < 5 && !psm.Contains('.') && psm == psm.ToUpperInvariant()) // e.g. "SEN" (could be more...) - see https://github.com/SubtitleEdit/subtitleedit/issues/3833
{
using (var b = ResizeBitmap(bitmap, bitmap.Width * 2, (int)Math.Round(bitmap.Height * 2.5)))
{