Work on T-ocr #3833

2024-11-22 03:02:35 +01:00 · 2019-11-07 07:49:48 +01:00 · 2019-11-07 07:49:48 +01:00 · 984b10e6ee
commit 984b10e6ee
parent e71fac823f
2 changed files with 9 additions and 2 deletions
--- a/Dictionaries/dan_OCRFixReplaceList.xml
+++ b/Dictionaries/dan_OCRFixReplaceList.xml
@@ -637,7 +637,13 @@
  <WholeLines />
  <PartialLinesAlways />
  <PartialLines />
-  <BeginLines />
+  <BeginLines>
+    <Beginning from="-] " to="- I " />
+    <Beginning from="- ] " to="- I " />
+    <Beginning from="] " to="I " />
+    <Beginning from="-| " to="- I " />
+    <Beginning from="- | " to="- I " />
+  </BeginLines>
  <EndLines />
  <RegularExpressions />
 </OCRFixReplaceList>
--- a/src/Forms/Ocr/VobSubOcr.cs
+++ b/src/Forms/Ocr/VobSubOcr.cs
@@ -5574,7 +5574,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
                string psm = Tesseract3DoOcrViaExe(bitmap, _languageId, "7", _tesseractEngineMode); // 7 = Treat the image as a single text line.

                // sometimes short texts are not recognized - this resize seems to help
-                if (psm == string.Empty && textWithOutFixes == string.Empty)
+                if (psm == string.Empty && textWithOutFixes == string.Empty ||
+                    psm.Length < 5 && !psm.Contains('.') && psm == psm.ToUpperInvariant()) // e.g. "SEN" (could be more...) - see https://github.com/SubtitleEdit/subtitleedit/issues/3833
                {
                    using (var b = ResizeBitmap(bitmap, bitmap.Width * 2, (int)Math.Round(bitmap.Height * 2.5)))
                    {