Improve detection of italic letters in nOCR

This commit is contained in:
Nikolaj Olsson 2020-06-01 10:40:03 +02:00
parent f2fd1ebf43
commit 1d8cb732d0
4 changed files with 47 additions and 15 deletions

View File

@ -3,6 +3,7 @@ using Nikse.SubtitleEdit.Core;
using System.IO;
using System.Windows.Forms;
using Nikse.SubtitleEdit.Logic;
using Nikse.SubtitleEdit.Logic.Ocr;
using Nikse.SubtitleEdit.Logic.Ocr.Binary;
namespace Nikse.SubtitleEdit.Forms.Ocr
@ -41,9 +42,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
comboBoxNOcrLanguage.Items.Clear();
comboBoxNOcrLanguage.Items.Add(string.Empty);
foreach (string fileName in Directory.GetFiles(Configuration.OcrDirectory, "*.nocr"))
foreach (string s in NOcrDb.GetDatabases())
{
string s = Path.GetFileNameWithoutExtension(fileName);
comboBoxNOcrLanguage.Items.Add(s);
if (s == nOcrDbName)
{

View File

@ -2777,6 +2777,29 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
var result = NOcrFindBestMatchNew(targetItem, tryItalicScaling, nOcrDb, deepSeek, _unItalicFactor, (int)numericUpDownNOcrMaxWrongPixels.Value);
if (result == null)
{
// try to make letter normal via un-italic angle
if (tryItalicScaling && targetItem.NikseBitmap != null)
{
var unItalicNikseBitmap = new NikseBitmap(targetItem.NikseBitmap);
unItalicNikseBitmap.ReplaceColor(255, 0, 0, 0, 0, 0, 0, 0);
unItalicNikseBitmap.MakeTwoColor(200);
var oldBmp = unItalicNikseBitmap.GetBitmap();
var unItalicImage = UnItalic(oldBmp, _unItalicFactor); //TODO: make unitalic in NikseBitmap
unItalicNikseBitmap = new NikseBitmap(unItalicImage);
unItalicNikseBitmap.CropTransparentSidesAndBottom(0, false);
oldBmp.Dispose();
unItalicImage.Dispose();
var unItalicTargetItem = new ImageSplitterItem(targetItem.X, targetItem.Y, unItalicNikseBitmap) { Top = targetItem.Top };
result = NOcrFindBestMatchNew(unItalicTargetItem, false, nOcrDb, deepSeek, _unItalicFactor, (int)numericUpDownNOcrMaxWrongPixels.Value);
if (result != null)
{
_italicFixes++;
}
}
if (result == null)
{
if (checkBoxNOcrCorrect.Checked)
@ -2786,12 +2809,15 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
return new CompareMatch("*", false, 0, null);
}
}
FixUppercaseLowercaseIssues(targetItem, result);
return new CompareMatch(result.Text, result.Italic, 0, null, result) { Y = targetItem.Y };
}
// Counts how many times the un-italic retry produced a match after the initial nOCR lookup returned null.
public static int _italicFixes = 0;
private CompareMatch GetCompareMatchNew(ImageSplitterItem targetItem, out CompareMatch secondBestGuess, List<ImageSplitterItem> list, int listIndex, BinaryOcrDb binaryOcrDb)
{
double maxDiff = _numericUpDownMaxErrorPct;
@ -6786,9 +6812,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
comboBoxNOcrLanguage.Items.Clear();
int index = 0;
int selIndex = 0;
foreach (string fileName in Directory.GetFiles(Configuration.OcrDirectory, "*.nocr"))
foreach (var s in NOcrDb.GetDatabases())
{
string s = Path.GetFileNameWithoutExtension(fileName);
if (s == Configuration.Settings.VobSubOcr.LineOcrLastLanguages)
{
selIndex = index;

View File

@ -243,14 +243,11 @@ namespace Nikse.SubtitleEdit.Logic.Ocr.Binary
public static List<string> GetDatabases()
{
var list = new List<string>();
foreach (string dir in Directory.GetFiles(Configuration.OcrDirectory.TrimEnd(Path.DirectorySeparatorChar), "*.db"))
{
string s = Path.GetFileNameWithoutExtension(dir);
list.Add(s);
return Directory
.GetFiles(Configuration.OcrDirectory.TrimEnd(Path.DirectorySeparatorChar), "*.db")
.Select(Path.GetFileNameWithoutExtension)
.OrderBy(p => p)
.ToList();
}
return list.OrderBy(p => p).ToList();
}
}
}

View File

@ -5,6 +5,7 @@ using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.IO.Compression;
using System.Linq;
namespace Nikse.SubtitleEdit.Logic.Ocr
{
@ -462,5 +463,14 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
return true;
}
/// <summary>
/// Lists the "*.nocr" files found in the configured OCR directory.
/// </summary>
/// <returns>The file names without extension, sorted in ascending order.</returns>
public static List<string> GetDatabases()
{
    // Strip any trailing directory separator before searching the folder.
    var ocrFolder = Configuration.OcrDirectory.TrimEnd(Path.DirectorySeparatorChar);

    var names = new List<string>();
    foreach (var fileName in Directory.GetFiles(ocrFolder, "*.nocr"))
    {
        names.Add(Path.GetFileNameWithoutExtension(fileName));
    }

    return names.OrderBy(name => name).ToList();
}
}
}