Work on nOCR

This commit is contained in:
Nikolaj Olsson 2020-06-14 08:05:07 +02:00
parent f280305141
commit 3a9dbfe2dc
4 changed files with 32 additions and 8 deletions

Binary file not shown.

View File

@@ -55,12 +55,23 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
break; break;
} }
var m = _matches[i]; var item = _splitterItems[i];
if (m == null || m.Extra?.Count > 0) if (item.SpecialCharacter != null)
{ {
break; break;
} }
var m = _matches[i];
if (m != null && m.Extra?.Count > 0)
{
break;
}
if (m == null)
{
m = new VobSubOcr.CompareMatch(string.Empty, false, 0, null);
}
count++; count++;
listBoxInspectItems.Items.Add(m); listBoxInspectItems.Items.Add(m);
if (count < 3) if (count < 3)

View File

@@ -196,7 +196,14 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
checkBoxItalic.Checked = _nOcrChar.Italic; checkBoxItalic.Checked = _nOcrChar.Italic;
pictureBoxCharacter.Invalidate(); pictureBoxCharacter.Invalidate();
groupBoxCurrentCompareImage.Enabled = true; groupBoxCurrentCompareImage.Enabled = true;
if (_nOcrChar.ExpandCount > 0)
{
labelNOcrCharInfo.Text = string.Format("Size: {0}x{1}, margin top: {2}, expand count: {3}", _nOcrChar.Width, _nOcrChar.Height, _nOcrChar.MarginTop, _nOcrChar.ExpandCount);
}
else
{
labelNOcrCharInfo.Text = string.Format("Size: {0}x{1}, margin top: {2} ", _nOcrChar.Width, _nOcrChar.Height, _nOcrChar.MarginTop); labelNOcrCharInfo.Text = string.Format("Size: {0}x{1}, margin top: {2} ", _nOcrChar.Width, _nOcrChar.Height, _nOcrChar.MarginTop);
}
if (pictureBoxCharacter.Image != null) if (pictureBoxCharacter.Image != null)
{ {

View File

@@ -3890,7 +3890,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{ {
var match = matches[i]; var match = matches[i];
var matchNext = matches[i + 1]; var matchNext = matches[i + 1];
if (!match.Italic || match.Text == "," || if (!match.Italic || matchNext.Text == "," ||
string.IsNullOrWhiteSpace(match.Text) || string.IsNullOrWhiteSpace(matchNext.Text) || string.IsNullOrWhiteSpace(match.Text) || string.IsNullOrWhiteSpace(matchNext.Text) ||
match.ImageSplitterItem == null || matchNext.ImageSplitterItem == null) match.ImageSplitterItem == null || matchNext.ImageSplitterItem == null)
{ {
@@ -3899,6 +3899,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
} }
int blankVerticalLines = IsVerticalAngledLineTransparent(parentBitmap, match, matchNext, unItalicFactor); int blankVerticalLines = IsVerticalAngledLineTransparent(parentBitmap, match, matchNext, unItalicFactor);
if (match.Text == "f" || match.Text == "," || matchNext.Text.StartsWith('y') || matchNext.Text.StartsWith('j'))
{
blankVerticalLines++;
}
if (blankVerticalLines >= pixelsIsSpace) if (blankVerticalLines >= pixelsIsSpace)
{ {
matchNext.ImageSplitterItem.CouldBeSpaceBefore = true; matchNext.ImageSplitterItem.CouldBeSpaceBefore = true;
@@ -4807,7 +4812,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
return; return;
} }
InitializeNOcrThreads(max);
_autoLineHeight = comboBoxNOcrLineSplitMinHeight.SelectedIndex == 0; _autoLineHeight = comboBoxNOcrLineSplitMinHeight.SelectedIndex == 0;
if (comboBoxNOcrLineSplitMinHeight.Visible && comboBoxNOcrLineSplitMinHeight.SelectedIndex > 0) if (comboBoxNOcrLineSplitMinHeight.Visible && comboBoxNOcrLineSplitMinHeight.SelectedIndex > 0)
{ {
@@ -4817,6 +4821,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{ {
_ocrMinLineHeight = -1; _ocrMinLineHeight = -1;
} }
InitializeNOcrThreads(max);
} }
else if (_ocrMethodIndex == _ocrMethodBinaryImageCompare) else if (_ocrMethodIndex == _ocrMethodBinaryImageCompare)
{ {
@@ -4910,6 +4916,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
p.Index += p.Increment; p.Index += p.Increment;
if (p.Index < _subtitle.Paragraphs.Count) if (p.Index < _subtitle.Paragraphs.Count)
{ {
p = new NOcrThreadParameter(p.Index, _nOcrDbThread, p.Self, p.Increment, _unItalicFactor, checkBoxNOcrItalic.Checked, _numericUpDownPixelsIsSpace, checkBoxRightToLeftNOCR.Checked);
p.Self.RunWorkerAsync(p); p.Self.RunWorkerAsync(p);
} }
} }
@@ -4923,7 +4930,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
bmp.Dispose(); bmp.Dispose();
var minLineHeight = GetMinLineHeight(); var minLineHeight = GetMinLineHeight();
var list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(parentBitmap, p.NumberOfPixelsIsSpace, p.RightToLeft, Configuration.Settings.VobSubOcr.TopToBottom, minLineHeight, _autoLineHeight); var list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(parentBitmap, p.NumberOfPixelsIsSpace, p.RightToLeft, Configuration.Settings.VobSubOcr.TopToBottom, minLineHeight, _autoLineHeight);
UpdateLineHeights(list);
p.ResultMatches = new List<CompareMatch>(); p.ResultMatches = new List<CompareMatch>();
int index = 0; int index = 0;
while (index < list.Count) while (index < list.Count)
@@ -4935,14 +4941,14 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
} }
else else
{ {
var match = GetNOcrCompareMatchNew(item, parentBitmap, p.NOcrDb, true, true, index, list); var match = GetNOcrCompareMatchNew(item, parentBitmap, p.NOcrDb, p.AdvancedItalicDetection, true, index, list);
if (match == null) if (match == null)
{ {
p.ResultText = string.Empty; p.ResultText = string.Empty;
return; return;
} }
p.ResultMatches.Add(new CompareMatch(match.Text, match.Italic, 0, null)); p.ResultMatches.Add(new CompareMatch(match.Text, match.Italic, 0, null) { ImageSplitterItem = item });
if (match.ExpandCount > 0) if (match.ExpandCount > 0)
{ {
index += match.ExpandCount - 1; index += match.ExpandCount - 1;