nOCR: Make delete extended char work

This commit is contained in:
Nikolaj Olsson 2020-06-02 19:43:36 +02:00
parent 80f7e0623d
commit 2b061f1849
6 changed files with 33 additions and 19 deletions

View File

@ -194,7 +194,7 @@
this.ShowIcon = false;
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "AddBetterMultiMatchNOcr";
this.Text = "Add better multi-match";
this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.AddBetterMultiMatch_KeyDown);
this.groupBoxInspectItems.ResumeLayout(false);
this.groupBoxInspectItems.PerformLayout();

View File

@ -285,7 +285,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
if (_nocrChar != null)
{
_nocrChars.Remove(_nocrChar);
_nocrDb.Remove(_nocrChar);
ShowStatus("Character deleted");
}
}

View File

@ -10,6 +10,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
public sealed partial class VobSubNOcrEdit : Form
{
private NOcrDb _nOcrDb;
private readonly List<NOcrChar> _nocrChars;
private NOcrChar _nocrChar;
private double _zoomFactor = 5.0;
@ -30,6 +31,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
InitializeComponent();
UiUtil.FixFonts(this);
_nOcrDb = nOcrDb;
_nocrChars = nOcrDb.OcrCharacters;
_bitmap = bitmap;
@ -321,7 +323,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
var oldComboBoxIndex = comboBoxTexts.SelectedIndex;
var oldListBoxIndex = listBoxFileNames.SelectedIndex;
_nocrChars.Remove(_nocrChar);
_nOcrDb.Remove(_nocrChar);
Changed = true;
FillComboBox();
@ -516,7 +518,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
else if (listBoxlinesBackground.Items.Count > 0)
{
listBoxlinesBackground.SelectedIndex = listBoxlinesBackground.Items.Count -1;
listBoxlinesBackground.SelectedIndex = listBoxlinesBackground.Items.Count - 1;
}
}
}

View File

@ -101,7 +101,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_subtitleFontSize = Convert.ToInt32(comboBoxSubtitleFontSize.Items[comboBoxSubtitleFontSize.SelectedIndex].ToString());
var charactersLearned = new List<string>();
foreach (Paragraph p in sub.Paragraphs)
foreach (var p in sub.Paragraphs)
{
foreach (char ch in p.Text)
{
@ -110,14 +110,15 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
var s = ch.ToString();
if (!charactersLearned.Contains(s))
{
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, charactersLearned, s, false, false, false);
charactersLearned.Add(s);
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, s, false, false, false);
if (checkBoxBold.Checked)
{
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, charactersLearned, s, true, false, false);
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, s, true, false, false);
}
if (checkBoxItalic.Checked)
{
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, charactersLearned, s, false, true, false);
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, s, false, true, false);
}
}
}
@ -135,14 +136,15 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
if (!charactersLearned.Contains(text) && text.Length > 1 && text.Length <= 3)
{
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, charactersLearned, text, false, false, true);
charactersLearned.Add(text);
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, text, false, false, true);
if (checkBoxBold.Checked)
{
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, charactersLearned, text, true, false, true);
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, text, true, false, true);
}
if (checkBoxBold.Checked)
{
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, charactersLearned, text, false, true, true);
TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, text, false, true, true);
}
}
}
@ -176,7 +178,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_abort = false;
}
private void TrainLetter(ref int numberOfCharactersLeaned, ref int numberOfCharactersSkipped, NOcrDb nOcrD, List<string> charactersLearned, string s, bool bold, bool italic, bool doubleLetter)
private void TrainLetter(ref int numberOfCharactersLeaned, ref int numberOfCharactersSkipped, NOcrDb nOcrD, string s, bool bold, bool italic, bool doubleLetter)
{
Bitmap bmp = GenerateImageFromTextWithStyle("H " + s, bold, italic);
var nikseBitmap = new NikseBitmap(bmp);
@ -201,7 +203,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
VobSubOcrNOcrCharacter.GenerateLineSegments((int)numericUpDownSegmentsPerCharacter.Value, checkBoxVeryAccurate.Checked, nOcrChar, item.NikseBitmap);
nOcrD.Add(nOcrChar);
charactersLearned.Add(s);
numberOfCharactersLeaned++;
labelInfo.Text = string.Format("Now training font '{1}', total characters learned is {0:#,###,###}, {2:#,###,###} skipped", numberOfCharactersLeaned, _subtitleFontName, numberOfCharactersSkipped);
bmp.Dispose();

View File

@ -4005,8 +4005,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
item = GetExpandedSelectionNew(nbmpInput, expandSelectionList);
// item.NikseBitmap?.ReplaceTransparentWith(Color.Black);
_vobSubOcrNOcrCharacter.Initialize(bitmap, item, _manualOcrDialogPosition, _italicCheckedLast, expandSelectionList.Count > 1, string.Empty);
var result = _vobSubOcrNOcrCharacter.ShowDialog(this);
_manualOcrDialogPosition = _vobSubOcrNOcrCharacter.FormPosition;

View File

@ -1,5 +1,4 @@
using Nikse.SubtitleEdit.Core;
using Nikse.SubtitleEdit.Forms.Ocr;
using System;
using System.Collections.Generic;
using System.Drawing;
@ -88,6 +87,18 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
}
}
/// <summary>
/// Removes the given character from this database. A character whose
/// <c>ExpandCount</c> is greater than zero is removed from
/// <c>OcrCharactersExpanded</c>; any other character is removed from
/// <c>OcrCharacters</c>.
/// </summary>
/// <param name="ocrChar">The character to remove. A null value is ignored.</param>
public void Remove(NOcrChar ocrChar)
{
    // Reading ExpandCount on a null reference would throw, whereas calling
    // List<T>.Remove(null) directly on the underlying list is a harmless no-op.
    // Guard here so callers passing "no current selection" keep that tolerance.
    if (ocrChar == null)
    {
        return;
    }

    if (ocrChar.ExpandCount > 0)
    {
        OcrCharactersExpanded.Remove(ocrChar);
    }
    else
    {
        OcrCharacters.Remove(ocrChar);
    }
}
public NOcrChar GetMatchExpanded(NikseBitmap nikseBitmap, ImageSplitterItem targetItem, int listIndex, List<ImageSplitterItem> list)
{
int w = targetItem.NikseBitmap.Width;
@ -315,11 +326,12 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if (maxWrongPixels >= 2)
{
var errorsAllowed = Math.Min(3, maxWrongPixels);
foreach (var oc in OcrCharacters)
{
if (Math.Abs(widthPercent - oc.WidthPercent) < 20 && Math.Abs(oc.MarginTop - topMargin) < 15)
{
if (IsMatch(bitmap, oc, 2))
if (IsMatch(bitmap, oc, errorsAllowed))
{
return oc;
}
@ -327,13 +339,14 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
}
}
if (maxWrongPixels >= 20)
if (maxWrongPixels >= 10)
{
var errorsAllowed = Math.Min(20, maxWrongPixels);
foreach (var oc in OcrCharacters)
{
if (!oc.IsSensitive && Math.Abs(widthPercent - oc.WidthPercent) < 20 && Math.Abs(oc.MarginTop - topMargin) < 15 && oc.LinesForeground.Count + oc.LinesBackground.Count > 40)
{
if (IsMatch(bitmap, oc, 20))
if (IsMatch(bitmap, oc, errorsAllowed))
{
return oc;
}