From a848d85b7da02e557e8f72173044942c16df1834 Mon Sep 17 00:00:00 2001 From: Nikolaj Olsson Date: Fri, 22 May 2020 21:05:37 +0200 Subject: [PATCH] Work on OCR (minor optimize+fix typo+work on train) --- src/Forms/Ocr/BinaryOcrTrain.cs | 30 +++++++++++++++++++++- src/Forms/Ocr/VobSubOcr.cs | 36 ++++++++++++++------------- src/Logic/NikseBitmapImageSplitter.cs | 2 +- 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/src/Forms/Ocr/BinaryOcrTrain.cs b/src/Forms/Ocr/BinaryOcrTrain.cs index 882e5ea01..5fa94bb4d 100644 --- a/src/Forms/Ocr/BinaryOcrTrain.cs +++ b/src/Forms/Ocr/BinaryOcrTrain.cs @@ -25,6 +25,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private float _subtitleFontSize = 25.0f; private readonly Color _borderColor = Color.Black; private const float BorderWidth = 2.0f; + private bool _abort = false; public BinaryOcrTrain() { @@ -179,6 +180,12 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void buttonTrain_Click(object sender, EventArgs e) { + if (buttonTrain.Text == "Abort") + { + _abort = true; + return; + } + saveFileDialog1.DefaultExt = ".db"; saveFileDialog1.Filter = "*Binary Image Compare DB files|*.db"; if (saveFileDialog1.ShowDialog(this) != DialogResult.OK) @@ -186,6 +193,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr return; } + buttonTrain.Text = "Abort"; var startFontSize = Convert.ToInt32(comboBoxSubtitleFontSize.Items[comboBoxSubtitleFontSize.SelectedIndex].ToString()); var endFontSize = Convert.ToInt32(comboBoxFontSizeEnd.Items[comboBoxFontSizeEnd.SelectedIndex].ToString()); @@ -235,11 +243,31 @@ namespace Nikse.SubtitleEdit.Forms.Ocr labelInfo.Text = string.Format("Now training font '{1}', total characters learned is {0}", numberOfCharactersLeaned, _subtitleFontName); } } + if (_abort) + { + break; + } } + if (_abort) + { + break; + } + } + + if (_abort) + { + break; } } bicDb.Save(); - labelInfo.Text = "Training completed and saved in " + saveFileDialog1.FileName; + if (_abort) + { + labelInfo.Text = "Partially (aborted) training 
completed and saved in " + saveFileDialog1.FileName; + } + else + { + labelInfo.Text = "Training completed and saved in " + saveFileDialog1.FileName; + } } private void buttonOK_Click(object sender, EventArgs e) diff --git a/src/Forms/Ocr/VobSubOcr.cs b/src/Forms/Ocr/VobSubOcr.cs index abd5bbd45..cf27ca8ff 100644 --- a/src/Forms/Ocr/VobSubOcr.cs +++ b/src/Forms/Ocr/VobSubOcr.cs @@ -3614,7 +3614,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr return; } - if (maxDiff < 0.2 || target.Width < 3 || target.Height < 5) + var tWidth = target.Width; + var tHeight = target.Height; + if (maxDiff < 0.2 || tWidth < 3 || tHeight < 5) { return; } @@ -3624,7 +3626,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width == target.Width && compareItem.Height == target.Height) // precise math in size + if (compareItem.Width == tWidth && compareItem.Height == tHeight) // precise math in size { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < 3) { @@ -3651,7 +3653,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width == target.Width && compareItem.Height == target.Height) // precise math in size + if (compareItem.Width == tWidth && compareItem.Height == tHeight) // precise math in size { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < 40) { @@ -3675,11 +3677,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - if (target.Width > 16 && target.Height > 16 && smallestDifference > 2) // for other than very narrow letter (like 'i' and 'l' and 'I'), try more sizes + if (tWidth > 16 && tHeight > 16 && smallestDifference > 2) // for other than very narrow letter (like 'i' and 'l' and 'I'), try more sizes { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width == target.Width && compareItem.Height == target.Height - 1) + if (compareItem.Width == tWidth && compareItem.Height == tHeight - 
1) { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < minForeColorMatch) { @@ -3706,7 +3708,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width == target.Width && compareItem.Height == target.Height + 1) + if (compareItem.Width == tWidth && compareItem.Height == tHeight + 1) { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < minForeColorMatch) { @@ -3734,7 +3736,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width == target.Width + 1 && compareItem.Height == target.Height + 1) + if (compareItem.Width == tWidth + 1 && compareItem.Height == tHeight + 1) { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < minForeColorMatch) { @@ -3762,7 +3764,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width == target.Width - 1 && compareItem.Height == target.Height - 1) + if (compareItem.Width == tWidth - 1 && compareItem.Height == tHeight - 1) { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < minForeColorMatch) { @@ -3790,7 +3792,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width - 1 == target.Width && compareItem.Height == target.Height) + if (compareItem.Width - 1 == tWidth && compareItem.Height == tHeight) { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < minForeColorMatch) { @@ -3814,11 +3816,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - if (smallestDifference > 9 && target.Width > 11) + if (smallestDifference > 9 && tWidth > 11) { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width == target.Width - 2 && compareItem.Height == target.Height) + if (compareItem.Width == tWidth - 2 && compareItem.Height == tHeight) { if 
(Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < minForeColorMatch) { @@ -3842,11 +3844,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - if (smallestDifference > 9 && target.Width > 14) + if (smallestDifference > 9 && tWidth > 14) { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width == target.Width - 3 && compareItem.Height == target.Height) + if (compareItem.Width == tWidth - 3 && compareItem.Height == tHeight) { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < minForeColorMatch) { @@ -3870,11 +3872,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - if (smallestDifference > 9 && target.Width > 14) + if (smallestDifference > 9 && tWidth > 14) { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width == target.Width && compareItem.Height == target.Height - 3) + if (compareItem.Width == tWidth && compareItem.Height == tHeight - 3) { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < minForeColorMatch) { @@ -3898,11 +3900,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - if (smallestDifference > 9 && target.Width > 14) + if (smallestDifference > 9 && tWidth > 14) { foreach (var compareItem in binOcrDb.CompareImages) { - if (compareItem.Width - 2 == target.Width && compareItem.Height == target.Height) + if (compareItem.Width - 2 == tWidth && compareItem.Height == tHeight) { if (Math.Abs(compareItem.NumberOfColoredPixels - numberOfForegroundColors) < minForeColorMatch) { diff --git a/src/Logic/NikseBitmapImageSplitter.cs b/src/Logic/NikseBitmapImageSplitter.cs index 002a3b506..b9c64fe3e 100644 --- a/src/Logic/NikseBitmapImageSplitter.cs +++ b/src/Logic/NikseBitmapImageSplitter.cs @@ -771,7 +771,7 @@ namespace Nikse.SubtitleEdit.Logic { var list = new List(); - // split into seperate lines + // split into separate lines List verticalBitmaps = SplitVertical(bmp, minLineHeight, averageLineHeight); if (!topToBottom)