Minor settings fixes for OCR

This commit is contained in:
Nikolaj Olsson 2020-06-06 20:12:34 +02:00
parent 1f33164728
commit 764bda26a1
4 changed files with 64 additions and 30 deletions

View File

@ -1394,6 +1394,7 @@ $HorzAlign = Center
public double ItalicFactor { get; set; }
public bool LineOcrDraw { get; set; }
public int LineOcrMinHeightSplit { get; set; }
public bool LineOcrAdvancedItalic { get; set; }
public string LineOcrLastLanguages { get; set; }
public string LineOcrLastSpellCheck { get; set; }
@ -5267,6 +5268,12 @@ $HorzAlign = Center
settings.VobSubOcr.LineOcrDraw = Convert.ToBoolean(subNode.InnerText);
}
subNode = node.SelectSingleNode("LineOcrMinHeightSplit");
if (subNode != null)
{
settings.VobSubOcr.LineOcrMinHeightSplit = Convert.ToInt32(subNode.InnerText);
}
subNode = node.SelectSingleNode("LineOcrAdvancedItalic");
if (subNode != null)
{
@ -7519,6 +7526,7 @@ $HorzAlign = Center
textWriter.WriteElementString("AutoBreakSubtitleIfMoreThanTwoLines", settings.VobSubOcr.AutoBreakSubtitleIfMoreThanTwoLines.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("ItalicFactor", settings.VobSubOcr.ItalicFactor.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("LineOcrDraw", settings.VobSubOcr.LineOcrDraw.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("LineOcrMinHeightSplit", settings.VobSubOcr.LineOcrMinHeightSplit.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("LineOcrAdvancedItalic", settings.VobSubOcr.LineOcrAdvancedItalic.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("LineOcrLastLanguages", settings.VobSubOcr.LineOcrLastLanguages);
textWriter.WriteElementString("LineOcrLastSpellCheck", settings.VobSubOcr.LineOcrLastSpellCheck);

View File

@ -61,7 +61,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.label2 = new System.Windows.Forms.Label();
this.comboBoxNOcrLanguage = new System.Windows.Forms.ComboBox();
this.checkBoxNOcrItalic = new System.Windows.Forms.CheckBox();
this.checkBoxNOcrCorrect = new System.Windows.Forms.CheckBox();
this.checkBoxNOcrDrawUnknownLetters = new System.Windows.Forms.CheckBox();
this.checkBoxRightToLeftNOCR = new System.Windows.Forms.CheckBox();
this.numericUpDownNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.NumericUpDown();
this.labelNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.Label();
@ -443,10 +443,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
// groupBoxOcrMethod
//
this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5);
this.groupBoxOcrMethod.Name = "groupBoxOcrMethod";
this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192);
@ -481,7 +481,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.groupBoxNOCR.Controls.Add(this.label2);
this.groupBoxNOCR.Controls.Add(this.comboBoxNOcrLanguage);
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic);
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrCorrect);
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrDrawUnknownLetters);
this.groupBoxNOCR.Controls.Add(this.checkBoxRightToLeftNOCR);
this.groupBoxNOCR.Controls.Add(this.numericUpDownNumberOfPixelsIsSpaceNOCR);
this.groupBoxNOCR.Controls.Add(this.labelNumberOfPixelsIsSpaceNOCR);
@ -656,15 +656,15 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.checkBoxNOcrItalic.Text = "Contains italic";
this.checkBoxNOcrItalic.UseVisualStyleBackColor = true;
//
// checkBoxNOcrCorrect
// checkBoxNOcrDrawUnknownLetters
//
this.checkBoxNOcrCorrect.AutoSize = true;
this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(235, 17);
this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect";
this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17);
this.checkBoxNOcrCorrect.TabIndex = 7;
this.checkBoxNOcrCorrect.Text = "Draw missing texts";
this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true;
this.checkBoxNOcrDrawUnknownLetters.AutoSize = true;
this.checkBoxNOcrDrawUnknownLetters.Location = new System.Drawing.Point(235, 17);
this.checkBoxNOcrDrawUnknownLetters.Name = "checkBoxNOcrDrawUnknownLetters";
this.checkBoxNOcrDrawUnknownLetters.Size = new System.Drawing.Size(116, 17);
this.checkBoxNOcrDrawUnknownLetters.TabIndex = 7;
this.checkBoxNOcrDrawUnknownLetters.Text = "Draw missing texts";
this.checkBoxNOcrDrawUnknownLetters.UseVisualStyleBackColor = true;
//
// checkBoxRightToLeftNOCR
//
@ -697,6 +697,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
0,
0,
0});
this.numericUpDownNumberOfPixelsIsSpaceNOCR.ValueChanged += new System.EventHandler(this.numericUpDownNumberOfPixelsIsSpaceNOCR_ValueChanged);
//
// labelNumberOfPixelsIsSpaceNOCR
//
@ -1964,7 +1965,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private System.Windows.Forms.CheckBox checkBoxRightToLeftNOCR;
private System.Windows.Forms.NumericUpDown numericUpDownNumberOfPixelsIsSpaceNOCR;
private System.Windows.Forms.Label labelNumberOfPixelsIsSpaceNOCR;
private System.Windows.Forms.CheckBox checkBoxNOcrCorrect;
private System.Windows.Forms.CheckBox checkBoxNOcrDrawUnknownLetters;
private System.Windows.Forms.CheckBox checkBoxNOcrItalic;
private System.Windows.Forms.Button buttonGetTesseractDictionaries;
private System.Windows.Forms.ToolStripMenuItem toolStripMenuItemInspectNOcrMatches;

View File

@ -543,7 +543,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
toolStripMenuItemClearGuesses.Text = Configuration.Settings.Language.DvdSubRip.Clear;
clearToolStripMenuItem.Text = Configuration.Settings.Language.DvdSubRip.Clear;
checkBoxNOcrCorrect.Checked = Configuration.Settings.VobSubOcr.LineOcrDraw;
checkBoxNOcrDrawUnknownLetters.Checked = Configuration.Settings.VobSubOcr.LineOcrDraw;
comboBoxNOcrLineSplitMinHeight.SelectedIndex = Configuration.Settings.VobSubOcr.LineOcrMaxLineHeight;
checkBoxNOcrItalic.Checked = Configuration.Settings.VobSubOcr.LineOcrAdvancedItalic;
numericUpDownNOcrMaxWrongPixels.Value = Configuration.Settings.VobSubOcr.LineOcrMaxErrorPixels;
@ -804,6 +805,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
Initialize(subtitles, vobSubOcrSettings, fileName);
_ocrMethodIndex = Configuration.Settings.VobSubOcr.LastOcrMethod == "Tesseract4" ? _ocrMethodTesseract4 : _ocrMethodTesseract302;
var oldNOcrDrawText = checkBoxNOcrDrawUnknownLetters.Checked;
if (ocrEngine?.ToLowerInvariant() == "nocr")
{
@ -827,6 +829,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
checkBoxShowOnlyForced.Checked = forcedOnly;
DoBatch();
checkBoxNOcrDrawUnknownLetters.Checked = oldNOcrDrawText;
}
internal void InitializeBatch(List<VobSubMergedPack> vobSubMergedPackList, List<Color> palette, VobSubOcrSettings vobSubOcrSettings, string fileName, bool forcedOnly, string language)
@ -2849,7 +2852,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
if (result == null)
{
if (checkBoxNOcrCorrect.Checked)
if (checkBoxNOcrDrawUnknownLetters.Checked)
{
return null;
}
@ -3997,7 +4000,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void NOCRIntialize(Bitmap bitmap)
{
var nikseBitmap = new NikseBitmap(bitmap);
var list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(nikseBitmap, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom, 12);
var list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(nikseBitmap, _numericUpDownPixelsIsSpace, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom, 12);
foreach (var item in list)
{
if (item.NikseBitmap != null)
@ -4040,7 +4043,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
minLineHeight = 5;
}
var list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(nbmpInput, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom, minLineHeight);
var list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(nbmpInput, _numericUpDownPixelsIsSpace, checkBoxRightToLeftNOCR.Checked, Configuration.Settings.VobSubOcr.TopToBottom, minLineHeight);
int index = 0;
bool expandSelection = false;
@ -4110,7 +4113,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
else
{
var match = GetNOcrCompareMatchNew(item, nbmpInput, _nOcrDb, checkBoxNOcrItalic.Checked, !checkBoxNOcrCorrect.Checked, index, list);
var match = GetNOcrCompareMatchNew(item, nbmpInput, _nOcrDb, checkBoxNOcrItalic.Checked, !checkBoxNOcrDrawUnknownLetters.Checked, index, list);
if (match == null)
{
_vobSubOcrNOcrCharacter.Initialize(bitmap, item, _manualOcrDialogPosition, _italicCheckedLast, false, string.Empty);
@ -4830,7 +4833,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_binaryOcrDb = new BinaryOcrDb(Configuration.OcrDirectory + "Latin.db", true);
}
checkBoxNOcrCorrect.Checked = true;
checkBoxNOcrDrawUnknownLetters.Checked = true;
_numericUpDownMaxErrorPct = (double)numericUpDownMaxErrorPct.Value;
}
@ -4882,7 +4885,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
if (start + i < max)
{
var bw = new BackgroundWorker();
var p = new NOcrThreadParameter(null, start + i, _nOcrDb, bw, noOfThreads, _unItalicFactor, checkBoxNOcrItalic.Checked, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked)
var p = new NOcrThreadParameter(null, start + i, _nOcrDb, bw, noOfThreads, _unItalicFactor, checkBoxNOcrItalic.Checked, _numericUpDownPixelsIsSpace, checkBoxRightToLeftNOCR.Checked)
{
NOcrLastLowercaseHeight = GetLastBinOcrLowercaseHeight(),
NOcrLastUppercaseHeight = GetLastBinOcrUppercaseHeight(),
@ -6365,7 +6368,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void InitializeNOcrForBatch(string db)
{
_ocrMethodIndex = _ocrMethodNocr;
checkBoxNOcrCorrect.Checked = false;
checkBoxNOcrDrawUnknownLetters.Checked = false;
var fileName = string.Empty;
if (!string.IsNullOrEmpty(db))
{
@ -8137,7 +8140,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
Configuration.Settings.VobSubOcr.PromptForUnknownWords = checkBoxPromptForUnknownWords.Checked;
Configuration.Settings.VobSubOcr.GuessUnknownWords = checkBoxGuessUnknownWords.Checked;
Configuration.Settings.VobSubOcr.AutoBreakSubtitleIfMoreThanTwoLines = checkBoxAutoBreakLines.Checked;
Configuration.Settings.VobSubOcr.LineOcrDraw = checkBoxNOcrCorrect.Checked;
Configuration.Settings.VobSubOcr.LineOcrDraw = checkBoxNOcrDrawUnknownLetters.Checked;
Configuration.Settings.VobSubOcr.LineOcrMaxLineHeight = comboBoxNOcrLineSplitMinHeight.SelectedIndex;
Configuration.Settings.VobSubOcr.LineOcrAdvancedItalic = checkBoxNOcrItalic.Checked;
Configuration.Settings.VobSubOcr.XOrMorePixelsMakesSpace = (int)numericUpDownPixelsIsSpace.Value;
if (_ocrMethodIndex == _ocrMethodNocr)
@ -8675,8 +8679,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
Cursor = Cursors.WaitCursor;
Bitmap bitmap = GetSubtitleBitmap(subtitleListView1.SelectedItems[0].Index);
bool oldPrompt = checkBoxPromptForUnknownWords.Checked;
bool oldCorrect = checkBoxNOcrCorrect.Checked;
checkBoxNOcrCorrect.Checked = false;
bool oldCorrect = checkBoxNOcrDrawUnknownLetters.Checked;
checkBoxNOcrDrawUnknownLetters.Checked = false;
string result = OcrViaNOCR(bitmap, subtitleListView1.SelectedItems[0].Index);
checkBoxPromptForUnknownWords.Checked = oldPrompt;
Cursor = Cursors.Default;
@ -8687,7 +8691,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
minLineHeight = 7;
}
inspect.Initialize(bitmap, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked, _nOcrDb, this, checkBoxNOcrItalic.Checked, minLineHeight);
inspect.Initialize(bitmap, _numericUpDownPixelsIsSpace, checkBoxRightToLeft.Checked, _nOcrDb, this, checkBoxNOcrItalic.Checked, minLineHeight);
if (inspect.ShowDialog(this) == DialogResult.OK)
{
Cursor = Cursors.WaitCursor;
@ -8702,7 +8706,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
Cursor = Cursors.Default;
}
checkBoxNOcrCorrect.Checked = oldCorrect;
checkBoxNOcrDrawUnknownLetters.Checked = oldCorrect;
}
}
@ -9094,11 +9098,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
base.Dispose(disposing);
}
/// <summary>
/// ValueChanged handler for <c>numericUpDownPixelsIsSpace</c>: stores the
/// control's current value, cast to <see cref="int"/>, in the
/// <c>_numericUpDownPixelsIsSpace</c> field.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void numericUpDownPixelsIsSpace_ValueChanged(object sender, EventArgs e)
    => _numericUpDownPixelsIsSpace = (int)numericUpDownPixelsIsSpace.Value;
private void numericUpDownMaxErrorPct_ValueChanged(object sender, EventArgs e)
{
_numericUpDownMaxErrorPct = (double)numericUpDownMaxErrorPct.Value;
@ -9323,5 +9322,26 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
LoadImageCompareCharacterDatabaseList(form.ImageCompareDatabaseName);
}
}
/// <summary>
/// ValueChanged handler for <c>numericUpDownNumberOfPixelsIsSpaceNOCR</c>.
/// When the active OCR method is nOCR, the new value is copied to
/// <c>numericUpDownPixelsIsSpace</c>; in every case the value is then cached,
/// cast to <see cref="int"/>, in <c>_numericUpDownPixelsIsSpace</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void numericUpDownNumberOfPixelsIsSpaceNOCR_ValueChanged(object sender, EventArgs e)
{
    var nOcrPixelsControl = numericUpDownNumberOfPixelsIsSpaceNOCR;

    if (_ocrMethodIndex == _ocrMethodNocr)
    {
        // Mirror the nOCR value into the other "pixels is space" control.
        numericUpDownPixelsIsSpace.Value = nOcrPixelsControl.Value;
    }

    // The control is read again here, after the mirror assignment above.
    _numericUpDownPixelsIsSpace = (int)nOcrPixelsControl.Value;
}
/// <summary>
/// ValueChanged handler for <c>numericUpDownPixelsIsSpace</c>.
/// When the active OCR method is binary image compare, the new value is
/// copied to <c>numericUpDownNumberOfPixelsIsSpaceNOCR</c>; afterwards the
/// nOCR control's value is cached, cast to <see cref="int"/>, in
/// <c>_numericUpDownPixelsIsSpace</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void numericUpDownPixelsIsSpace_ValueChanged(object sender, EventArgs e)
{
    var binaryCompareActive = _ocrMethodIndex == _ocrMethodBinaryImageCompare;

    if (binaryCompareActive)
    {
        // Mirror this control's value into the nOCR "pixels is space" control.
        numericUpDownNumberOfPixelsIsSpaceNOCR.Value = numericUpDownPixelsIsSpace.Value;
    }

    // NOTE(review): the cached value is read from the nOCR control, not from the
    // control that raised this event. When the method is not binary image compare,
    // a change made here therefore does not reach the cached field - confirm this
    // is intended.
    _numericUpDownPixelsIsSpace = (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value;
}
}
}

View File

@ -438,6 +438,11 @@ namespace Nikse.SubtitleEdit.Logic
var lineBitmaps = splitOld.Count > splitBlankLines.Count ? splitOld : splitBlankLines;
if (lineBitmaps.Count == 1 && lineBitmaps[0].NikseBitmap?.Height > minLineHeight * 2.2)
{
lineBitmaps = SplitToLinesNew(lineBitmaps[0], minLineHeight, averageLineHeight); // more advanced split (allows for up/down)
}
//foreach (var bitmap in tempBitmaps)
//{
// // var height = bitmap.NikseBitmap.GetNonTransparentHeight();