diff --git a/Ocr/Latin.nocr b/Ocr/Latin.nocr index d28fd9358..05e583e8d 100644 Binary files a/Ocr/Latin.nocr and b/Ocr/Latin.nocr differ diff --git a/src/Forms/Ocr/BinaryOcrTrain.Designer.cs b/src/Forms/Ocr/BinaryOcrTrain.Designer.cs index c19337f31..dc61bc885 100644 --- a/src/Forms/Ocr/BinaryOcrTrain.Designer.cs +++ b/src/Forms/Ocr/BinaryOcrTrain.Designer.cs @@ -45,11 +45,14 @@ this.button1 = new System.Windows.Forms.Button(); this.comboBoxFontSizeEnd = new System.Windows.Forms.ComboBox(); this.label4 = new System.Windows.Forms.Label(); - this.groupBox2 = new System.Windows.Forms.GroupBox(); + this.groupBoxInput = new System.Windows.Forms.GroupBox(); this.folderBrowserDialog1 = new System.Windows.Forms.FolderBrowserDialog(); this.saveFileDialog1 = new System.Windows.Forms.SaveFileDialog(); + this.label2 = new System.Windows.Forms.Label(); + this.textBoxMerged = new System.Windows.Forms.TextBox(); + this.checkBoxItalic = new System.Windows.Forms.CheckBox(); this.groupBox1.SuspendLayout(); - this.groupBox2.SuspendLayout(); + this.groupBoxInput.SuspendLayout(); this.SuspendLayout(); // // openFileDialog1 @@ -92,7 +95,7 @@ this.checkBoxBold.AutoSize = true; this.checkBoxBold.Checked = true; this.checkBoxBold.CheckState = System.Windows.Forms.CheckState.Checked; - this.checkBoxBold.Location = new System.Drawing.Point(9, 524); + this.checkBoxBold.Location = new System.Drawing.Point(9, 466); this.checkBoxBold.Name = "checkBoxBold"; this.checkBoxBold.Size = new System.Drawing.Size(92, 17); this.checkBoxBold.TabIndex = 17; @@ -111,7 +114,7 @@ this.listViewFonts.HideSelection = false; this.listViewFonts.Location = new System.Drawing.Point(9, 47); this.listViewFonts.Name = "listViewFonts"; - this.listViewFonts.Size = new System.Drawing.Size(711, 429); + this.listViewFonts.Size = new System.Drawing.Size(711, 371); this.listViewFonts.TabIndex = 16; this.listViewFonts.UseCompatibleStateImageBehavior = false; this.listViewFonts.View = System.Windows.Forms.View.Details; @@ -136,7 
+139,7 @@ // this.labelSubtitleFontSize.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left))); this.labelSubtitleFontSize.AutoSize = true; - this.labelSubtitleFontSize.Location = new System.Drawing.Point(5, 497); + this.labelSubtitleFontSize.Location = new System.Drawing.Point(5, 439); this.labelSubtitleFontSize.Name = "labelSubtitleFontSize"; this.labelSubtitleFontSize.Size = new System.Drawing.Size(84, 13); this.labelSubtitleFontSize.TabIndex = 6; @@ -239,7 +242,7 @@ "130", "140", "150"}); - this.comboBoxSubtitleFontSize.Location = new System.Drawing.Point(114, 494); + this.comboBoxSubtitleFontSize.Location = new System.Drawing.Point(114, 436); this.comboBoxSubtitleFontSize.Name = "comboBoxSubtitleFontSize"; this.comboBoxSubtitleFontSize.Size = new System.Drawing.Size(121, 21); this.comboBoxSubtitleFontSize.TabIndex = 7; @@ -271,7 +274,7 @@ this.buttonTrain.Name = "buttonTrain"; this.buttonTrain.Size = new System.Drawing.Size(128, 23); this.buttonTrain.TabIndex = 23; - this.buttonTrain.Text = "Start train"; + this.buttonTrain.Text = "Start training"; this.buttonTrain.UseVisualStyleBackColor = true; this.buttonTrain.Click += new System.EventHandler(this.buttonTrain_Click); // @@ -280,6 +283,7 @@ this.groupBox1.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) | System.Windows.Forms.AnchorStyles.Left) | System.Windows.Forms.AnchorStyles.Right))); + this.groupBox1.Controls.Add(this.checkBoxItalic); this.groupBox1.Controls.Add(this.button1); this.groupBox1.Controls.Add(this.comboBoxFontSizeEnd); this.groupBox1.Controls.Add(this.label4); @@ -288,9 +292,9 @@ this.groupBox1.Controls.Add(this.labelSubtitleFontSize); this.groupBox1.Controls.Add(this.comboBoxSubtitleFontSize); this.groupBox1.Controls.Add(this.labelSubtitleFont); - this.groupBox1.Location = new System.Drawing.Point(12, 91); + this.groupBox1.Location
= new System.Drawing.Point(12, 149); this.groupBox1.Name = "groupBox1"; - this.groupBox1.Size = new System.Drawing.Size(735, 547); + this.groupBox1.Size = new System.Drawing.Size(735, 489); this.groupBox1.TabIndex = 20; this.groupBox1.TabStop = false; this.groupBox1.Text = "Training options"; @@ -298,7 +302,7 @@ // button1 // this.button1.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); - this.button1.Location = new System.Drawing.Point(556, 487); + this.button1.Location = new System.Drawing.Point(556, 429); this.button1.Name = "button1"; this.button1.Size = new System.Drawing.Size(164, 23); this.button1.TabIndex = 27; @@ -403,7 +407,7 @@ "130", "140", "150"}); - this.comboBoxFontSizeEnd.Location = new System.Drawing.Point(320, 494); + this.comboBoxFontSizeEnd.Location = new System.Drawing.Point(320, 436); this.comboBoxFontSizeEnd.Name = "comboBoxFontSizeEnd"; this.comboBoxFontSizeEnd.Size = new System.Drawing.Size(85, 21); this.comboBoxFontSizeEnd.TabIndex = 21; @@ -412,25 +416,55 @@ // this.label4.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left))); this.label4.AutoSize = true; - this.label4.Location = new System.Drawing.Point(256, 497); + this.label4.Location = new System.Drawing.Point(256, 439); this.label4.Name = "label4"; this.label4.Size = new System.Drawing.Size(58, 13); this.label4.TabIndex = 20; this.label4.Text = "to font size"; // - // groupBox2 + // groupBoxInput // - this.groupBox2.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) + this.groupBoxInput.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) | System.Windows.Forms.AnchorStyles.Right))); - this.groupBox2.Controls.Add(this.buttonInputChoose); - 
this.groupBox2.Controls.Add(this.label1); - this.groupBox2.Controls.Add(this.textBoxInputFile); - this.groupBox2.Location = new System.Drawing.Point(12, 12); - this.groupBox2.Name = "groupBox2"; - this.groupBox2.Size = new System.Drawing.Size(735, 73); - this.groupBox2.TabIndex = 21; - this.groupBox2.TabStop = false; - this.groupBox2.Text = "Input file"; + this.groupBoxInput.Controls.Add(this.label2); + this.groupBoxInput.Controls.Add(this.textBoxMerged); + this.groupBoxInput.Controls.Add(this.buttonInputChoose); + this.groupBoxInput.Controls.Add(this.label1); + this.groupBoxInput.Controls.Add(this.textBoxInputFile); + this.groupBoxInput.Location = new System.Drawing.Point(12, 12); + this.groupBoxInput.Name = "groupBoxInput"; + this.groupBoxInput.Size = new System.Drawing.Size(735, 131); + this.groupBoxInput.TabIndex = 21; + this.groupBoxInput.TabStop = false; + this.groupBoxInput.Text = "Input file"; + // + // label2 + // + this.label2.AutoSize = true; + this.label2.Location = new System.Drawing.Point(5, 82); + this.label2.Name = "label2"; + this.label2.Size = new System.Drawing.Size(201, 13); + this.label2.TabIndex = 11; + this.label2.Text = "Letter combinations that might be merged"; + // + // textBoxMerged + // + this.textBoxMerged.Location = new System.Drawing.Point(7, 98); + this.textBoxMerged.Name = "textBoxMerged"; + this.textBoxMerged.Size = new System.Drawing.Size(469, 20); + this.textBoxMerged.TabIndex = 12; + this.textBoxMerged.Text = "ff ft fi fj fy fl rf rt rv rw ry rt ryt tt TV tw yt yw"; + // + // checkBoxItalic + // + this.checkBoxItalic.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left))); + this.checkBoxItalic.AutoSize = true; + this.checkBoxItalic.Location = new System.Drawing.Point(114, 466); + this.checkBoxItalic.Name = "checkBoxItalic"; + this.checkBoxItalic.Size = new System.Drawing.Size(93, 17); + this.checkBoxItalic.TabIndex = 28; + this.checkBoxItalic.Text = 
"Also train italic"; + this.checkBoxItalic.UseVisualStyleBackColor = true; // // BinaryOcrTrain // @@ -441,7 +475,7 @@ this.Controls.Add(this.labelInfo); this.Controls.Add(this.buttonTrain); this.Controls.Add(this.groupBox1); - this.Controls.Add(this.groupBox2); + this.Controls.Add(this.groupBoxInput); this.MinimumSize = new System.Drawing.Size(770, 600); this.Name = "BinaryOcrTrain"; this.ShowIcon = false; @@ -452,8 +486,8 @@ this.Shown += new System.EventHandler(this.BinaryOcrTrain_Shown); this.groupBox1.ResumeLayout(false); this.groupBox1.PerformLayout(); - this.groupBox2.ResumeLayout(false); - this.groupBox2.PerformLayout(); + this.groupBoxInput.ResumeLayout(false); + this.groupBoxInput.PerformLayout(); this.ResumeLayout(false); this.PerformLayout(); @@ -475,11 +509,14 @@ private System.Windows.Forms.Label labelInfo; private System.Windows.Forms.Button buttonTrain; private System.Windows.Forms.GroupBox groupBox1; - private System.Windows.Forms.GroupBox groupBox2; + private System.Windows.Forms.GroupBox groupBoxInput; private System.Windows.Forms.FolderBrowserDialog folderBrowserDialog1; private System.Windows.Forms.ComboBox comboBoxFontSizeEnd; private System.Windows.Forms.Label label4; private System.Windows.Forms.SaveFileDialog saveFileDialog1; private System.Windows.Forms.Button button1; + private System.Windows.Forms.Label label2; + private System.Windows.Forms.TextBox textBoxMerged; + private System.Windows.Forms.CheckBox checkBoxItalic; } } \ No newline at end of file diff --git a/src/Forms/Ocr/BinaryOcrTrain.cs b/src/Forms/Ocr/BinaryOcrTrain.cs index 5790d6cad..521465b5b 100644 --- a/src/Forms/Ocr/BinaryOcrTrain.cs +++ b/src/Forms/Ocr/BinaryOcrTrain.cs @@ -25,7 +25,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private float _subtitleFontSize = 25.0f; private readonly Color _borderColor = Color.Black; private const float BorderWidth = 2.0f; - private bool _abort = false; + private bool _abort; public BinaryOcrTrain() { @@ -49,9 +49,9 @@ namespace 
Nikse.SubtitleEdit.Forms.Ocr textBoxInputFile.Text = Configuration.Settings.Tools.OcrTrainSrtFile; } - private void TrainLetter(ref int numberOfCharactersLeaned, ref int numberOfCharactersSkipped, BinaryOcrDb db, List charactersLearned, string s, bool bold, bool italic) + private void TrainLetter(ref int numberOfCharactersLeaned, ref int numberOfCharactersSkipped, BinaryOcrDb db, string s, bool bold, bool italic, bool doubleLetter) { - Bitmap bmp = GenerateImageFromTextWithStyle("H " + s, bold, italic); + var bmp = GenerateImageFromTextWithStyle("H " + s, bold, italic); var nbmp = new NikseBitmap(bmp); nbmp.MakeTwoColor(280); var list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(nbmp, 10, false, false, 25); @@ -63,7 +63,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr if (match < 0) { db.Add(bob); - charactersLearned.Add(s); numberOfCharactersLeaned++; bmp.Dispose(); } @@ -183,12 +182,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void buttonTrain_Click(object sender, EventArgs e) { - if (buttonTrain.Text == "Abort") + if (buttonTrain.Text == Configuration.Settings.Language.SpellCheck.Abort) { _abort = true; return; } + _abort = false; + buttonTrain.Text = Configuration.Settings.Language.SpellCheck.Abort; + buttonOK.Enabled = false; + saveFileDialog1.DefaultExt = ".db"; saveFileDialog1.Filter = "*Binary Image Compare DB files|*.db"; if (saveFileDialog1.ShowDialog(this) != DialogResult.OK) @@ -196,7 +199,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr return; } - buttonTrain.Text = "Abort"; + buttonTrain.Text = Configuration.Settings.Language.SpellCheck.Abort; var startFontSize = Convert.ToInt32(comboBoxSubtitleFontSize.Items[comboBoxSubtitleFontSize.SelectedIndex].ToString()); var endFontSize = Convert.ToInt32(comboBoxFontSizeEnd.Items[comboBoxFontSizeEnd.SelectedIndex].ToString()); @@ -236,16 +239,41 @@ namespace Nikse.SubtitleEdit.Forms.Ocr var s = ch.ToString(); if (!charactersLearned.Contains(s)) { - TrainLetter(ref numberOfCharactersLeaned, ref 
numberOfCharactersSkipped, bicDb, charactersLearned, s, false, false); - TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, charactersLearned, s, false, true); + charactersLearned.Add(s); + TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, s, false, false, false); if (checkBoxBold.Checked) { - TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, charactersLearned, s, true, false); + TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, s, true, false, false); + } + if (checkBoxItalic.Checked) + { + TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, s, false, true, false); } } - labelInfo.Text = string.Format("Now training font '{1}', total characters learned is {0}", numberOfCharactersLeaned, _subtitleFontName); + labelInfo.Text = string.Format("Now training font '{1}', total characters learned is {0:#,###,##0}, {2:#,###,##0} skipped", numberOfCharactersLeaned, _subtitleFontName, numberOfCharactersSkipped); } } + + foreach (var text in textBoxMerged.Text.Split(' ')) + { + if (!string.IsNullOrWhiteSpace(text)) + { + if (!charactersLearned.Contains(text) && text.Length > 1 && text.Length <= 3) + { + charactersLearned.Add(text); + TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, text, false, false, true); + if (checkBoxBold.Checked) + { + TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, text, true, false, true); + } + if (checkBoxItalic.Checked) + { + TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, text, false, true, true); + } + } + } + } + if (_abort) { break; @@ -271,6 +299,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { labelInfo.Text = "Training completed and saved in " + saveFileDialog1.FileName; } + buttonOK.Enabled = true; + buttonTrain.Text = "Start training"; + _abort = false; } private void buttonOK_Click(object sender, 
EventArgs e) @@ -326,12 +357,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr for (_subtitleFontSize = 20; _subtitleFontSize <= 100; _subtitleFontSize++) { - var charactersLearned = new List(); if (!string.IsNullOrEmpty(_autoDetectFontText)) { var s = _autoDetectFontText; var bicDb = new BinaryOcrDb(null); - TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, charactersLearned, s, false, false); + TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, s, false, false, false); if (bicDb.FindExactMatch(_autoDetectFontBob) >= 0) { AutoDetectedFonts.Add(_subtitleFontName + " " + _subtitleFontSize); @@ -356,7 +386,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } smallestDifference = dif; - hit = compareItem; if (dif < 3) { break; // foreach ending @@ -373,7 +402,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } bicDb = new BinaryOcrDb(null); - TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, charactersLearned, s, false, true); + TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, s, false, true, false); if (bicDb.FindExactMatch(_autoDetectFontBob) >= 0) { AutoDetectedFonts.Add(_subtitleFontName + " " + _subtitleFontSize + " italic"); @@ -382,7 +411,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr if (checkBoxBold.Checked) { bicDb = new BinaryOcrDb(null); - TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, charactersLearned, s, true, false); + TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, bicDb, s, true, false, false); if (bicDb.FindExactMatch(_autoDetectFontBob) >= 0) { AutoDetectedFonts.Add(_subtitleFontName + " " + _subtitleFontSize + " bold"); diff --git a/src/Forms/Ocr/VobSubNOcrTrain.Designer.cs b/src/Forms/Ocr/VobSubNOcrTrain.Designer.cs index 50d3a59c7..55b2beabe 100644 --- a/src/Forms/Ocr/VobSubNOcrTrain.Designer.cs +++ b/src/Forms/Ocr/VobSubNOcrTrain.Designer.cs @@ -42,7 +42,7 @@ this.label1 = new 
System.Windows.Forms.Label(); this.textBoxInputFile = new System.Windows.Forms.TextBox(); this.buttonInputChoose = new System.Windows.Forms.Button(); - this.groupBox2 = new System.Windows.Forms.GroupBox(); + this.groupBoxInput = new System.Windows.Forms.GroupBox(); this.label2 = new System.Windows.Forms.Label(); this.textBoxMerged = new System.Windows.Forms.TextBox(); this.openFileDialog1 = new System.Windows.Forms.OpenFileDialog(); @@ -52,7 +52,7 @@ this.saveFileDialog1 = new System.Windows.Forms.SaveFileDialog(); this.groupBox1.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownSegmentsPerCharacter)).BeginInit(); - this.groupBox2.SuspendLayout(); + this.groupBoxInput.SuspendLayout(); this.SuspendLayout(); // // groupBox1 @@ -312,19 +312,19 @@ this.buttonInputChoose.UseVisualStyleBackColor = true; this.buttonInputChoose.Click += new System.EventHandler(this.buttonInputChoose_Click); // - // groupBox2 + // groupBoxInput // - this.groupBox2.Controls.Add(this.label2); - this.groupBox2.Controls.Add(this.textBoxMerged); - this.groupBox2.Controls.Add(this.buttonInputChoose); - this.groupBox2.Controls.Add(this.label1); - this.groupBox2.Controls.Add(this.textBoxInputFile); - this.groupBox2.Location = new System.Drawing.Point(12, 13); - this.groupBox2.Name = "groupBox2"; - this.groupBox2.Size = new System.Drawing.Size(493, 134); - this.groupBox2.TabIndex = 0; - this.groupBox2.TabStop = false; - this.groupBox2.Text = "Input file"; + this.groupBoxInput.Controls.Add(this.label2); + this.groupBoxInput.Controls.Add(this.textBoxMerged); + this.groupBoxInput.Controls.Add(this.buttonInputChoose); + this.groupBoxInput.Controls.Add(this.label1); + this.groupBoxInput.Controls.Add(this.textBoxInputFile); + this.groupBoxInput.Location = new System.Drawing.Point(12, 13); + this.groupBoxInput.Name = "groupBoxInput"; + this.groupBoxInput.Size = new System.Drawing.Size(493, 134); + this.groupBoxInput.TabIndex = 0; + this.groupBoxInput.TabStop = false; + 
this.groupBoxInput.Text = "Input file"; // // label2 // @@ -388,7 +388,7 @@ this.Controls.Add(this.labelInfo); this.Controls.Add(this.buttonOK); this.Controls.Add(this.buttonTrain); - this.Controls.Add(this.groupBox2); + this.Controls.Add(this.groupBoxInput); this.Controls.Add(this.groupBox1); this.KeyPreview = true; this.MinimumSize = new System.Drawing.Size(519, 600); @@ -401,8 +401,8 @@ this.groupBox1.ResumeLayout(false); this.groupBox1.PerformLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownSegmentsPerCharacter)).EndInit(); - this.groupBox2.ResumeLayout(false); - this.groupBox2.PerformLayout(); + this.groupBoxInput.ResumeLayout(false); + this.groupBoxInput.PerformLayout(); this.ResumeLayout(false); this.PerformLayout(); @@ -421,7 +421,7 @@ private System.Windows.Forms.ListView listViewFonts; private System.Windows.Forms.Label label3; private System.Windows.Forms.NumericUpDown numericUpDownSegmentsPerCharacter; - private System.Windows.Forms.GroupBox groupBox2; + private System.Windows.Forms.GroupBox groupBoxInput; private System.Windows.Forms.OpenFileDialog openFileDialog1; private System.Windows.Forms.Button buttonTrain; private System.Windows.Forms.Button buttonOK; diff --git a/src/Forms/Ocr/VobSubNOcrTrain.cs b/src/Forms/Ocr/VobSubNOcrTrain.cs index 490bdaafb..399dc54ee 100644 --- a/src/Forms/Ocr/VobSubNOcrTrain.cs +++ b/src/Forms/Ocr/VobSubNOcrTrain.cs @@ -142,7 +142,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, text, true, false, true); } - if (checkBoxBold.Checked) + if (checkBoxItalic.Checked) { TrainLetter(ref numberOfCharactersLeaned, ref numberOfCharactersSkipped, nOcrD, text, false, true, true); }