diff --git a/src/Forms/Main.cs b/src/Forms/Main.cs index 73bdc8294..03b3952fb 100644 --- a/src/Forms/Main.cs +++ b/src/Forms/Main.cs @@ -2386,48 +2386,69 @@ namespace Nikse.SubtitleEdit.Forms if (format == null) { - var bdnXml = new BdnXml(); - string[] arr = File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)); - var list = new List(); - foreach (string l in arr) - list.Add(l); - if (bdnXml.IsMine(list, fileName)) + try { - if (ContinueNewOrExit()) + var bdnXml = new BdnXml(); + string[] arr = File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)); + var list = new List(); + foreach (string l in arr) + list.Add(l); + if (bdnXml.IsMine(list, fileName)) { - ImportAndOcrBdnXml(fileName, bdnXml, list); + if (ContinueNewOrExit()) + { + ImportAndOcrBdnXml(fileName, bdnXml, list); + } + return; } - return; + } + catch + { + format = null; } } if (fileName.ToLower().EndsWith(".dost")) { - var dost = new Dost(); - string[] arr = File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)); - var list = new List(); - foreach (string l in arr) - list.Add(l); - if (dost.IsMine(list, fileName)) + try { - if (ContinueNewOrExit()) - ImportAndOcrDost(fileName, dost, list); - return; + var dost = new Dost(); + string[] arr = File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)); + var list = new List(); + foreach (string l in arr) + list.Add(l); + if (dost.IsMine(list, fileName)) + { + if (ContinueNewOrExit()) + ImportAndOcrDost(fileName, dost, list); + return; + } + } + catch + { + format = null; } } if (format == null || format.Name == new Scenarist().Name) { - var son = new Son(); - string[] arr = File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)); - var list = new List(); - foreach (string l in arr) - list.Add(l); - if (son.IsMine(list, fileName)) + try { - if (ContinueNewOrExit()) - ImportAndOcrSon(fileName, son, list); - return; + var son = new Son(); + string[] arr = File.ReadAllLines(fileName, 
Utilities.GetEncodingFromFile(fileName)); + var list = new List(); + foreach (string l in arr) + list.Add(l); + if (son.IsMine(list, fileName)) + { + if (ContinueNewOrExit()) + ImportAndOcrSon(fileName, son, list); + return; + } + } + catch + { + format = null; } } @@ -2456,35 +2477,49 @@ namespace Nikse.SubtitleEdit.Forms if (format == null || format.Name == new Scenarist().Name) { - var sst = new SonicScenaristBitmaps(); - string[] arr = File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)); - var list = new List(); - foreach (string l in arr) - list.Add(l); - if (sst.IsMine(list, fileName)) + try { - if (ContinueNewOrExit()) - ImportAndOcrSst(fileName, sst, list); - return; + var sst = new SonicScenaristBitmaps(); + string[] arr = File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)); + var list = new List(); + foreach (string l in arr) + list.Add(l); + if (sst.IsMine(list, fileName)) + { + if (ContinueNewOrExit()) + ImportAndOcrSst(fileName, sst, list); + return; + } + } + catch + { + format = null; } } if (format == null) { - var htmlSamiArray = new HtmlSamiArray(); - string[] arr = File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)); - var list = new List(); - foreach (string l in arr) - list.Add(l); - if (htmlSamiArray.IsMine(list, fileName)) + try { - htmlSamiArray.LoadSubtitle(_subtitle, list, fileName); - _oldSubtitleFormat = htmlSamiArray; - SetCurrentFormat(Configuration.Settings.General.DefaultSubtitleFormat); - SetEncoding(Configuration.Settings.General.DefaultEncoding); - encoding = GetCurrentEncoding(); - justConverted = true; - format = GetCurrentSubtitleFormat(); + var htmlSamiArray = new HtmlSamiArray(); + string[] arr = File.ReadAllLines(fileName, Utilities.GetEncodingFromFile(fileName)); + var list = new List(); + foreach (string l in arr) + list.Add(l); + if (htmlSamiArray.IsMine(list, fileName)) + { + htmlSamiArray.LoadSubtitle(_subtitle, list, fileName); + _oldSubtitleFormat = htmlSamiArray; + 
SetCurrentFormat(Configuration.Settings.General.DefaultSubtitleFormat); + SetEncoding(Configuration.Settings.General.DefaultEncoding); + encoding = GetCurrentEncoding(); + justConverted = true; + format = GetCurrentSubtitleFormat(); + } + } + catch + { + format = null; } } diff --git a/src/Forms/VobSubOcr.Designer.cs b/src/Forms/VobSubOcr.Designer.cs index 4c47ab231..fe1972657 100644 --- a/src/Forms/VobSubOcr.Designer.cs +++ b/src/Forms/VobSubOcr.Designer.cs @@ -58,6 +58,17 @@ namespace Nikse.SubtitleEdit.Forms this.buttonOK = new System.Windows.Forms.Button(); this.buttonCancel = new System.Windows.Forms.Button(); this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox(); + this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox(); + this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox(); + this.labelMaxErrorPercent = new System.Windows.Forms.Label(); + this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown(); + this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox(); + this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown(); + this.buttonEditCharacterDatabase = new System.Windows.Forms.Button(); + this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label(); + this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox(); + this.labelImageDatabase = new System.Windows.Forms.Label(); + this.buttonNewCharacterDatabase = new System.Windows.Forms.Button(); this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox(); this.buttonGetTesseractDictionaries = new System.Windows.Forms.Button(); this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox(); @@ -65,7 +76,6 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxUseModiInTesseractForUnknownWords = new System.Windows.Forms.CheckBox(); this.labelTesseractLanguage = new System.Windows.Forms.Label(); this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox(); - this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox(); 
this.groupBoxModiMethod = new System.Windows.Forms.GroupBox(); this.label1 = new System.Windows.Forms.Label(); this.comboBoxModiLanguage = new System.Windows.Forms.ComboBox(); @@ -79,14 +89,6 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxRightToLeftNOCR = new System.Windows.Forms.CheckBox(); this.numericUpDownNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.NumericUpDown(); this.labelNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.Label(); - this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox(); - this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox(); - this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown(); - this.buttonEditCharacterDatabase = new System.Windows.Forms.Button(); - this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label(); - this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox(); - this.labelImageDatabase = new System.Windows.Forms.Label(); - this.buttonNewCharacterDatabase = new System.Windows.Forms.Button(); this.groupBoxOCRControls = new System.Windows.Forms.GroupBox(); this.labelStartFrom = new System.Windows.Forms.Label(); this.numericUpDownStartNumber = new System.Windows.Forms.NumericUpDown(); @@ -138,19 +140,18 @@ namespace Nikse.SubtitleEdit.Forms this.folderBrowserDialog1 = new System.Windows.Forms.FolderBrowserDialog(); this.openFileDialog1 = new System.Windows.Forms.OpenFileDialog(); this.splitContainerBottom = new System.Windows.Forms.SplitContainer(); - this.timerHideStatus = new System.Windows.Forms.Timer(this.components); - this.labelMaxErrorPercent = new System.Windows.Forms.Label(); - this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown(); this.textBoxCurrentText = new Nikse.SubtitleEdit.Controls.SETextBox(); this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); + this.timerHideStatus = new System.Windows.Forms.Timer(this.components); this.contextMenuStripListview.SuspendLayout(); 
this.groupBoxOcrMethod.SuspendLayout(); + this.groupBoxImageCompareMethod.SuspendLayout(); + ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit(); this.GroupBoxTesseractMethod.SuspendLayout(); this.groupBoxModiMethod.SuspendLayout(); this.groupBoxNOCR.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit(); - this.groupBoxImageCompareMethod.SuspendLayout(); - ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit(); this.groupBoxOCRControls.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).BeginInit(); this.groupBoxOcrAutoFix.SuspendLayout(); @@ -171,7 +172,6 @@ namespace Nikse.SubtitleEdit.Forms this.splitContainerBottom.Panel1.SuspendLayout(); this.splitContainerBottom.Panel2.SuspendLayout(); this.splitContainerBottom.SuspendLayout(); - ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit(); this.SuspendLayout(); // // contextMenuStripListview @@ -386,10 +386,10 @@ namespace Nikse.SubtitleEdit.Forms // groupBoxOcrMethod // this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod); - this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod); this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod); this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod); this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR); + this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod); this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5); this.groupBoxOcrMethod.Name = "groupBoxOcrMethod"; this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192); @@ -397,6 +397,156 @@ namespace Nikse.SubtitleEdit.Forms this.groupBoxOcrMethod.TabStop = false; this.groupBoxOcrMethod.Text = "OCR method"; // + // comboBoxOcrMethod + // + 
this.comboBoxOcrMethod.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxOcrMethod.FormattingEnabled = true; + this.comboBoxOcrMethod.Items.AddRange(new object[] { + "OCR via tesseract", + "OCR via image compare", + "OCR via Microsoftr MODI", + "OCR via nOCR"}); + this.comboBoxOcrMethod.Location = new System.Drawing.Point(13, 20); + this.comboBoxOcrMethod.Name = "comboBoxOcrMethod"; + this.comboBoxOcrMethod.Size = new System.Drawing.Size(366, 21); + this.comboBoxOcrMethod.TabIndex = 0; + this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged); + // + // groupBoxImageCompareMethod + // + this.groupBoxImageCompareMethod.Controls.Add(this.labelMaxErrorPercent); + this.groupBoxImageCompareMethod.Controls.Add(this.numericUpDownMaxErrorPct); + this.groupBoxImageCompareMethod.Controls.Add(this.checkBoxRightToLeft); + this.groupBoxImageCompareMethod.Controls.Add(this.numericUpDownPixelsIsSpace); + this.groupBoxImageCompareMethod.Controls.Add(this.buttonEditCharacterDatabase); + this.groupBoxImageCompareMethod.Controls.Add(this.labelNoOfPixelsIsSpace); + this.groupBoxImageCompareMethod.Controls.Add(this.comboBoxCharacterDatabase); + this.groupBoxImageCompareMethod.Controls.Add(this.labelImageDatabase); + this.groupBoxImageCompareMethod.Controls.Add(this.buttonNewCharacterDatabase); + this.groupBoxImageCompareMethod.Location = new System.Drawing.Point(13, 38); + this.groupBoxImageCompareMethod.Name = "groupBoxImageCompareMethod"; + this.groupBoxImageCompareMethod.Size = new System.Drawing.Size(366, 131); + this.groupBoxImageCompareMethod.TabIndex = 2; + this.groupBoxImageCompareMethod.TabStop = false; + this.groupBoxImageCompareMethod.Text = "Image compare"; + // + // labelMaxErrorPercent + // + this.labelMaxErrorPercent.AutoSize = true; + this.labelMaxErrorPercent.Location = new System.Drawing.Point(7, 83); + this.labelMaxErrorPercent.Name = "labelMaxErrorPercent"; + 
this.labelMaxErrorPercent.Size = new System.Drawing.Size(55, 13); + this.labelMaxErrorPercent.TabIndex = 45; + this.labelMaxErrorPercent.Text = "Max Err%"; + // + // numericUpDownMaxErrorPct + // + this.numericUpDownMaxErrorPct.DecimalPlaces = 1; + this.numericUpDownMaxErrorPct.Increment = new decimal(new int[] { + 1, + 0, + 0, + 65536}); + this.numericUpDownMaxErrorPct.Location = new System.Drawing.Point(173, 81); + this.numericUpDownMaxErrorPct.Maximum = new decimal(new int[] { + 20, + 0, + 0, + 0}); + this.numericUpDownMaxErrorPct.Minimum = new decimal(new int[] { + 5, + 0, + 0, + 65536}); + this.numericUpDownMaxErrorPct.Name = "numericUpDownMaxErrorPct"; + this.numericUpDownMaxErrorPct.Size = new System.Drawing.Size(50, 21); + this.numericUpDownMaxErrorPct.TabIndex = 44; + this.numericUpDownMaxErrorPct.Value = new decimal(new int[] { + 10, + 0, + 0, + 65536}); + // + // checkBoxRightToLeft + // + this.checkBoxRightToLeft.AutoSize = true; + this.checkBoxRightToLeft.Location = new System.Drawing.Point(9, 107); + this.checkBoxRightToLeft.Name = "checkBoxRightToLeft"; + this.checkBoxRightToLeft.Size = new System.Drawing.Size(83, 17); + this.checkBoxRightToLeft.TabIndex = 6; + this.checkBoxRightToLeft.Text = "Right to left"; + this.checkBoxRightToLeft.UseVisualStyleBackColor = true; + // + // numericUpDownPixelsIsSpace + // + this.numericUpDownPixelsIsSpace.Location = new System.Drawing.Point(173, 54); + this.numericUpDownPixelsIsSpace.Maximum = new decimal(new int[] { + 50, + 0, + 0, + 0}); + this.numericUpDownPixelsIsSpace.Minimum = new decimal(new int[] { + 1, + 0, + 0, + 0}); + this.numericUpDownPixelsIsSpace.Name = "numericUpDownPixelsIsSpace"; + this.numericUpDownPixelsIsSpace.Size = new System.Drawing.Size(50, 21); + this.numericUpDownPixelsIsSpace.TabIndex = 5; + this.numericUpDownPixelsIsSpace.Value = new decimal(new int[] { + 1, + 0, + 0, + 0}); + // + // buttonEditCharacterDatabase + // + this.buttonEditCharacterDatabase.Location = new 
System.Drawing.Point(278, 46); + this.buttonEditCharacterDatabase.Name = "buttonEditCharacterDatabase"; + this.buttonEditCharacterDatabase.Size = new System.Drawing.Size(68, 21); + this.buttonEditCharacterDatabase.TabIndex = 3; + this.buttonEditCharacterDatabase.Text = "Edit"; + this.buttonEditCharacterDatabase.UseVisualStyleBackColor = true; + this.buttonEditCharacterDatabase.Click += new System.EventHandler(this.ButtonEditCharacterDatabaseClick); + // + // labelNoOfPixelsIsSpace + // + this.labelNoOfPixelsIsSpace.AutoSize = true; + this.labelNoOfPixelsIsSpace.Location = new System.Drawing.Point(6, 56); + this.labelNoOfPixelsIsSpace.Name = "labelNoOfPixelsIsSpace"; + this.labelNoOfPixelsIsSpace.Size = new System.Drawing.Size(104, 13); + this.labelNoOfPixelsIsSpace.TabIndex = 4; + this.labelNoOfPixelsIsSpace.Text = "No of pixels is space"; + // + // comboBoxCharacterDatabase + // + this.comboBoxCharacterDatabase.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxCharacterDatabase.FormattingEnabled = true; + this.comboBoxCharacterDatabase.Location = new System.Drawing.Point(130, 19); + this.comboBoxCharacterDatabase.Name = "comboBoxCharacterDatabase"; + this.comboBoxCharacterDatabase.Size = new System.Drawing.Size(142, 21); + this.comboBoxCharacterDatabase.TabIndex = 1; + this.comboBoxCharacterDatabase.SelectedIndexChanged += new System.EventHandler(this.ComboBoxCharacterDatabaseSelectedIndexChanged); + // + // labelImageDatabase + // + this.labelImageDatabase.AutoSize = true; + this.labelImageDatabase.Location = new System.Drawing.Point(7, 22); + this.labelImageDatabase.Name = "labelImageDatabase"; + this.labelImageDatabase.Size = new System.Drawing.Size(85, 13); + this.labelImageDatabase.TabIndex = 0; + this.labelImageDatabase.Text = "Image database"; + // + // buttonNewCharacterDatabase + // + this.buttonNewCharacterDatabase.Location = new System.Drawing.Point(278, 19); + this.buttonNewCharacterDatabase.Name = 
"buttonNewCharacterDatabase"; + this.buttonNewCharacterDatabase.Size = new System.Drawing.Size(68, 21); + this.buttonNewCharacterDatabase.TabIndex = 2; + this.buttonNewCharacterDatabase.Text = "New"; + this.buttonNewCharacterDatabase.UseVisualStyleBackColor = true; + this.buttonNewCharacterDatabase.Click += new System.EventHandler(this.ButtonNewCharacterDatabaseClick); + // // GroupBoxTesseractMethod // this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries); @@ -478,21 +628,6 @@ namespace Nikse.SubtitleEdit.Forms this.comboBoxTesseractLanguages.TabIndex = 1; this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged); // - // comboBoxOcrMethod - // - this.comboBoxOcrMethod.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; - this.comboBoxOcrMethod.FormattingEnabled = true; - this.comboBoxOcrMethod.Items.AddRange(new object[] { - "OCR via tesseract", - "OCR via image compare", - "OCR via Microsoftr MODI", - "OCR via nOCR"}); - this.comboBoxOcrMethod.Location = new System.Drawing.Point(13, 20); - this.comboBoxOcrMethod.Name = "comboBoxOcrMethod"; - this.comboBoxOcrMethod.Size = new System.Drawing.Size(366, 21); - this.comboBoxOcrMethod.TabIndex = 0; - this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged); - // // groupBoxModiMethod // this.groupBoxModiMethod.Controls.Add(this.label1); @@ -641,104 +776,6 @@ namespace Nikse.SubtitleEdit.Forms this.labelNumberOfPixelsIsSpaceNOCR.TabIndex = 4; this.labelNumberOfPixelsIsSpaceNOCR.Text = "No of pixels is space"; // - // groupBoxImageCompareMethod - // - this.groupBoxImageCompareMethod.Controls.Add(this.labelMaxErrorPercent); - this.groupBoxImageCompareMethod.Controls.Add(this.numericUpDownMaxErrorPct); - this.groupBoxImageCompareMethod.Controls.Add(this.checkBoxRightToLeft); - this.groupBoxImageCompareMethod.Controls.Add(this.numericUpDownPixelsIsSpace); 
- this.groupBoxImageCompareMethod.Controls.Add(this.buttonEditCharacterDatabase); - this.groupBoxImageCompareMethod.Controls.Add(this.labelNoOfPixelsIsSpace); - this.groupBoxImageCompareMethod.Controls.Add(this.comboBoxCharacterDatabase); - this.groupBoxImageCompareMethod.Controls.Add(this.labelImageDatabase); - this.groupBoxImageCompareMethod.Controls.Add(this.buttonNewCharacterDatabase); - this.groupBoxImageCompareMethod.Location = new System.Drawing.Point(13, 38); - this.groupBoxImageCompareMethod.Name = "groupBoxImageCompareMethod"; - this.groupBoxImageCompareMethod.Size = new System.Drawing.Size(366, 131); - this.groupBoxImageCompareMethod.TabIndex = 2; - this.groupBoxImageCompareMethod.TabStop = false; - this.groupBoxImageCompareMethod.Text = "Image compare"; - // - // checkBoxRightToLeft - // - this.checkBoxRightToLeft.AutoSize = true; - this.checkBoxRightToLeft.Location = new System.Drawing.Point(9, 107); - this.checkBoxRightToLeft.Name = "checkBoxRightToLeft"; - this.checkBoxRightToLeft.Size = new System.Drawing.Size(83, 17); - this.checkBoxRightToLeft.TabIndex = 6; - this.checkBoxRightToLeft.Text = "Right to left"; - this.checkBoxRightToLeft.UseVisualStyleBackColor = true; - // - // numericUpDownPixelsIsSpace - // - this.numericUpDownPixelsIsSpace.Location = new System.Drawing.Point(173, 54); - this.numericUpDownPixelsIsSpace.Maximum = new decimal(new int[] { - 50, - 0, - 0, - 0}); - this.numericUpDownPixelsIsSpace.Minimum = new decimal(new int[] { - 1, - 0, - 0, - 0}); - this.numericUpDownPixelsIsSpace.Name = "numericUpDownPixelsIsSpace"; - this.numericUpDownPixelsIsSpace.Size = new System.Drawing.Size(50, 21); - this.numericUpDownPixelsIsSpace.TabIndex = 5; - this.numericUpDownPixelsIsSpace.Value = new decimal(new int[] { - 1, - 0, - 0, - 0}); - // - // buttonEditCharacterDatabase - // - this.buttonEditCharacterDatabase.Location = new System.Drawing.Point(278, 46); - this.buttonEditCharacterDatabase.Name = "buttonEditCharacterDatabase"; - 
this.buttonEditCharacterDatabase.Size = new System.Drawing.Size(68, 21); - this.buttonEditCharacterDatabase.TabIndex = 3; - this.buttonEditCharacterDatabase.Text = "Edit"; - this.buttonEditCharacterDatabase.UseVisualStyleBackColor = true; - this.buttonEditCharacterDatabase.Click += new System.EventHandler(this.ButtonEditCharacterDatabaseClick); - // - // labelNoOfPixelsIsSpace - // - this.labelNoOfPixelsIsSpace.AutoSize = true; - this.labelNoOfPixelsIsSpace.Location = new System.Drawing.Point(6, 56); - this.labelNoOfPixelsIsSpace.Name = "labelNoOfPixelsIsSpace"; - this.labelNoOfPixelsIsSpace.Size = new System.Drawing.Size(104, 13); - this.labelNoOfPixelsIsSpace.TabIndex = 4; - this.labelNoOfPixelsIsSpace.Text = "No of pixels is space"; - // - // comboBoxCharacterDatabase - // - this.comboBoxCharacterDatabase.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; - this.comboBoxCharacterDatabase.FormattingEnabled = true; - this.comboBoxCharacterDatabase.Location = new System.Drawing.Point(130, 19); - this.comboBoxCharacterDatabase.Name = "comboBoxCharacterDatabase"; - this.comboBoxCharacterDatabase.Size = new System.Drawing.Size(142, 21); - this.comboBoxCharacterDatabase.TabIndex = 1; - this.comboBoxCharacterDatabase.SelectedIndexChanged += new System.EventHandler(this.ComboBoxCharacterDatabaseSelectedIndexChanged); - // - // labelImageDatabase - // - this.labelImageDatabase.AutoSize = true; - this.labelImageDatabase.Location = new System.Drawing.Point(7, 22); - this.labelImageDatabase.Name = "labelImageDatabase"; - this.labelImageDatabase.Size = new System.Drawing.Size(85, 13); - this.labelImageDatabase.TabIndex = 0; - this.labelImageDatabase.Text = "Image database"; - // - // buttonNewCharacterDatabase - // - this.buttonNewCharacterDatabase.Location = new System.Drawing.Point(278, 19); - this.buttonNewCharacterDatabase.Name = "buttonNewCharacterDatabase"; - this.buttonNewCharacterDatabase.Size = new System.Drawing.Size(68, 21); - 
this.buttonNewCharacterDatabase.TabIndex = 2; - this.buttonNewCharacterDatabase.Text = "New"; - this.buttonNewCharacterDatabase.UseVisualStyleBackColor = true; - this.buttonNewCharacterDatabase.Click += new System.EventHandler(this.ButtonNewCharacterDatabaseClick); - // // groupBoxOCRControls // this.groupBoxOCRControls.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); @@ -1309,48 +1346,6 @@ namespace Nikse.SubtitleEdit.Forms this.splitContainerBottom.SplitterDistance = 658; this.splitContainerBottom.TabIndex = 39; // - // timerHideStatus - // - this.timerHideStatus.Interval = 2000; - this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick); - // - // labelMaxErrorPercent - // - this.labelMaxErrorPercent.AutoSize = true; - this.labelMaxErrorPercent.Location = new System.Drawing.Point(7, 83); - this.labelMaxErrorPercent.Name = "labelMaxErrorPercent"; - this.labelMaxErrorPercent.Size = new System.Drawing.Size(55, 13); - this.labelMaxErrorPercent.TabIndex = 45; - this.labelMaxErrorPercent.Text = "Max Err%"; - // - // numericUpDownMaxErrorPct - // - this.numericUpDownMaxErrorPct.DecimalPlaces = 1; - this.numericUpDownMaxErrorPct.Increment = new decimal(new int[] { - 1, - 0, - 0, - 65536}); - this.numericUpDownMaxErrorPct.Location = new System.Drawing.Point(173, 81); - this.numericUpDownMaxErrorPct.Maximum = new decimal(new int[] { - 20, - 0, - 0, - 0}); - this.numericUpDownMaxErrorPct.Minimum = new decimal(new int[] { - 5, - 0, - 0, - 65536}); - this.numericUpDownMaxErrorPct.Name = "numericUpDownMaxErrorPct"; - this.numericUpDownMaxErrorPct.Size = new System.Drawing.Size(50, 21); - this.numericUpDownMaxErrorPct.TabIndex = 44; - this.numericUpDownMaxErrorPct.Value = new decimal(new int[] { - 10, - 0, - 0, - 65536}); - // // textBoxCurrentText // this.textBoxCurrentText.AllowDrop = true; @@ -1388,6 +1383,11 @@ namespace Nikse.SubtitleEdit.Forms 
this.subtitleListView1.SelectedIndexChanged += new System.EventHandler(this.SubtitleListView1SelectedIndexChanged); this.subtitleListView1.KeyDown += new System.Windows.Forms.KeyEventHandler(this.subtitleListView1_KeyDown); // + // timerHideStatus + // + this.timerHideStatus.Interval = 2000; + this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick); + // // VobSubOcr // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); @@ -1415,6 +1415,10 @@ namespace Nikse.SubtitleEdit.Forms this.Resize += new System.EventHandler(this.VobSubOcr_Resize); this.contextMenuStripListview.ResumeLayout(false); this.groupBoxOcrMethod.ResumeLayout(false); + this.groupBoxImageCompareMethod.ResumeLayout(false); + this.groupBoxImageCompareMethod.PerformLayout(); + ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit(); this.GroupBoxTesseractMethod.ResumeLayout(false); this.GroupBoxTesseractMethod.PerformLayout(); this.groupBoxModiMethod.ResumeLayout(false); @@ -1422,9 +1426,6 @@ namespace Nikse.SubtitleEdit.Forms this.groupBoxNOCR.ResumeLayout(false); this.groupBoxNOCR.PerformLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).EndInit(); - this.groupBoxImageCompareMethod.ResumeLayout(false); - this.groupBoxImageCompareMethod.PerformLayout(); - ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit(); this.groupBoxOCRControls.ResumeLayout(false); this.groupBoxOCRControls.PerformLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).EndInit(); @@ -1450,7 +1451,6 @@ namespace Nikse.SubtitleEdit.Forms this.splitContainerBottom.Panel1.PerformLayout(); this.splitContainerBottom.Panel2.ResumeLayout(false); this.splitContainerBottom.ResumeLayout(false); - 
((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit(); this.ResumeLayout(false); this.PerformLayout(); diff --git a/src/Forms/VobSubOcr.cs b/src/Forms/VobSubOcr.cs index bb4102d37..826d615ec 100644 --- a/src/Forms/VobSubOcr.cs +++ b/src/Forms/VobSubOcr.cs @@ -3077,6 +3077,55 @@ namespace Nikse.SubtitleEdit.Forms return nocrChars; } + public static List<NOcrChar> LoadNOcrForTesseract(string xmlRessourceName) + { + var nocrChars = new List<NOcrChar>(); + System.Reflection.Assembly asm = System.Reflection.Assembly.GetExecutingAssembly(); + Stream strm = asm.GetManifestResourceStream(xmlRessourceName); + if (strm != null) + { + XmlDocument doc = new XmlDocument(); + var rdr = new StreamReader(strm); + using (var zip = new System.IO.Compression.GZipStream(rdr.BaseStream, System.IO.Compression.CompressionMode.Decompress)) + { + /* Let XmlDocument consume the whole decompressed stream: a single Read into a fixed + 175000-byte buffer may return early, truncates larger resources and pads the XML text with NUL characters. */ + doc.Load(zip); + } + rdr.Close(); + + try + { + foreach (XmlNode node in doc.DocumentElement.SelectNodes("Char")) + { + var oc = new NOcrChar(node.Attributes["Text"].Value); + oc.Width = Convert.ToInt32(node.Attributes["Width"].Value, CultureInfo.InvariantCulture); + oc.Height = Convert.ToInt32(node.Attributes["Height"].Value, CultureInfo.InvariantCulture); + oc.MarginTop = Convert.ToInt32(node.Attributes["MarginTop"].Value, CultureInfo.InvariantCulture); + if (node.Attributes["Italic"] != null) + oc.Italic = Convert.ToBoolean(node.Attributes["Italic"].Value, CultureInfo.InvariantCulture); + if (node.Attributes["ExpandCount"] != null) + oc.ExpandCount = Convert.ToInt32(node.Attributes["ExpandCount"].Value, CultureInfo.InvariantCulture); + foreach (XmlNode pointNode in node.SelectNodes("Point")) + { + var op = new NOcrPoint(DecodePoint(pointNode.Attributes["Start"].Value), DecodePoint(pointNode.Attributes["End"].Value)); + XmlAttribute a = pointNode.Attributes["On"]; + if (a != null && Convert.ToBoolean(a.Value)) + 
oc.LinesForeground.Add(op); + else + oc.LinesBackground.Add(op); + } + nocrChars.Add(oc); + } + } + catch (Exception exception) + { + MessageBox.Show(exception.Message); + } + } + return nocrChars; + } + private static Point DecodePoint(string text) { var arr = text.Split(','); @@ -3995,6 +4044,125 @@ namespace Nikse.SubtitleEdit.Forms return "" + Utilities.RemoveHtmlTags(GetStringWithItalicTags(matches)) + ""; } + public string NocrFastCheck(Bitmap bitmap) + { + var nbmpInput = new NikseBitmap(bitmap); + nbmpInput.ReplaceNonWhiteWithTransparent(); + //bitmap = nbmp.GetBitmap(); + + var matches = new List(); + List list = NikseBitmapImageSplitter.SplitBitmapToLetters(nbmpInput, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom); + + foreach (ImageSplitterItem item in list) + { + if (item.NikseBitmap != null) + { + item.NikseBitmap.ReplaceNonWhiteWithTransparent(); + item.Y += item.NikseBitmap.CropTopTransparent(0); + item.NikseBitmap.CropTransparentSidesAndBottom(0, true); + item.NikseBitmap.ReplaceTransparentWith(Color.Black); + } + } + int index = 0; + var expandSelectionList = new List(); + while (index < list.Count) + { + ImageSplitterItem item = list[index]; + if (item.NikseBitmap == null) + { + matches.Add(new CompareMatch(item.SpecialCharacter, false, 0, null)); + } + else + { + CompareMatch match = null; // = GetNOcrCompareMatch(item, bitmap, _nocrChars, _unItalicFactor, checkBoxNOcrItalic.Checked, !checkBoxNOcrCorrect.Checked); + + var nbmp = item.NikseBitmap; + int index2 = 0; + int topMargin = item.Y - item.ParentY; + foreach (NOcrChar oc in _nocrChars) + { + if (Math.Abs(oc.Width - nbmp.Width) < 3 && Math.Abs(oc.Height - nbmp.Height) < 4 && Math.Abs(oc.MarginTop - topMargin) < 4) + { // only very accurate matches + + bool ok = true; + index2 = 0; + while (index2 < oc.LinesForeground.Count && ok) + { + NOcrPoint op = oc.LinesForeground[index2]; + foreach (Point point in 
op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height)) + { + if (point.X >= 0 && point.Y >= 0 && point.X < nbmp.Width && point.Y < nbmp.Height) + { + Color c = nbmp.GetPixel(point.X, point.Y); + if (c.A > 150 && c.R + c.G + c.B > NocrMinColor) + { + } + else + { + Point p = new Point(point.X - 1, point.Y); + if (p.X < 0) + p.X = 1; + c = nbmp.GetPixel(p.X, p.Y); + if (nbmp.Width > 20 && c.A > 150 && c.R + c.G + c.B > NocrMinColor) + { + } + else + { + ok = false; + break; + } + } + } + } + index2++; + } + index2 = 0; + while (index2 < oc.LinesBackground.Count && ok) + { + NOcrPoint op = oc.LinesBackground[index2]; + foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height)) + { + if (point.X >= 0 && point.Y >= 0 && point.X < nbmp.Width && point.Y < nbmp.Height) + { + Color c = nbmp.GetPixel(point.X, point.Y); + if (c.A > 150 && c.R + c.G + c.B > NocrMinColor) + { + Point p = new Point(point.X, point.Y); + if (oc.Width > 19 && point.X > 0) + p.X = p.X - 1; + c = nbmp.GetPixel(p.X, p.Y); + if (c.A > 150 && c.R + c.G + c.B > NocrMinColor) + { + ok = false; + break; + } + } + } + } + index2++; + } + if (ok) + match = new CompareMatch(oc.Text, oc.Italic, 0, null); + } + } + + + if (match == null) + { + matches.Add(new CompareMatch("*", false, 0, null)); + } + else // found image match + { + matches.Add(new CompareMatch(match.Text, match.Italic, 0, null)); + if (match.ExpandCount > 0) + index += match.ExpandCount - 1; + } + } + index++; + } + return GetStringWithItalicTags(matches); + } + static void NOcrThreadDoWork(object sender, DoWorkEventArgs e) { var p = (NOcrThreadParameter)e.Argument; @@ -4107,6 +4275,7 @@ namespace Nikse.SubtitleEdit.Forms if (comboBoxOcrMethod.SelectedIndex == 0 && _tesseractAsyncStrings == null) { + _nocrChars = null; _tesseractAsyncStrings = new string[max]; _tesseractAsyncIndex = (int)numericUpDownStartNumber.Value + 5; _tesseractThread = new BackgroundWorker(); @@ -4886,7 +5055,7 @@ namespace Nikse.SubtitleEdit.Forms line = 
line.Replace(" ", " "); } } - + if (wordsNotFound > 0 || correctWords == 0 || textWithOutFixes != null && textWithOutFixes.ToString().Replace("~", string.Empty).Trim().Length < 2) { @@ -4948,6 +5117,38 @@ namespace Nikse.SubtitleEdit.Forms return string.Empty; } + string tmp = Utilities.RemoveHtmlTags(line).Trim(); + if (!tmp.Trim().EndsWith("...")) + { + tmp = tmp.TrimEnd('.').TrimEnd(); + if (tmp.Length > 2 && Utilities.LowercaseLetters.Contains(tmp.Substring(tmp.Length - 1, 1))) + { + if (_nocrChars == null) + _nocrChars = LoadNOcrForTesseract("Nikse.SubtitleEdit.Resources.nOCR_TesseractHelper.xml.zip"); + string text = Utilities.RemoveHtmlTags(NocrFastCheck(bitmap).TrimEnd()); + string post = string.Empty; + if (line.EndsWith("</i>")) + { + post = "</i>"; + line = line.Remove(line.Length - 4, 4).Trim(); + } + if (text.EndsWith(".")) + { + line = line.TrimEnd('.').Trim(); + while (text.EndsWith(".") || text.EndsWith(" ")) + { + line += text.Substring(text.Length - 1).Trim(); + text = text.Remove(text.Length - 1, 1); + } + } + else if (line.EndsWith("l") && text.EndsWith("!") && !text.EndsWith("l!")) /* line ends in "l" but the nOCR pass read "!": presumably a misread exclamation mark, so take the nOCR character */ + { + line = line.Remove(line.Length - 1, 1) + "!"; + } + line += post; + } + } + // Log used word guesses (via word replace list) foreach (string guess in _ocrFixEngine.AutoGuessesUsed) listBoxLogSuggestions.Items.Add(guess); diff --git a/src/Logic/Subtitle.cs b/src/Logic/Subtitle.cs index 990d13599..4ffd064eb 100644 --- a/src/Logic/Subtitle.cs +++ b/src/Logic/Subtitle.cs @@ -120,7 +120,16 @@ namespace Nikse.SubtitleEdit.Logic StreamReader sr; if (useThisEncoding != null) { - sr = new StreamReader(fileName, useThisEncoding); + try + { + sr = new StreamReader(fileName, useThisEncoding); + } + catch (Exception exception) + { + System.Windows.Forms.MessageBox.Show(exception.Message); + encoding = Encoding.UTF8; + return null; + } } else { diff --git a/src/Logic/SubtitleFormats/AvidStl.cs b/src/Logic/SubtitleFormats/AvidStl.cs index 9e1066b52..6560c603c 100644 --- 
a/src/Logic/SubtitleFormats/AvidStl.cs +++ b/src/Logic/SubtitleFormats/AvidStl.cs @@ -122,21 +122,28 @@ namespace Nikse.SubtitleEdit.Logic.SubtitleFormats { if (!string.IsNullOrEmpty(fileName) && File.Exists(fileName)) { - var fi = new FileInfo(fileName); - if (fi.Length > 1150 && fi.Length < 1024000) // not too small or too big + try { - byte[] buffer = File.ReadAllBytes(fileName); - if (buffer[0] == 0x38 && - buffer[1] == 0x35 && - buffer[2] == 0x30 && - buffer[1024] == 0 && - buffer[1025] == 0 && - buffer[1026] == 0 && - buffer[1027] == 0xff) + var fi = new FileInfo(fileName); + if (fi.Length > 1150 && fi.Length < 1024000) // not too small or too big { - return true; + byte[] buffer = File.ReadAllBytes(fileName); + if (buffer[0] == 0x38 && + buffer[1] == 0x35 && + buffer[2] == 0x30 && + buffer[1024] == 0 && + buffer[1025] == 0 && + buffer[1026] == 0 && + buffer[1027] == 0xff) + { + return true; + } } } + catch + { + return false; + } } return false; } diff --git a/src/Logic/SubtitleFormats/Ebu.cs b/src/Logic/SubtitleFormats/Ebu.cs index 0be01d18d..3ad9fb0f8 100644 --- a/src/Logic/SubtitleFormats/Ebu.cs +++ b/src/Logic/SubtitleFormats/Ebu.cs @@ -543,15 +543,22 @@ namespace Nikse.SubtitleEdit.Logic.SubtitleFormats var fi = new FileInfo(fileName); if (fi.Length > 1024 + 128 && fi.Length < 1024000) // not too small or too big { - byte[] buffer = File.ReadAllBytes(fileName); - EbuGeneralSubtitleInformation header = ReadHeader(buffer); - if (header.DiskFormatCode.StartsWith("STL23") || - header.DiskFormatCode.StartsWith("STL24") || - header.DiskFormatCode.StartsWith("STL25") || - header.DiskFormatCode.StartsWith("STL29") || - header.DiskFormatCode.StartsWith("STL30")) + try { - return Utilities.IsInteger(header.CodePageNumber) || fileName.ToLower().EndsWith("stl"); + byte[] buffer = File.ReadAllBytes(fileName); + EbuGeneralSubtitleInformation header = ReadHeader(buffer); + if (header.DiskFormatCode.StartsWith("STL23") || + header.DiskFormatCode.StartsWith("STL24") || 
+ header.DiskFormatCode.StartsWith("STL25") || + header.DiskFormatCode.StartsWith("STL29") || + header.DiskFormatCode.StartsWith("STL30")) + { + return Utilities.IsInteger(header.CodePageNumber) || fileName.ToLower().EndsWith("stl"); + } + } + catch + { + return false; } } } diff --git a/src/Logic/SubtitleFormats/Pac.cs b/src/Logic/SubtitleFormats/Pac.cs index b87d7c69c..56a4fa015 100644 --- a/src/Logic/SubtitleFormats/Pac.cs +++ b/src/Logic/SubtitleFormats/Pac.cs @@ -856,39 +856,46 @@ namespace Nikse.SubtitleEdit.Logic.SubtitleFormats public override bool IsMine(List lines, string fileName) { - if (!string.IsNullOrEmpty(fileName) && File.Exists(fileName)) + if (!string.IsNullOrEmpty(fileName) && File.Exists(fileName)) { - var fi = new FileInfo(fileName); - if (fi.Length > 100 && fi.Length < 1024000) // not too small or too big + try { - byte[] buffer = File.ReadAllBytes(fileName); + var fi = new FileInfo(fileName); + if (fi.Length > 100 && fi.Length < 1024000) // not too small or too big + { + byte[] buffer = File.ReadAllBytes(fileName); - if (buffer[00] == 1 && // These bytes seems to be PAC files... TODO: Verify! - buffer[01] == 0 && - buffer[02] == 0 && - buffer[03] == 0 && - buffer[04] == 0 && - buffer[05] == 0 && - buffer[06] == 0 && - buffer[07] == 0 && - buffer[08] == 0 && - buffer[09] == 0 && - buffer[10] == 0 && - buffer[11] == 0 && - buffer[12] == 0 && - buffer[13] == 0 && - buffer[14] == 0 && - buffer[15] == 0 && - buffer[16] == 0 && - buffer[17] == 0 && - buffer[18] == 0 && - buffer[19] == 0 && - buffer[20] == 0 && - //buffer[21] < 10 && // start from number - //buffer[22] == 0 && - buffer[23] == 0x60 && - fileName.ToLower().EndsWith(".pac")) - return true; + if (buffer[00] == 1 && // These bytes seems to be PAC files... TODO: Verify! 
+ buffer[01] == 0 && + buffer[02] == 0 && + buffer[03] == 0 && + buffer[04] == 0 && + buffer[05] == 0 && + buffer[06] == 0 && + buffer[07] == 0 && + buffer[08] == 0 && + buffer[09] == 0 && + buffer[10] == 0 && + buffer[11] == 0 && + buffer[12] == 0 && + buffer[13] == 0 && + buffer[14] == 0 && + buffer[15] == 0 && + buffer[16] == 0 && + buffer[17] == 0 && + buffer[18] == 0 && + buffer[19] == 0 && + buffer[20] == 0 && + //buffer[21] < 10 && // start from number + //buffer[22] == 0 && + buffer[23] == 0x60 && + fileName.ToLower().EndsWith(".pac")) + return true; + } + } + catch + { + return false; } } return false; diff --git a/src/Logic/SubtitleFormats/Spt.cs b/src/Logic/SubtitleFormats/Spt.cs index 41fcc3222..69a952210 100644 --- a/src/Logic/SubtitleFormats/Spt.cs +++ b/src/Logic/SubtitleFormats/Spt.cs @@ -80,15 +80,22 @@ namespace Nikse.SubtitleEdit.Logic.SubtitleFormats { if (!string.IsNullOrEmpty(fileName) && File.Exists(fileName)) { - FileInfo fi = new FileInfo(fileName); - if (fi.Length > 100 && fi.Length < 1024000) // not too small or too big + try { - byte[] buffer = File.ReadAllBytes(fileName); + FileInfo fi = new FileInfo(fileName); + if (fi.Length > 100 && fi.Length < 1024000) // not too small or too big + { + byte[] buffer = File.ReadAllBytes(fileName); - if (buffer[00] > 10 && - buffer[01] == 0 && - fileName.ToLower().EndsWith(".spt")) - return true; + if (buffer[00] > 10 && + buffer[01] == 0 && + fileName.ToLower().EndsWith(".spt")) + return true; + } + } + catch + { + return false; } } return false; diff --git a/src/Logic/SubtitleFormats/TSB4.cs b/src/Logic/SubtitleFormats/TSB4.cs index 5d46e2944..374e283e3 100644 --- a/src/Logic/SubtitleFormats/TSB4.cs +++ b/src/Logic/SubtitleFormats/TSB4.cs @@ -41,7 +41,16 @@ namespace Nikse.SubtitleEdit.Logic.SubtitleFormats { return; } - byte[] array = File.ReadAllBytes(fileName); + byte[] array; + try + { + array = File.ReadAllBytes(fileName); + } + catch + { + _errorCount++; + return; + } if (array.Length < 100) { 
return; diff --git a/src/Resources/nOCR_TesseractHelper.xml.zip b/src/Resources/nOCR_TesseractHelper.xml.zip new file mode 100644 index 000000000..39b916f66 Binary files /dev/null and b/src/Resources/nOCR_TesseractHelper.xml.zip differ diff --git a/src/SubtitleEdit.csproj b/src/SubtitleEdit.csproj index 7f07a733e..89e3287e1 100644 --- a/src/SubtitleEdit.csproj +++ b/src/SubtitleEdit.csproj @@ -1438,6 +1438,7 @@ + Reference.map