From c46f5653d88717fe0adae69e590e19d9ee0d9a4a Mon Sep 17 00:00:00 2001 From: Nikolaj Olsson Date: Tue, 8 Jan 2019 20:42:20 +0100 Subject: [PATCH] Fix minor ocr issues --- src/Forms/Ocr/AddToOcrReplaceList.Designer.cs | 16 +++++----- src/Logic/OCR/OcrFixEngine.cs | 29 +++++++++---------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/Forms/Ocr/AddToOcrReplaceList.Designer.cs b/src/Forms/Ocr/AddToOcrReplaceList.Designer.cs index 9a30bb7ec..545cf332f 100644 --- a/src/Forms/Ocr/AddToOcrReplaceList.Designer.cs +++ b/src/Forms/Ocr/AddToOcrReplaceList.Designer.cs @@ -43,7 +43,7 @@ this.comboBoxDictionaries.FormattingEnabled = true; this.comboBoxDictionaries.Location = new System.Drawing.Point(15, 85); this.comboBoxDictionaries.Name = "comboBoxDictionaries"; - this.comboBoxDictionaries.Size = new System.Drawing.Size(191, 21); + this.comboBoxDictionaries.Size = new System.Drawing.Size(282, 21); this.comboBoxDictionaries.TabIndex = 20; // // labelLanguage @@ -57,8 +57,9 @@ // // buttonOK // + this.buttonOK.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); this.buttonOK.ImeMode = System.Windows.Forms.ImeMode.NoControl; - this.buttonOK.Location = new System.Drawing.Point(50, 127); + this.buttonOK.Location = new System.Drawing.Point(141, 127); this.buttonOK.Name = "buttonOK"; this.buttonOK.Size = new System.Drawing.Size(75, 21); this.buttonOK.TabIndex = 21; @@ -68,9 +69,10 @@ // // buttonCancel // + this.buttonCancel.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); this.buttonCancel.DialogResult = System.Windows.Forms.DialogResult.Cancel; this.buttonCancel.ImeMode = System.Windows.Forms.ImeMode.NoControl; - this.buttonCancel.Location = new System.Drawing.Point(131, 127); + this.buttonCancel.Location = new System.Drawing.Point(222, 127); this.buttonCancel.Name = "buttonCancel"; this.buttonCancel.Size = new System.Drawing.Size(75, 21); this.buttonCancel.TabIndex = 22; @@ -89,23 +91,23 @@ // // textBoxOcrFixValue // - this.textBoxOcrFixValue.Location = new System.Drawing.Point(113, 29); + this.textBoxOcrFixValue.Location = new System.Drawing.Point(141, 29); this.textBoxOcrFixValue.Name = "textBoxOcrFixValue"; - this.textBoxOcrFixValue.Size = new System.Drawing.Size(93, 20); + this.textBoxOcrFixValue.Size = new System.Drawing.Size(120, 20); this.textBoxOcrFixValue.TabIndex = 47; // // textBoxOcrFixKey // this.textBoxOcrFixKey.Location = new System.Drawing.Point(15, 29); this.textBoxOcrFixKey.Name = "textBoxOcrFixKey"; - this.textBoxOcrFixKey.Size = new System.Drawing.Size(93, 20); + this.textBoxOcrFixKey.Size = new System.Drawing.Size(120, 20); this.textBoxOcrFixKey.TabIndex = 46; // // AddToOcrReplaceList // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; - this.ClientSize = new System.Drawing.Size(231, 159); + this.ClientSize = new System.Drawing.Size(309, 159); this.Controls.Add(this.textBoxOcrFixValue); this.Controls.Add(this.textBoxOcrFixKey); this.Controls.Add(this.comboBoxDictionaries); diff --git a/src/Logic/OCR/OcrFixEngine.cs b/src/Logic/OCR/OcrFixEngine.cs index a7c65913e..03637ac14 100644 --- a/src/Logic/OCR/OcrFixEngine.cs +++ b/src/Logic/OCR/OcrFixEngine.cs @@ -1050,44 +1050,43 @@ namespace Nikse.SubtitleEdit.Logic.Ocr if (Configuration.Settings.Tools.SpellCheckOneLetterWords) minLength = 1; - string[] words = tempLine.Replace("", string.Empty).Split(SplitChars, StringSplitOptions.RemoveEmptyEntries); + string[] words = tempLine.Replace("", string.Empty).Replace("", string.Empty).Split(SplitChars, StringSplitOptions.RemoveEmptyEntries); for (int i = 0; i < words.Length; i++) { string word = words[i].TrimStart('\''); string wordNotEndTrimmed = word; word = word.TrimEnd('\''); - string wordNoItalics = HtmlUtil.RemoveOpenCloseTags(word, HtmlUtil.TagItalic); - if (!IsWordKnownOrNumber(wordNoItalics, line) && !localIgnoreWords.Contains(wordNoItalics)) + if (!IsWordKnownOrNumber(word, line) && !localIgnoreWords.Contains(word)) { - bool correct = wordNoItalics.Length > minLength && DoSpell(wordNoItalics); + bool correct = word.Length > minLength && DoSpell(word); if (!correct) - correct = wordNoItalics.Length > minLength + 1 && DoSpell(wordNoItalics.Trim('\'')); - if (!correct && wordNoItalics.Length > 3 && !wordNoItalics.EndsWith("ss", StringComparison.Ordinal) && !string.IsNullOrEmpty(_threeLetterIsoLanguageName) && + correct = word.Length > minLength + 1 && DoSpell(word.Trim('\'')); + if (!correct && word.Length > 3 && !word.EndsWith("ss", StringComparison.Ordinal) && !string.IsNullOrEmpty(_threeLetterIsoLanguageName) && (_threeLetterIsoLanguageName == "eng" || _threeLetterIsoLanguageName == "dan" || _threeLetterIsoLanguageName == "swe" || _threeLetterIsoLanguageName == "nld")) - correct = DoSpell(wordNoItalics.TrimEnd('s')); + correct = DoSpell(word.TrimEnd('s')); if (!correct) - correct = wordNoItalics.Length > minLength && DoSpell(wordNoItalics); - if (!correct && _userWordList.Contains(wordNoItalics)) + correct = word.Length > minLength && DoSpell(word); + if (!correct && _userWordList.Contains(word)) correct = true; if (!correct && !line.Contains(word)) correct = true; // already fixed if (!correct && Configuration.Settings.Tools.SpellCheckEnglishAllowInQuoteAsIng && wordNotEndTrimmed.EndsWith('\'') && - SpellCheckDictionaryName.StartsWith("en_", StringComparison.Ordinal) && wordNoItalics.EndsWith("in", StringComparison.OrdinalIgnoreCase)) + SpellCheckDictionaryName.StartsWith("en_", StringComparison.Ordinal) && word.EndsWith("in", StringComparison.OrdinalIgnoreCase)) { - correct = DoSpell(wordNoItalics + "g"); + correct = DoSpell(word + "g"); } - if (_threeLetterIsoLanguageName == "eng" && (wordNoItalics.Equals("a", StringComparison.OrdinalIgnoreCase) || wordNoItalics == "I")) + if (_threeLetterIsoLanguageName == "eng" && (word.Equals("a", StringComparison.OrdinalIgnoreCase) || word == "I")) correct = true; - else if (_threeLetterIsoLanguageName == "dan" && wordNoItalics.Equals("i", StringComparison.OrdinalIgnoreCase)) + else if (_threeLetterIsoLanguageName == "dan" && word.Equals("i", StringComparison.OrdinalIgnoreCase)) correct = true; if (!correct) { //look for match via dash'ed word, e.g. sci-fi - string dashedWord = GetDashedWordBefore(wordNoItalics, line, words, i); + string dashedWord = GetDashedWordBefore(word, line, words, i); if (!string.IsNullOrEmpty(dashedWord)) { correct = IsWordKnownOrNumber(dashedWord, line); @@ -1096,7 +1095,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr } if (!correct) { - dashedWord = GetDashedWordAfter(wordNoItalics, line, words, i); + dashedWord = GetDashedWordAfter(word, line, words, i); if (!string.IsNullOrEmpty(dashedWord)) { correct = IsWordKnownOrNumber(dashedWord, line);