Fix minor OCR issues

This commit is contained in:
Nikolaj Olsson 2019-01-08 20:42:20 +01:00
parent a9526f8b52
commit c46f5653d8
2 changed files with 23 additions and 22 deletions

View File

@ -43,7 +43,7 @@
this.comboBoxDictionaries.FormattingEnabled = true;
this.comboBoxDictionaries.Location = new System.Drawing.Point(15, 85);
this.comboBoxDictionaries.Name = "comboBoxDictionaries";
this.comboBoxDictionaries.Size = new System.Drawing.Size(191, 21);
this.comboBoxDictionaries.Size = new System.Drawing.Size(282, 21);
this.comboBoxDictionaries.TabIndex = 20;
//
// labelLanguage
@ -57,8 +57,9 @@
//
// buttonOK
//
this.buttonOK.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
this.buttonOK.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonOK.Location = new System.Drawing.Point(50, 127);
this.buttonOK.Location = new System.Drawing.Point(141, 127);
this.buttonOK.Name = "buttonOK";
this.buttonOK.Size = new System.Drawing.Size(75, 21);
this.buttonOK.TabIndex = 21;
@ -68,9 +69,10 @@
//
// buttonCancel
//
this.buttonCancel.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
this.buttonCancel.DialogResult = System.Windows.Forms.DialogResult.Cancel;
this.buttonCancel.ImeMode = System.Windows.Forms.ImeMode.NoControl;
this.buttonCancel.Location = new System.Drawing.Point(131, 127);
this.buttonCancel.Location = new System.Drawing.Point(222, 127);
this.buttonCancel.Name = "buttonCancel";
this.buttonCancel.Size = new System.Drawing.Size(75, 21);
this.buttonCancel.TabIndex = 22;
@ -89,23 +91,23 @@
//
// textBoxOcrFixValue
//
this.textBoxOcrFixValue.Location = new System.Drawing.Point(113, 29);
this.textBoxOcrFixValue.Location = new System.Drawing.Point(141, 29);
this.textBoxOcrFixValue.Name = "textBoxOcrFixValue";
this.textBoxOcrFixValue.Size = new System.Drawing.Size(93, 20);
this.textBoxOcrFixValue.Size = new System.Drawing.Size(120, 20);
this.textBoxOcrFixValue.TabIndex = 47;
//
// textBoxOcrFixKey
//
this.textBoxOcrFixKey.Location = new System.Drawing.Point(15, 29);
this.textBoxOcrFixKey.Name = "textBoxOcrFixKey";
this.textBoxOcrFixKey.Size = new System.Drawing.Size(93, 20);
this.textBoxOcrFixKey.Size = new System.Drawing.Size(120, 20);
this.textBoxOcrFixKey.TabIndex = 46;
//
// AddToOcrReplaceList
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(231, 159);
this.ClientSize = new System.Drawing.Size(309, 159);
this.Controls.Add(this.textBoxOcrFixValue);
this.Controls.Add(this.textBoxOcrFixKey);
this.Controls.Add(this.comboBoxDictionaries);

View File

@ -1050,44 +1050,43 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if (Configuration.Settings.Tools.SpellCheckOneLetterWords)
minLength = 1;
string[] words = tempLine.Replace("</i>", string.Empty).Split(SplitChars, StringSplitOptions.RemoveEmptyEntries);
string[] words = tempLine.Replace("<i>", string.Empty).Replace("</i>", string.Empty).Split(SplitChars, StringSplitOptions.RemoveEmptyEntries);
for (int i = 0; i < words.Length; i++)
{
string word = words[i].TrimStart('\'');
string wordNotEndTrimmed = word;
word = word.TrimEnd('\'');
string wordNoItalics = HtmlUtil.RemoveOpenCloseTags(word, HtmlUtil.TagItalic);
if (!IsWordKnownOrNumber(wordNoItalics, line) && !localIgnoreWords.Contains(wordNoItalics))
if (!IsWordKnownOrNumber(word, line) && !localIgnoreWords.Contains(word))
{
bool correct = wordNoItalics.Length > minLength && DoSpell(wordNoItalics);
bool correct = word.Length > minLength && DoSpell(word);
if (!correct)
correct = wordNoItalics.Length > minLength + 1 && DoSpell(wordNoItalics.Trim('\''));
if (!correct && wordNoItalics.Length > 3 && !wordNoItalics.EndsWith("ss", StringComparison.Ordinal) && !string.IsNullOrEmpty(_threeLetterIsoLanguageName) &&
correct = word.Length > minLength + 1 && DoSpell(word.Trim('\''));
if (!correct && word.Length > 3 && !word.EndsWith("ss", StringComparison.Ordinal) && !string.IsNullOrEmpty(_threeLetterIsoLanguageName) &&
(_threeLetterIsoLanguageName == "eng" || _threeLetterIsoLanguageName == "dan" || _threeLetterIsoLanguageName == "swe" || _threeLetterIsoLanguageName == "nld"))
correct = DoSpell(wordNoItalics.TrimEnd('s'));
correct = DoSpell(word.TrimEnd('s'));
if (!correct)
correct = wordNoItalics.Length > minLength && DoSpell(wordNoItalics);
if (!correct && _userWordList.Contains(wordNoItalics))
correct = word.Length > minLength && DoSpell(word);
if (!correct && _userWordList.Contains(word))
correct = true;
if (!correct && !line.Contains(word))
correct = true; // already fixed
if (!correct && Configuration.Settings.Tools.SpellCheckEnglishAllowInQuoteAsIng && wordNotEndTrimmed.EndsWith('\'') &&
SpellCheckDictionaryName.StartsWith("en_", StringComparison.Ordinal) && wordNoItalics.EndsWith("in", StringComparison.OrdinalIgnoreCase))
SpellCheckDictionaryName.StartsWith("en_", StringComparison.Ordinal) && word.EndsWith("in", StringComparison.OrdinalIgnoreCase))
{
correct = DoSpell(wordNoItalics + "g");
correct = DoSpell(word + "g");
}
if (_threeLetterIsoLanguageName == "eng" && (wordNoItalics.Equals("a", StringComparison.OrdinalIgnoreCase) || wordNoItalics == "I"))
if (_threeLetterIsoLanguageName == "eng" && (word.Equals("a", StringComparison.OrdinalIgnoreCase) || word == "I"))
correct = true;
else if (_threeLetterIsoLanguageName == "dan" && wordNoItalics.Equals("i", StringComparison.OrdinalIgnoreCase))
else if (_threeLetterIsoLanguageName == "dan" && word.Equals("i", StringComparison.OrdinalIgnoreCase))
correct = true;
if (!correct)
{
// look for a match via a dashed word, e.g. sci-fi
string dashedWord = GetDashedWordBefore(wordNoItalics, line, words, i);
string dashedWord = GetDashedWordBefore(word, line, words, i);
if (!string.IsNullOrEmpty(dashedWord))
{
correct = IsWordKnownOrNumber(dashedWord, line);
@ -1096,7 +1095,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
}
if (!correct)
{
dashedWord = GetDashedWordAfter(wordNoItalics, line, words, i);
dashedWord = GetDashedWordAfter(word, line, words, i);
if (!string.IsNullOrEmpty(dashedWord))
{
correct = IsWordKnownOrNumber(dashedWord, line);