Work on OCR

This commit is contained in:
Nikolaj Olsson 2017-12-18 19:18:25 +01:00
parent 9fb6cb09b6
commit 1997aaa7f3
6 changed files with 57 additions and 9 deletions

View File

@ -22,6 +22,7 @@
<Word from="ahi" to="ahí" />
<Word from="angel" to="ángel" />
<Word from="angeles" to="ángeles" />
<Word from="ansian" to="ansían" />
<Word from="apagala" to="apágala" />
<Word from="aqui" to="aquí" />
<Word from="asi" to="así" />
@ -80,6 +81,8 @@
<Word from="empezo" to="empezó" />
<Word from="empujon" to="empujón" />
<Word from="empujalo" to="empújalo" />
<Word from="energia" to="energía" />
<Word from="enfrian" to="enfrían" />
<Word from="escondanme" to="escóndanme" />
<Word from="esperame" to="espérame" />
<Word from="estara" to="estará" />
@ -942,5 +945,8 @@
<RegEx find="\Bo(log[ao]s?\b)" replaceWith="ó$1" />
<RegEx find="\Ba(log[ao]s?\b)" replaceWith="á$1" />
<RegEx find="\Bi(log[ao]s?\b)" replaceWith="í$1" />
<RegEx find="\bIes\b" replaceWith="les" />
<RegEx find="\bIos\b" replaceWith="los" />
</RegularExpressions>
</OCRFixReplaceList>

Binary file not shown.

View File

@ -54,6 +54,7 @@
this.richTextBoxParagraph = new System.Windows.Forms.RichTextBox();
this.buttonEditWholeText = new System.Windows.Forms.Button();
this.buttonEditWord = new System.Windows.Forms.Button();
this.buttonEditImageDb = new System.Windows.Forms.Button();
this.groupBoxSuggestions.SuspendLayout();
this.GroupBoxEditWord.SuspendLayout();
this.groupBoxEditWholeText.SuspendLayout();
@ -307,6 +308,7 @@
//
this.groupBoxText.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.groupBoxText.Controls.Add(this.buttonEditImageDb);
this.groupBoxText.Controls.Add(this.richTextBoxParagraph);
this.groupBoxText.Controls.Add(this.buttonEditWholeText);
this.groupBoxText.Controls.Add(this.buttonEditWord);
@ -348,6 +350,16 @@
this.buttonEditWord.UseVisualStyleBackColor = true;
this.buttonEditWord.Click += new System.EventHandler(this.ButtonEditWordClick);
//
// buttonEditImageDb
//
this.buttonEditImageDb.Location = new System.Drawing.Point(514, 17);
this.buttonEditImageDb.Name = "buttonEditImageDb";
this.buttonEditImageDb.Size = new System.Drawing.Size(172, 47);
this.buttonEditImageDb.TabIndex = 403;
this.buttonEditImageDb.Text = "Edit image db";
this.buttonEditImageDb.UseVisualStyleBackColor = true;
this.buttonEditImageDb.Click += new System.EventHandler(this.buttonEditImageDb_Click);
//
// OcrSpellCheck
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
@ -409,5 +421,6 @@
private System.Windows.Forms.Button buttonSkipText;
private System.Windows.Forms.Button buttonChangeAllWholeText;
private System.Windows.Forms.Button buttonGoogleIt;
private System.Windows.Forms.Button buttonEditImageDb;
}
}

View File

@ -1,5 +1,6 @@
using Nikse.SubtitleEdit.Core;
using Nikse.SubtitleEdit.Logic;
using Nikse.SubtitleEdit.Logic.Ocr.Binary;
using System;
using System.Collections.Generic;
using System.Drawing;
@ -23,8 +24,20 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
SkipWholeText,
SkipOnce,
UseSuggestion,
InspectCompareMatches,
}
// Backing store for IsBinaryImageCompare. Starts as true because the designer
// code leaves buttonEditImageDb at its default (visible) state.
private bool _isBinaryImageCompare = true;

/// <summary>
/// Gets or sets whether the dialog is used together with binary image compare OCR.
/// Setting the value shows or hides the "Edit image db" button.
/// </summary>
/// <remarks>
/// The value is kept in a field instead of being read back from
/// <c>buttonEditImageDb.Visible</c>: a WinForms control reports
/// <c>Visible == false</c> while its parent form is not displayed, so reading
/// the control would yield false before the dialog is shown, even right after
/// the property was set to true.
/// </remarks>
public bool IsBinaryImageCompare
{
    get
    {
        return _isBinaryImageCompare;
    }
    set
    {
        _isBinaryImageCompare = value;
        buttonEditImageDb.Visible = value;
    }
}
public Action ActionResult { get; private set; }
public string Word { get; private set; }
public string Paragraph { get; private set; }
@ -37,7 +50,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
UiUtil.PreInitialize(this);
InitializeComponent();
UiUtil.FixFonts(this);
Text = Configuration.Settings.Language.SpellCheck.Title;
buttonAddToDictionary.Text = Configuration.Settings.Language.SpellCheck.AddToUserDictionary;
buttonChange.Text = Configuration.Settings.Language.SpellCheck.Change;
@ -280,5 +292,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
}
/// <summary>
/// Click handler for the "Edit image db" button: records
/// <see cref="Action.InspectCompareMatches"/> as the dialog's action result
/// and sets the dialog result to OK.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void buttonEditImageDb_Click(object sender, EventArgs e)
{
    // Store the chosen action before setting the dialog result.
    this.ActionResult = Action.InspectCompareMatches;
    this.DialogResult = System.Windows.Forms.DialogResult.OK;
}
}
}

View File

@ -4527,6 +4527,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
ButtonStopClick(null, null);
_ocrFixEngine.Abort = false;
if (_ocrFixEngine.LastAction == OcrSpellCheck.Action.InspectCompareMatches)
InspectImageCompareMatchesForCurrentImageToolStripMenuItem_Click(null, null);
return string.Empty;
}
@ -7448,7 +7452,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
if (_ocrFixEngine != null)
_ocrFixEngine.Dispose();
_ocrFixEngine = new OcrFixEngine(threeLetterISOLanguageName, hunspellName, this);
_ocrFixEngine = new OcrFixEngine(threeLetterISOLanguageName, hunspellName, this, _ocrMethodIndex == _ocrMethodBinaryImageCompare );
if (_ocrFixEngine.IsDictionaryLoaded)
{
string loadedDictionaryName = _ocrFixEngine.SpellCheckDictionaryName;
@ -7535,6 +7539,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
ShowOcrMethodGroupBox(groupBoxModiMethod);
Configuration.Settings.VobSubOcr.LastOcrMethod = "MODI";
}
_ocrFixEngine = null;
SubtitleListView1SelectedIndexChanged(null, null);
}
@ -7846,7 +7851,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
if (_ocrFixEngine != null)
_ocrFixEngine.Dispose();
_ocrFixEngine = new OcrFixEngine(string.Empty, string.Empty, this);
_ocrFixEngine = new OcrFixEngine(string.Empty, string.Empty, this, _ocrMethodIndex == _ocrMethodBinaryImageCompare);
return;
}
try

View File

@ -50,6 +50,8 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
private static readonly char[] SplitChars = { ' ', '¡', '¿', ',', '.', '!', '?', ':', ';', '(', ')', '[', ']', '{', '}', '+', '-', '£', '"', '„', '”', '“', '«', '»', '#', '&', '%', '…', '—', '♪', '\r', '\n' };
public bool Abort { get; set; }
public OcrSpellCheck.Action LastAction { get; set; } = OcrSpellCheck.Action.Abort;
public bool IsBinaryImageCompare { get; set; }
public List<string> AutoGuessesUsed { get; set; }
public List<string> UnknownWordsFound { get; set; }
public bool IsDictionaryLoaded { get; private set; }
@ -62,7 +64,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
/// <param name="threeLetterIsoLanguageName">E.g. eng for English</param>
/// <param name="hunspellName">Name of hunspell dictionary</param>
/// <param name="parentForm">Used for centering/show spell check dialog</param>
public OcrFixEngine(string threeLetterIsoLanguageName, string hunspellName, Form parentForm)
public OcrFixEngine(string threeLetterIsoLanguageName, string hunspellName, Form parentForm, bool isBinaryImageCompare = false)
{
if (threeLetterIsoLanguageName == "per")
threeLetterIsoLanguageName = "fas";
@ -70,7 +72,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
_threeLetterIsoLanguageName = threeLetterIsoLanguageName;
_parentForm = parentForm;
_spellCheck = new OcrSpellCheck { StartPosition = FormStartPosition.Manual };
_spellCheck = new OcrSpellCheck() { StartPosition = FormStartPosition.Manual, IsBinaryImageCompare = isBinaryImageCompare };
_spellCheck.Location = new Point(parentForm.Left + (parentForm.Width / 2 - _spellCheck.Width / 2),
parentForm.Top + (parentForm.Height / 2 - _spellCheck.Height / 2));
@ -1141,7 +1143,10 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
word = word.Remove(word.Length - 4, 4);
SpellCheckOcrTextResult res = SpellCheckOcrText(line, bitmap, word, suggestions);
if (Abort)
{
return null;
}
if (res.FixedWholeLine)
{
return res.Line;
@ -1201,12 +1206,11 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
var result = new SpellCheckOcrTextResult { Fixed = false, FixedWholeLine = false, Line = null, Word = null };
_spellCheck.Initialize(word, suggestions, line, bitmap);
_spellCheck.ShowDialog(_parentForm);
LastAction = _spellCheck.ActionResult;
switch (_spellCheck.ActionResult)
{
case OcrSpellCheck.Action.Abort:
Abort = true;
result.FixedWholeLine = true;
result.Line = line;
break;
case OcrSpellCheck.Action.AddToUserDictionary:
if (_userWordListXmlFileName != null)
@ -1308,6 +1312,9 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
result.Word = _spellCheck.Word;
result.Fixed = true;
break;
case OcrSpellCheck.Action.InspectCompareMatches:
Abort = true;
break;
}
if (result.Fixed)
{