Can now change ocr spell check dictionary (thx Hawk)

git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@339 99eadd0c-20b8-1223-b5c4-2a2b2df33de2
This commit is contained in:
niksedk 2011-02-16 21:27:10 +00:00
parent d1ff81b077
commit 41ba1a5df7
3 changed files with 171 additions and 63 deletions

View File

@ -93,8 +93,9 @@ namespace Nikse.SubtitleEdit.Forms
this.groupBoxSubtitleImage = new System.Windows.Forms.GroupBox();
this.checkBoxShowOnlyForced = new System.Windows.Forms.CheckBox();
this.checkBoxUseTimeCodesFromIdx = new System.Windows.Forms.CheckBox();
this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView();
this.folderBrowserDialog1 = new System.Windows.Forms.FolderBrowserDialog();
this.comboBoxDictionaries = new System.Windows.Forms.ComboBox();
this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView();
((System.ComponentModel.ISupportInitialize)(this.pictureBoxSubtitleImage)).BeginInit();
this.contextMenuStripListview.SuspendLayout();
this.groupBoxOcrMethod.SuspendLayout();
@ -135,7 +136,7 @@ namespace Nikse.SubtitleEdit.Forms
this.saveImageAsToolStripMenuItem,
this.saveAllImagesToolStripMenuItem});
this.contextMenuStripListview.Name = "contextMenuStripListview";
this.contextMenuStripListview.Size = new System.Drawing.Size(244, 120);
this.contextMenuStripListview.Size = new System.Drawing.Size(244, 98);
this.contextMenuStripListview.Opening += new System.ComponentModel.CancelEventHandler(this.ContextMenuStripListviewOpening);
//
// normalToolStripMenuItem
@ -270,7 +271,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74);
this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords";
this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(165, 17);
this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(167, 17);
this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 39;
this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words";
this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true;
@ -345,7 +346,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxRightToLeft.AutoSize = true;
this.checkBoxRightToLeft.Location = new System.Drawing.Point(128, 112);
this.checkBoxRightToLeft.Name = "checkBoxRightToLeft";
this.checkBoxRightToLeft.Size = new System.Drawing.Size(80, 17);
this.checkBoxRightToLeft.Size = new System.Drawing.Size(83, 17);
this.checkBoxRightToLeft.TabIndex = 40;
this.checkBoxRightToLeft.Text = "Right to left";
this.checkBoxRightToLeft.UseVisualStyleBackColor = true;
@ -504,6 +505,7 @@ namespace Nikse.SubtitleEdit.Forms
this.groupBoxOcrAutoFix.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.groupBoxOcrAutoFix.Controls.Add(this.comboBoxDictionaries);
this.groupBoxOcrAutoFix.Controls.Add(this.checkBoxGuessUnknownWords);
this.groupBoxOcrAutoFix.Controls.Add(this.tabControlLogs);
this.groupBoxOcrAutoFix.Controls.Add(this.labelFixesMade);
@ -525,7 +527,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxGuessUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxGuessUnknownWords.Location = new System.Drawing.Point(11, 83);
this.checkBoxGuessUnknownWords.Name = "checkBoxGuessUnknownWords";
this.checkBoxGuessUnknownWords.Size = new System.Drawing.Size(162, 17);
this.checkBoxGuessUnknownWords.Size = new System.Drawing.Size(164, 17);
this.checkBoxGuessUnknownWords.TabIndex = 39;
this.checkBoxGuessUnknownWords.Text = "Try to guess unknown words";
this.checkBoxGuessUnknownWords.UseVisualStyleBackColor = true;
@ -631,7 +633,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxPromptForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxPromptForUnknownWords.Location = new System.Drawing.Point(11, 61);
this.checkBoxPromptForUnknownWords.Name = "checkBoxPromptForUnknownWords";
this.checkBoxPromptForUnknownWords.Size = new System.Drawing.Size(246, 17);
this.checkBoxPromptForUnknownWords.Size = new System.Drawing.Size(255, 17);
this.checkBoxPromptForUnknownWords.TabIndex = 38;
this.checkBoxPromptForUnknownWords.Text = "Prompt for unknown words (requires dictionary)";
this.checkBoxPromptForUnknownWords.UseVisualStyleBackColor = true;
@ -643,7 +645,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxAutoBreakLines.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxAutoBreakLines.Location = new System.Drawing.Point(11, 105);
this.checkBoxAutoBreakLines.Name = "checkBoxAutoBreakLines";
this.checkBoxAutoBreakLines.Size = new System.Drawing.Size(200, 17);
this.checkBoxAutoBreakLines.Size = new System.Drawing.Size(208, 17);
this.checkBoxAutoBreakLines.TabIndex = 37;
this.checkBoxAutoBreakLines.Text = "Auto break subtitle, if line number > 2";
this.checkBoxAutoBreakLines.UseVisualStyleBackColor = true;
@ -664,7 +666,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxAutoFixCommonErrors.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxAutoFixCommonErrors.Location = new System.Drawing.Point(11, 39);
this.checkBoxAutoFixCommonErrors.Name = "checkBoxAutoFixCommonErrors";
this.checkBoxAutoFixCommonErrors.Size = new System.Drawing.Size(137, 17);
this.checkBoxAutoFixCommonErrors.Size = new System.Drawing.Size(139, 17);
this.checkBoxAutoFixCommonErrors.TabIndex = 34;
this.checkBoxAutoFixCommonErrors.Text = "Fix common OCR errors";
this.checkBoxAutoFixCommonErrors.UseVisualStyleBackColor = true;
@ -690,7 +692,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxEmphasis2Transparent.AutoSize = true;
this.checkBoxEmphasis2Transparent.Location = new System.Drawing.Point(437, 19);
this.checkBoxEmphasis2Transparent.Name = "checkBoxEmphasis2Transparent";
this.checkBoxEmphasis2Transparent.Size = new System.Drawing.Size(83, 17);
this.checkBoxEmphasis2Transparent.Size = new System.Drawing.Size(85, 17);
this.checkBoxEmphasis2Transparent.TabIndex = 6;
this.checkBoxEmphasis2Transparent.Text = "Transparent";
this.checkBoxEmphasis2Transparent.UseVisualStyleBackColor = true;
@ -701,7 +703,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxEmphasis1Transparent.AutoSize = true;
this.checkBoxEmphasis1Transparent.Location = new System.Drawing.Point(304, 19);
this.checkBoxEmphasis1Transparent.Name = "checkBoxEmphasis1Transparent";
this.checkBoxEmphasis1Transparent.Size = new System.Drawing.Size(83, 17);
this.checkBoxEmphasis1Transparent.Size = new System.Drawing.Size(85, 17);
this.checkBoxEmphasis1Transparent.TabIndex = 5;
this.checkBoxEmphasis1Transparent.Text = "Transparent";
this.checkBoxEmphasis1Transparent.UseVisualStyleBackColor = true;
@ -712,7 +714,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxPatternTransparent.AutoSize = true;
this.checkBoxPatternTransparent.Location = new System.Drawing.Point(167, 19);
this.checkBoxPatternTransparent.Name = "checkBoxPatternTransparent";
this.checkBoxPatternTransparent.Size = new System.Drawing.Size(83, 17);
this.checkBoxPatternTransparent.Size = new System.Drawing.Size(85, 17);
this.checkBoxPatternTransparent.TabIndex = 4;
this.checkBoxPatternTransparent.Text = "Transparent";
this.checkBoxPatternTransparent.UseVisualStyleBackColor = true;
@ -797,6 +799,19 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxUseTimeCodesFromIdx.UseVisualStyleBackColor = true;
this.checkBoxUseTimeCodesFromIdx.CheckedChanged += new System.EventHandler(this.checkBoxUseTimeCodesFromIdx_CheckedChanged);
//
// comboBoxDictionaries
//
this.comboBoxDictionaries.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.comboBoxDictionaries.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxDictionaries.FormattingEnabled = true;
this.comboBoxDictionaries.Location = new System.Drawing.Point(127, 15);
this.comboBoxDictionaries.Name = "comboBoxDictionaries";
this.comboBoxDictionaries.Size = new System.Drawing.Size(171, 21);
this.comboBoxDictionaries.TabIndex = 41;
this.comboBoxDictionaries.Visible = false;
this.comboBoxDictionaries.SelectedIndexChanged += new System.EventHandler(this.comboBoxDictionaries_SelectedIndexChanged);
//
// subtitleListView1
//
this.subtitleListView1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
@ -939,5 +954,6 @@ namespace Nikse.SubtitleEdit.Forms
private System.Windows.Forms.CheckBox checkBoxUseTimeCodesFromIdx;
private System.Windows.Forms.ToolStripMenuItem saveAllImagesToolStripMenuItem;
private System.Windows.Forms.FolderBrowserDialog folderBrowserDialog1;
private System.Windows.Forms.ComboBox comboBoxDictionaries;
}
}

View File

@ -145,6 +145,7 @@ namespace Nikse.SubtitleEdit.Forms
labelFixesMade.Text = string.Empty;
labelFixesMade.Left = checkBoxAutoFixCommonErrors.Left + checkBoxAutoFixCommonErrors.Width;
labelDictionaryLoaded.Text = string.Empty;
comboBoxDictionaries.Visible = false;
groupBoxImageCompareMethod.Text = language.OcrViaImageCompare;
groupBoxModiMethod.Text = language.OcrViaModi;
checkBoxAutoFixCommonErrors.Text = language.FixOcrErrors;
@ -152,6 +153,18 @@ namespace Nikse.SubtitleEdit.Forms
checkBoxRightToLeft.Left = numericUpDownPixelsIsSpace.Left;
groupBoxOCRControls.Text = language.StartOcr + " / " + language.Stop;
comboBoxDictionaries.SelectedIndexChanged -= comboBoxDictionaries_SelectedIndexChanged;
comboBoxDictionaries.Items.Clear();
comboBoxDictionaries.Items.Add(Configuration.Settings.Language.General.None);
foreach (string name in Utilities.GetDictionaryLanguages())
{
comboBoxDictionaries.Items.Add(name);
//if (name.Contains("[" + languageName + "]"))
// comboBoxDictionaries.SelectedIndex = comboBoxDictionaries.Items.Count - 1;
}
comboBoxDictionaries.SelectedIndexChanged += comboBoxDictionaries_SelectedIndexChanged;
comboBoxOcrMethod.Items.Clear();
comboBoxOcrMethod.Items.Add(language.OcrViaTesseract);
comboBoxOcrMethod.Items.Add(language.OcrViaImageCompare);
@ -1244,9 +1257,28 @@ namespace Nikse.SubtitleEdit.Forms
_languageId = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id;
_ocrFixEngine = new OcrFixEngine(_languageId, this);
if (_ocrFixEngine.IsDictionaryLoaded)
labelDictionaryLoaded.Text = string.Format(Configuration.Settings.Language.VobSubOcr.DictionaryX, _ocrFixEngine.DictionaryCulture.NativeName);
{
labelDictionaryLoaded.Text = string.Format(Configuration.Settings.Language.VobSubOcr.DictionaryX, string.Empty); // _ocrFixEngine.DictionaryCulture.NativeName);
string loadedDictionaryName = _ocrFixEngine.SpellCheckDictionaryName;
int i = 0;
comboBoxDictionaries.SelectedIndexChanged -= comboBoxDictionaries_SelectedIndexChanged;
foreach (string item in comboBoxDictionaries.Items)
{
if (item.Contains("[" + loadedDictionaryName + "]"))
comboBoxDictionaries.SelectedIndex = i;
i++;
}
comboBoxDictionaries.SelectedIndexChanged += comboBoxDictionaries_SelectedIndexChanged;
comboBoxDictionaries.Left = labelDictionaryLoaded.Left + labelDictionaryLoaded.Width;
comboBoxDictionaries.Width = groupBoxOcrAutoFix.Width - (comboBoxDictionaries.Left + 5);
comboBoxDictionaries.Visible = true;
}
else
{
labelDictionaryLoaded.Text = string.Format(Configuration.Settings.Language.VobSubOcr.DictionaryX, Configuration.Settings.Language.General.None);
comboBoxDictionaries.SelectedIndex = 0;
}
if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked)
{
@ -1971,5 +2003,29 @@ namespace Nikse.SubtitleEdit.Forms
subtitleListView1.EndUpdate();
}
/// <summary>
/// Gets the dictionary name embedded in the currently selected entry of
/// <c>comboBoxDictionaries</c>, i.e. the text between the last "[" and the
/// last "]" of the item text.
/// </summary>
/// <value>
/// The bracketed name, or <c>null</c> when no item is selected or the selected
/// item does not contain a "[...]" part after its first character.
/// </value>
public string LanguageString
{
    get
    {
        // SelectedItem is null when the combo box has no selection; calling
        // ToString() on it directly would throw a NullReferenceException.
        object selected = comboBoxDictionaries.SelectedItem;
        if (selected == null)
            return null;

        string name = selected.ToString();
        int start = name.LastIndexOf('[');
        int end = name.LastIndexOf(']');

        // "start > 0" is kept on purpose: an entry that begins with "[" is not
        // treated as carrying a dictionary name.
        if (start > 0 && end > start)
        {
            start++; // skip the "[" itself
            return name.Substring(start, end - start);
        }
        return null;
    }
}
/// <summary>
/// Applies the dictionary chosen in <c>comboBoxDictionaries</c>: stores its name
/// in the general settings and, if an OCR fix engine exists, switches the engine
/// to that spell check dictionary.
/// </summary>
/// <param name="sender">The control raising the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void comboBoxDictionaries_SelectedIndexChanged(object sender, EventArgs e)
{
    // Read the property once so the setting and the engine get the same value.
    string languageName = LanguageString;

    // LanguageString is null for entries without a "[name]" part (such as the
    // "None" entry). Handing null to SpellCheckDictionaryName would end up in
    // Path.Combine, which rejects null, so such selections are ignored and the
    // current setting/dictionary is left untouched.
    if (string.IsNullOrEmpty(languageName))
        return;

    Configuration.Settings.General.SpellCheckLanguage = languageName;
    if (_ocrFixEngine != null)
        _ocrFixEngine.SpellCheckDictionaryName = languageName;
}
}
}

View File

@ -34,11 +34,13 @@ namespace Nikse.SubtitleEdit.Logic.OCR
Hunspell _hunspell;
readonly OcrSpellCheck _spellCheck;
readonly Form _parentForm;
private string _spellCheckDictionaryName;
public bool Abort { get; set; }
public List<string> AutoGuessesUsed { get; set; }
public List<string> UnknownWordsFound { get; set; }
public bool IsDictionaryLoaded { get; private set; }
public CultureInfo DictionaryCulture { get; private set; }
/// <summary>
@ -121,63 +123,97 @@ namespace Nikse.SubtitleEdit.Logic.OCR
if (dictionaryFileName == null)
return;
_fiveLetterWordListLanguageName = Path.GetFileName(dictionaryFileName).Substring(0, 5);
string dictionary = Utilities.DictionaryFolder + _fiveLetterWordListLanguageName;
_wordSkipList = new List<string>();
_wordSkipList.Add(Configuration.Settings.Tools.MusicSymbol);
_wordSkipList.Add("*");
_wordSkipList.Add("%");
_wordSkipList.Add("#");
_wordSkipList.Add("+");
// Load names etc list (names/noise words)
_namesEtcList = new List<string>();
_namesEtcMultiWordList = new List<string>();
Utilities.LoadNamesEtcWordLists(_namesEtcList, _namesEtcMultiWordList, _fiveLetterWordListLanguageName);
_namesEtcListUppercase = new List<string>();
foreach (string name in _namesEtcList)
_namesEtcListUppercase.Add(name.ToUpper());
_namesEtcListWithApostrophe = new List<string>();
if (threeLetterIsoLanguageName.ToLower() == "eng")
{
foreach (string namesItem in _namesEtcList)
{
if (!namesItem.EndsWith("s"))
_namesEtcListWithApostrophe.Add(namesItem + "'s");
else
_namesEtcListWithApostrophe.Add(namesItem + "'");
}
}
// Load user words
_userWordList = new List<string>();
_userWordListXmlFileName = Utilities.LoadUserWordList(_userWordList, _fiveLetterWordListLanguageName);
// Find abbreviations
_abbreviationList = new List<string>();
foreach (string name in _namesEtcList)
{
if (name.EndsWith("."))
_abbreviationList.Add(name);
}
foreach (string name in _userWordList)
{
if (name.EndsWith("."))
_abbreviationList.Add(name);
}
// Load NHunspell spellchecker
_hunspell = new Hunspell(dictionary + ".aff", dictionary + ".dic");
IsDictionaryLoaded = true;
DictionaryCulture = culture;
LoadSpellingDictionariesViaDictionaryFileName(threeLetterIsoLanguageName, culture, dictionaryFileName, true);
return;
}
}
return;
}
/// <summary>
/// Loads the word lists and the Hunspell dictionary that belong to a dictionary
/// file: names/noise words, user words, abbreviations and the spell checker itself.
/// The language is identified by the first five characters of the file name.
/// </summary>
/// <param name="threeLetterIsoLanguageName">
/// Three letter ISO language name; only used to decide whether the English
/// possessive variants ("'s" / "'") of the names list are generated.
/// </param>
/// <param name="culture">Culture stored in <see cref="DictionaryCulture"/> once loading succeeds.</param>
/// <param name="dictionaryFileName">
/// Path or file name of the dictionary; the file name part must be at least five
/// characters long because its first five characters are taken as the language name.
/// </param>
/// <param name="resetSkipList">
/// When <c>true</c> the word skip list is recreated with its default symbols;
/// when <c>false</c> the existing skip list is kept as it is.
/// </param>
private void LoadSpellingDictionariesViaDictionaryFileName(string threeLetterIsoLanguageName, CultureInfo culture, string dictionaryFileName, bool resetSkipList)
{
    _fiveLetterWordListLanguageName = Path.GetFileName(dictionaryFileName).Substring(0, 5);
    string dictionary = Utilities.DictionaryFolder + _fiveLetterWordListLanguageName;

    if (resetSkipList)
    {
        _wordSkipList = new List<string>();
        _wordSkipList.Add(Configuration.Settings.Tools.MusicSymbol);
        _wordSkipList.Add("*");
        _wordSkipList.Add("%");
        _wordSkipList.Add("#");
        _wordSkipList.Add("+");
    }

    // Load names etc list (names/noise words)
    _namesEtcList = new List<string>();
    _namesEtcMultiWordList = new List<string>();
    Utilities.LoadNamesEtcWordLists(_namesEtcList, _namesEtcMultiWordList, _fiveLetterWordListLanguageName);

    _namesEtcListUppercase = new List<string>();
    foreach (string name in _namesEtcList)
        _namesEtcListUppercase.Add(name.ToUpper());

    // English only: add the possessive form of every name.
    // Ordinal comparison keeps the check independent of the current UI culture
    // and tolerates a null language name.
    _namesEtcListWithApostrophe = new List<string>();
    if (string.Equals(threeLetterIsoLanguageName, "eng", StringComparison.OrdinalIgnoreCase))
    {
        foreach (string namesItem in _namesEtcList)
        {
            if (!namesItem.EndsWith("s"))
                _namesEtcListWithApostrophe.Add(namesItem + "'s");
            else
                _namesEtcListWithApostrophe.Add(namesItem + "'");
        }
    }

    // Load user words
    _userWordList = new List<string>();
    _userWordListXmlFileName = Utilities.LoadUserWordList(_userWordList, _fiveLetterWordListLanguageName);

    // Find abbreviations (entries ending with a period) in both lists
    _abbreviationList = new List<string>();
    foreach (string name in _namesEtcList)
    {
        if (name.EndsWith("."))
            _abbreviationList.Add(name);
    }
    foreach (string name in _userWordList)
    {
        if (name.EndsWith("."))
            _abbreviationList.Add(name);
    }

    // Load NHunspell spellchecker. This method can run more than once (the
    // dictionary can be switched), so release the previously loaded instance
    // only after the new one has been created successfully.
    Hunspell previousHunspell = _hunspell;
    _hunspell = new Hunspell(dictionary + ".aff", dictionary + ".dic");
    if (previousHunspell != null)
        previousHunspell.Dispose();

    IsDictionaryLoaded = true;
    _spellCheckDictionaryName = dictionary;
    DictionaryCulture = culture;
}
/// <summary>
/// Gets or sets the name of the spell check dictionary in use.
/// </summary>
/// <value>
/// Getter: the last path segment of the loaded dictionary path, or an empty
/// string when no dictionary path has been stored yet.
/// Setter: takes a dictionary name (for example "en_US"), resolves it inside
/// <c>Utilities.DictionaryFolder</c> and reloads the word lists and spell checker
/// for it while keeping the current word skip list.
/// </value>
public string SpellCheckDictionaryName
{
    get
    {
        // The field is only assigned after a dictionary has been loaded.
        if (string.IsNullOrEmpty(_spellCheckDictionaryName))
            return string.Empty;

        string[] parts = _spellCheckDictionaryName.Split(new[] { Path.DirectorySeparatorChar }, StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length > 0)
            return parts[parts.Length - 1];
        return string.Empty;
    }
    set
    {
        // Use a distinct local name; the field itself is updated by the load method.
        string dictionaryPath = Path.Combine(Utilities.DictionaryFolder, value);

        CultureInfo ci;
        try
        {
            // Dictionary names use an underscore ("en_US") while CultureInfo
            // expects a hyphen ("en-US"); without the conversion the lookup
            // always fails and the fallback culture would be used.
            ci = new CultureInfo(value.Replace('_', '-'));
        }
        catch (ArgumentException)
        {
            // Unknown culture name: fall back to the current culture.
            ci = CultureInfo.CurrentCulture;
        }

        LoadSpellingDictionariesViaDictionaryFileName(ci.ThreeLetterISOLanguageName, ci, dictionaryPath, false);
    }
}
internal static Dictionary<string, string> LoadReplaceList(XmlDocument doc, string name)
{
var list = new Dictionary<string, string>();