From 41ba1a5df736d7ebcbac54d3ba12b2f9ff60a689 Mon Sep 17 00:00:00 2001 From: niksedk Date: Wed, 16 Feb 2011 21:27:10 +0000 Subject: [PATCH] Can now change ocr spell check dictionary (thx Hawk) git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@339 99eadd0c-20b8-1223-b5c4-2a2b2df33de2 --- src/Forms/VobSubOcr.Designer.cs | 38 ++++++--- src/Forms/VobSubOcr.cs | 58 +++++++++++++- src/Logic/OCR/OcrFixEngine.cs | 138 ++++++++++++++++++++------------ 3 files changed, 171 insertions(+), 63 deletions(-) diff --git a/src/Forms/VobSubOcr.Designer.cs b/src/Forms/VobSubOcr.Designer.cs index 9a856bb19..ed2dbc6eb 100644 --- a/src/Forms/VobSubOcr.Designer.cs +++ b/src/Forms/VobSubOcr.Designer.cs @@ -93,8 +93,9 @@ namespace Nikse.SubtitleEdit.Forms this.groupBoxSubtitleImage = new System.Windows.Forms.GroupBox(); this.checkBoxShowOnlyForced = new System.Windows.Forms.CheckBox(); this.checkBoxUseTimeCodesFromIdx = new System.Windows.Forms.CheckBox(); - this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); this.folderBrowserDialog1 = new System.Windows.Forms.FolderBrowserDialog(); + this.comboBoxDictionaries = new System.Windows.Forms.ComboBox(); + this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); ((System.ComponentModel.ISupportInitialize)(this.pictureBoxSubtitleImage)).BeginInit(); this.contextMenuStripListview.SuspendLayout(); this.groupBoxOcrMethod.SuspendLayout(); @@ -135,7 +136,7 @@ namespace Nikse.SubtitleEdit.Forms this.saveImageAsToolStripMenuItem, this.saveAllImagesToolStripMenuItem}); this.contextMenuStripListview.Name = "contextMenuStripListview"; - this.contextMenuStripListview.Size = new System.Drawing.Size(244, 120); + this.contextMenuStripListview.Size = new System.Drawing.Size(244, 98); this.contextMenuStripListview.Opening += new System.ComponentModel.CancelEventHandler(this.ContextMenuStripListviewOpening); // // normalToolStripMenuItem @@ -270,7 +271,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false; this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74); this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords"; - this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(165, 17); + this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(167, 17); this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 39; this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words"; this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true; @@ -345,7 +346,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxRightToLeft.AutoSize = true; this.checkBoxRightToLeft.Location = new System.Drawing.Point(128, 112); this.checkBoxRightToLeft.Name = "checkBoxRightToLeft"; - this.checkBoxRightToLeft.Size = new System.Drawing.Size(80, 17); + this.checkBoxRightToLeft.Size = new System.Drawing.Size(83, 17); this.checkBoxRightToLeft.TabIndex = 40; this.checkBoxRightToLeft.Text = "Right to left"; this.checkBoxRightToLeft.UseVisualStyleBackColor = true; @@ -504,6 +505,7 @@ namespace Nikse.SubtitleEdit.Forms this.groupBoxOcrAutoFix.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) | System.Windows.Forms.AnchorStyles.Left) | System.Windows.Forms.AnchorStyles.Right))); + this.groupBoxOcrAutoFix.Controls.Add(this.comboBoxDictionaries); this.groupBoxOcrAutoFix.Controls.Add(this.checkBoxGuessUnknownWords); this.groupBoxOcrAutoFix.Controls.Add(this.tabControlLogs); this.groupBoxOcrAutoFix.Controls.Add(this.labelFixesMade); @@ -525,7 +527,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxGuessUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked; this.checkBoxGuessUnknownWords.Location = new System.Drawing.Point(11, 83); this.checkBoxGuessUnknownWords.Name = "checkBoxGuessUnknownWords"; - this.checkBoxGuessUnknownWords.Size = new System.Drawing.Size(162, 17); + this.checkBoxGuessUnknownWords.Size = new System.Drawing.Size(164, 17); this.checkBoxGuessUnknownWords.TabIndex = 39; this.checkBoxGuessUnknownWords.Text = "Try to guess unknown words"; this.checkBoxGuessUnknownWords.UseVisualStyleBackColor = true; @@ -631,7 +633,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxPromptForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked; this.checkBoxPromptForUnknownWords.Location = new System.Drawing.Point(11, 61); this.checkBoxPromptForUnknownWords.Name = "checkBoxPromptForUnknownWords"; - this.checkBoxPromptForUnknownWords.Size = new System.Drawing.Size(246, 17); + this.checkBoxPromptForUnknownWords.Size = new System.Drawing.Size(255, 17); this.checkBoxPromptForUnknownWords.TabIndex = 38; this.checkBoxPromptForUnknownWords.Text = "Prompt for unknown words (requires dictionary)"; this.checkBoxPromptForUnknownWords.UseVisualStyleBackColor = true; @@ -643,7 +645,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxAutoBreakLines.CheckState = System.Windows.Forms.CheckState.Checked; this.checkBoxAutoBreakLines.Location = new System.Drawing.Point(11, 105); this.checkBoxAutoBreakLines.Name = "checkBoxAutoBreakLines"; - this.checkBoxAutoBreakLines.Size = new System.Drawing.Size(200, 17); + this.checkBoxAutoBreakLines.Size = new System.Drawing.Size(208, 17); this.checkBoxAutoBreakLines.TabIndex = 37; this.checkBoxAutoBreakLines.Text = "Auto break subtitle, if line number > 2"; this.checkBoxAutoBreakLines.UseVisualStyleBackColor = true; @@ -664,7 +666,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxAutoFixCommonErrors.CheckState = System.Windows.Forms.CheckState.Checked; this.checkBoxAutoFixCommonErrors.Location = new System.Drawing.Point(11, 39); this.checkBoxAutoFixCommonErrors.Name = "checkBoxAutoFixCommonErrors"; - this.checkBoxAutoFixCommonErrors.Size = new System.Drawing.Size(137, 17); + this.checkBoxAutoFixCommonErrors.Size = new System.Drawing.Size(139, 17); this.checkBoxAutoFixCommonErrors.TabIndex = 34; this.checkBoxAutoFixCommonErrors.Text = "Fix common OCR errors"; this.checkBoxAutoFixCommonErrors.UseVisualStyleBackColor = true; @@ -690,7 +692,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxEmphasis2Transparent.AutoSize = true; this.checkBoxEmphasis2Transparent.Location = new System.Drawing.Point(437, 19); this.checkBoxEmphasis2Transparent.Name = "checkBoxEmphasis2Transparent"; - this.checkBoxEmphasis2Transparent.Size = new System.Drawing.Size(83, 17); + this.checkBoxEmphasis2Transparent.Size = new System.Drawing.Size(85, 17); this.checkBoxEmphasis2Transparent.TabIndex = 6; this.checkBoxEmphasis2Transparent.Text = "Transparent"; this.checkBoxEmphasis2Transparent.UseVisualStyleBackColor = true; @@ -701,7 +703,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxEmphasis1Transparent.AutoSize = true; this.checkBoxEmphasis1Transparent.Location = new System.Drawing.Point(304, 19); this.checkBoxEmphasis1Transparent.Name = "checkBoxEmphasis1Transparent"; - this.checkBoxEmphasis1Transparent.Size = new System.Drawing.Size(83, 17); + this.checkBoxEmphasis1Transparent.Size = new System.Drawing.Size(85, 17); this.checkBoxEmphasis1Transparent.TabIndex = 5; this.checkBoxEmphasis1Transparent.Text = "Transparent"; this.checkBoxEmphasis1Transparent.UseVisualStyleBackColor = true; @@ -712,7 +714,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxPatternTransparent.AutoSize = true; this.checkBoxPatternTransparent.Location = new System.Drawing.Point(167, 19); this.checkBoxPatternTransparent.Name = "checkBoxPatternTransparent"; - this.checkBoxPatternTransparent.Size = new System.Drawing.Size(83, 17); + this.checkBoxPatternTransparent.Size = new System.Drawing.Size(85, 17); this.checkBoxPatternTransparent.TabIndex = 4; this.checkBoxPatternTransparent.Text = "Transparent"; this.checkBoxPatternTransparent.UseVisualStyleBackColor = true; @@ -797,6 +799,19 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxUseTimeCodesFromIdx.UseVisualStyleBackColor = true; this.checkBoxUseTimeCodesFromIdx.CheckedChanged += new System.EventHandler(this.checkBoxUseTimeCodesFromIdx_CheckedChanged); // + // comboBoxDictionaries + // + this.comboBoxDictionaries.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) + | System.Windows.Forms.AnchorStyles.Right))); + this.comboBoxDictionaries.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxDictionaries.FormattingEnabled = true; + this.comboBoxDictionaries.Location = new System.Drawing.Point(127, 15); + this.comboBoxDictionaries.Name = "comboBoxDictionaries"; + this.comboBoxDictionaries.Size = new System.Drawing.Size(171, 21); + this.comboBoxDictionaries.TabIndex = 41; + this.comboBoxDictionaries.Visible = false; + this.comboBoxDictionaries.SelectedIndexChanged += new System.EventHandler(this.comboBoxDictionaries_SelectedIndexChanged); + // // subtitleListView1 // this.subtitleListView1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) @@ -939,5 +954,6 @@ namespace Nikse.SubtitleEdit.Forms private System.Windows.Forms.CheckBox checkBoxUseTimeCodesFromIdx; private System.Windows.Forms.ToolStripMenuItem saveAllImagesToolStripMenuItem; private System.Windows.Forms.FolderBrowserDialog folderBrowserDialog1; + private System.Windows.Forms.ComboBox comboBoxDictionaries; } } \ No newline at end of file diff --git a/src/Forms/VobSubOcr.cs b/src/Forms/VobSubOcr.cs index f4443fb88..7a4caed4c 100644 --- a/src/Forms/VobSubOcr.cs +++ b/src/Forms/VobSubOcr.cs @@ -145,6 +145,7 @@ namespace Nikse.SubtitleEdit.Forms labelFixesMade.Text = string.Empty; labelFixesMade.Left = checkBoxAutoFixCommonErrors.Left + checkBoxAutoFixCommonErrors.Width; labelDictionaryLoaded.Text = string.Empty; + comboBoxDictionaries.Visible = false; groupBoxImageCompareMethod.Text = language.OcrViaImageCompare; groupBoxModiMethod.Text = language.OcrViaModi; checkBoxAutoFixCommonErrors.Text = language.FixOcrErrors; @@ -152,6 +153,18 @@ namespace Nikse.SubtitleEdit.Forms checkBoxRightToLeft.Left = numericUpDownPixelsIsSpace.Left; groupBoxOCRControls.Text = language.StartOcr + " / " + language.Stop; + comboBoxDictionaries.SelectedIndexChanged -= comboBoxDictionaries_SelectedIndexChanged; + comboBoxDictionaries.Items.Clear(); + comboBoxDictionaries.Items.Add(Configuration.Settings.Language.General.None); + foreach (string name in Utilities.GetDictionaryLanguages()) + { + comboBoxDictionaries.Items.Add(name); + //if (name.Contains("[" + languageName + "]")) + // comboBoxDictionaries.SelectedIndex = comboBoxDictionaries.Items.Count - 1; + } + comboBoxDictionaries.SelectedIndexChanged += comboBoxDictionaries_SelectedIndexChanged; + + comboBoxOcrMethod.Items.Clear(); comboBoxOcrMethod.Items.Add(language.OcrViaTesseract); comboBoxOcrMethod.Items.Add(language.OcrViaImageCompare); @@ -1244,9 +1257,28 @@ namespace Nikse.SubtitleEdit.Forms _languageId = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id; _ocrFixEngine = new OcrFixEngine(_languageId, this); if (_ocrFixEngine.IsDictionaryLoaded) - labelDictionaryLoaded.Text = string.Format(Configuration.Settings.Language.VobSubOcr.DictionaryX, _ocrFixEngine.DictionaryCulture.NativeName); + { + labelDictionaryLoaded.Text = string.Format(Configuration.Settings.Language.VobSubOcr.DictionaryX, string.Empty); // _ocrFixEngine.DictionaryCulture.NativeName); + + string loadedDictionaryName = _ocrFixEngine.SpellCheckDictionaryName; + int i = 0; + comboBoxDictionaries.SelectedIndexChanged -= comboBoxDictionaries_SelectedIndexChanged; + foreach (string item in comboBoxDictionaries.Items) + { + if (item.Contains("[" + loadedDictionaryName + "]")) + comboBoxDictionaries.SelectedIndex = i; + i++; + } + comboBoxDictionaries.SelectedIndexChanged += comboBoxDictionaries_SelectedIndexChanged; + comboBoxDictionaries.Left = labelDictionaryLoaded.Left + labelDictionaryLoaded.Width; + comboBoxDictionaries.Width = groupBoxOcrAutoFix.Width - (comboBoxDictionaries.Left + 5); + comboBoxDictionaries.Visible = true; + } else + { labelDictionaryLoaded.Text = string.Format(Configuration.Settings.Language.VobSubOcr.DictionaryX, Configuration.Settings.Language.General.None); + comboBoxDictionaries.SelectedIndex = 0; + } if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked) { @@ -1971,5 +2003,29 @@ namespace Nikse.SubtitleEdit.Forms subtitleListView1.EndUpdate(); } + public string LanguageString + { + get + { + string name = comboBoxDictionaries.SelectedItem.ToString(); + int start = name.LastIndexOf("["); + int end = name.LastIndexOf("]"); + if (start > 0 && end > start) + { + start++; + name = name.Substring(start, end - start); + return name; + } + return null; + } + } + + private void comboBoxDictionaries_SelectedIndexChanged(object sender, EventArgs e) + { + Configuration.Settings.General.SpellCheckLanguage = LanguageString; + if (_ocrFixEngine != null) + _ocrFixEngine.SpellCheckDictionaryName = LanguageString; + } + } } diff --git a/src/Logic/OCR/OcrFixEngine.cs b/src/Logic/OCR/OcrFixEngine.cs index 1c42847cb..917301437 100644 --- a/src/Logic/OCR/OcrFixEngine.cs +++ b/src/Logic/OCR/OcrFixEngine.cs @@ -34,11 +34,13 @@ namespace Nikse.SubtitleEdit.Logic.OCR Hunspell _hunspell; readonly OcrSpellCheck _spellCheck; readonly Form _parentForm; + private string _spellCheckDictionaryName; public bool Abort { get; set; } public List AutoGuessesUsed { get; set; } public List UnknownWordsFound { get; set; } public bool IsDictionaryLoaded { get; private set; } + public CultureInfo DictionaryCulture { get; private set; } /// @@ -121,63 +123,97 @@ namespace Nikse.SubtitleEdit.Logic.OCR if (dictionaryFileName == null) return; - _fiveLetterWordListLanguageName = Path.GetFileName(dictionaryFileName).Substring(0, 5); - string dictionary = Utilities.DictionaryFolder + _fiveLetterWordListLanguageName; - _wordSkipList = new List(); - _wordSkipList.Add(Configuration.Settings.Tools.MusicSymbol); - _wordSkipList.Add("*"); - _wordSkipList.Add("%"); - _wordSkipList.Add("#"); - _wordSkipList.Add("+"); - - // Load names etc list (names/noise words) - _namesEtcList = new List(); - _namesEtcMultiWordList = new List(); - Utilities.LoadNamesEtcWordLists(_namesEtcList, _namesEtcMultiWordList, _fiveLetterWordListLanguageName); - - _namesEtcListUppercase = new List(); - foreach (string name in _namesEtcList) - _namesEtcListUppercase.Add(name.ToUpper()); - - _namesEtcListWithApostrophe = new List(); - if (threeLetterIsoLanguageName.ToLower() == "eng") - { - foreach (string namesItem in _namesEtcList) - { - if (!namesItem.EndsWith("s")) - _namesEtcListWithApostrophe.Add(namesItem + "'s"); - else - _namesEtcListWithApostrophe.Add(namesItem + "'"); - } - } - - // Load user words - _userWordList = new List(); - _userWordListXmlFileName = Utilities.LoadUserWordList(_userWordList, _fiveLetterWordListLanguageName); - - // Find abbreviations - _abbreviationList = new List(); - foreach (string name in _namesEtcList) - { - if (name.EndsWith(".")) - _abbreviationList.Add(name); - } - foreach (string name in _userWordList) - { - if (name.EndsWith(".")) - _abbreviationList.Add(name); - } - - // Load NHunspell spellchecker - _hunspell = new Hunspell(dictionary + ".aff", dictionary + ".dic"); - IsDictionaryLoaded = true; - DictionaryCulture = culture; + LoadSpellingDictionariesViaDictionaryFileName(threeLetterIsoLanguageName, culture, dictionaryFileName, true); return; } } return; } + private void LoadSpellingDictionariesViaDictionaryFileName(string threeLetterIsoLanguageName, CultureInfo culture, string dictionaryFileName, bool resetSkipList) + { + _fiveLetterWordListLanguageName = Path.GetFileName(dictionaryFileName).Substring(0, 5); + string dictionary = Utilities.DictionaryFolder + _fiveLetterWordListLanguageName; + if (resetSkipList) + { + _wordSkipList = new List(); + _wordSkipList.Add(Configuration.Settings.Tools.MusicSymbol); + _wordSkipList.Add("*"); + _wordSkipList.Add("%"); + _wordSkipList.Add("#"); + _wordSkipList.Add("+"); + } + + // Load names etc list (names/noise words) + _namesEtcList = new List(); + _namesEtcMultiWordList = new List(); + Utilities.LoadNamesEtcWordLists(_namesEtcList, _namesEtcMultiWordList, _fiveLetterWordListLanguageName); + + _namesEtcListUppercase = new List(); + foreach (string name in _namesEtcList) + _namesEtcListUppercase.Add(name.ToUpper()); + + _namesEtcListWithApostrophe = new List(); + if (threeLetterIsoLanguageName.ToLower() == "eng") + { + foreach (string namesItem in _namesEtcList) + { + if (!namesItem.EndsWith("s")) + _namesEtcListWithApostrophe.Add(namesItem + "'s"); + else + _namesEtcListWithApostrophe.Add(namesItem + "'"); + } + } + + // Load user words + _userWordList = new List(); + _userWordListXmlFileName = Utilities.LoadUserWordList(_userWordList, _fiveLetterWordListLanguageName); + + // Find abbreviations + _abbreviationList = new List(); + foreach (string name in _namesEtcList) + { + if (name.EndsWith(".")) + _abbreviationList.Add(name); + } + foreach (string name in _userWordList) + { + if (name.EndsWith(".")) + _abbreviationList.Add(name); + } + + // Load NHunspell spellchecker + _hunspell = new Hunspell(dictionary + ".aff", dictionary + ".dic"); + IsDictionaryLoaded = true; + _spellCheckDictionaryName = dictionary; + DictionaryCulture = culture; + } + + public string SpellCheckDictionaryName + { + get + { + string[] parts = _spellCheckDictionaryName.Split(Path.DirectorySeparatorChar.ToString().ToCharArray(), StringSplitOptions.RemoveEmptyEntries); + if (parts.Length > 0) + return parts[parts.Length - 1]; + return string.Empty; + } + set + { + string _spellCheckDictionaryName = Path.Combine(Utilities.DictionaryFolder, value); + CultureInfo ci; + try + { + ci = new CultureInfo(value); + } + catch + { + ci = CultureInfo.CurrentCulture; + } + LoadSpellingDictionariesViaDictionaryFileName(ci.ThreeLetterISOLanguageName, ci, _spellCheckDictionaryName, false); + } + } + internal static Dictionary LoadReplaceList(XmlDocument doc, string name) { var list = new Dictionary();