diff --git a/Changelog.txt b/Changelog.txt index 9f2b64730..eb339cc27 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -21,6 +21,7 @@ * Update Romanian translation - thx Mircea * Update Basque translation - thx Xabier * Update Portuguese translation - thx moob + * Update Tesseract OCR from 3.02 to 4.0 (alpha) * Ctrl+a/ctrl+d/ctrl+shift+i works in more lists - thx tormento * Remember OCR spell check dictionary for tesseract - thx raymondjpg * FIXED: @@ -37,6 +38,8 @@ * Do not allow navigating before zero in video - thx darnn * Fix issue with nested tags in "Remove text for HI" - thx darnn * Fix image render issue regarding italic/font - thx Cemal + * Fix bottom margin in batch image export - thx Cemal + * Fix possible crash in list view - thx lambdacore12 3.5.6 (27th February 2018) * NEW: diff --git a/LanguageMaster.xml b/LanguageMaster.xml index d36b971f2..b0ba52372 100644 --- a/LanguageMaster.xml +++ b/LanguageMaster.xml @@ -2318,7 +2318,6 @@ Keep changes? Save all images (png/bdn xml)... Save all images with HTML index... {0} images saved in {1} - Try Microsoft MODI OCR for unknown words Dictionary: {0} Right to left Show only forced subtitles diff --git a/Tesseract4/tessdata/configs/hocr b/Tesseract4/tessdata/configs/hocr new file mode 100644 index 000000000..9f63e41eb --- /dev/null +++ b/Tesseract4/tessdata/configs/hocr @@ -0,0 +1,3 @@ +tessedit_create_hocr 1 +tessedit_pageseg_mode 1 +hocr_font_info 0 diff --git a/Tesseract4/tessdata/eng.traineddata b/Tesseract4/tessdata/eng.traineddata new file mode 100644 index 000000000..bbef46750 Binary files /dev/null and b/Tesseract4/tessdata/eng.traineddata differ diff --git a/Tesseract4/tesseract.exe b/Tesseract4/tesseract.exe new file mode 100644 index 000000000..b64dc7ecd Binary files /dev/null and b/Tesseract4/tesseract.exe differ diff --git a/build.bat b/build.bat index 196474ef2..b674a9b85 100644 --- a/build.bat +++ b/build.bat @@ -91,9 +91,9 @@ PUSHD "src\bin\Release" IF EXIST "temp_zip" RD /S /Q "temp_zip" IF NOT EXIST "temp_zip" MD "temp_zip" IF NOT EXIST "temp_zip\Languages" MD "temp_zip\Languages" -IF NOT EXIST "temp_zip\Tesseract" MD "temp_zip\Tesseract" -IF NOT EXIST "temp_zip\Tesseract\tessdata" MD "temp_zip\Tesseract\tessdata" -IF NOT EXIST "temp_zip\Tesseract\tessdata\configs" MD "temp_zip\Tesseract\tessdata\configs" +IF NOT EXIST "temp_zip\Tesseract4" MD "temp_zip\Tesseract4" +IF NOT EXIST "temp_zip\Tesseract4\tessdata" MD "temp_zip\Tesseract4\tessdata" +IF NOT EXIST "temp_zip\Tesseract4\tessdata\configs" MD "temp_zip\Tesseract4\tessdata\configs" COPY /Y /V "..\..\..\LICENSE.txt" "temp_zip\" COPY /Y /V "..\..\..\Changelog.txt" "temp_zip\" @@ -101,11 +101,9 @@ COPY /Y /V "Hunspellx86.dll" "temp_zip\" COPY /Y /V "Hunspellx64.dll" "temp_zip\" COPY /Y /V "SubtitleEdit.exe" "temp_zip\" COPY /Y /V "Languages\*.xml" "temp_zip\Languages\" -COPY /Y /V "..\..\..\Tesseract\msvcp90.dll" "temp_zip\Tesseract\" -COPY /Y /V "..\..\..\Tesseract\msvcr90.dll" "temp_zip\Tesseract\" -COPY /Y /V "..\..\..\Tesseract\tesseract.exe" "temp_zip\Tesseract\" -COPY /Y /V "..\..\..\Tesseract\tessdata\configs\hocr" "temp_zip\Tesseract\tessdata\configs\" -COPY /Y /V "..\..\..\Tesseract\tessdata\*.traineddata" "temp_zip\Tesseract\tessdata\" +COPY /Y /V "..\..\..\Tesseract4\tesseract.exe" "temp_zip\Tesseract4\" +COPY /Y /V "..\..\..\Tesseract4\tessdata\configs\hocr" "temp_zip\Tesseract4\tessdata\configs\" +COPY /Y /V "..\..\..\Tesseract4\tessdata\*.traineddata" "temp_zip\Tesseract4\tessdata\" PUSHD "temp_zip" START "" /B /WAIT "%SEVENZIP%" a -tzip -mx=9 "SE%VERSION%.zip" * >NUL diff --git a/installer/Subtitle_Edit_installer.iss b/installer/Subtitle_Edit_installer.iss index cf9f84295..e6f5e3966 100644 --- a/installer/Subtitle_Edit_installer.iss +++ b/installer/Subtitle_Edit_installer.iss @@ -266,12 +266,9 @@ Source: {#bindir}\SubtitleEdit.exe; DestDir: {app}; Source: {#bindir}\SubtitleEdit.resources.dll; DestDir: {app}; Flags: ignoreversion; Components: main Source: ..\Changelog.txt; DestDir: {app}; Flags: ignoreversion; Components: main Source: ..\LICENSE.txt; DestDir: {app}; Flags: ignoreversion; Components: main -Source: ..\Tesseract\msvcp90.dll; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main -Source: ..\Tesseract\msvcr90.dll; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main -Source: ..\Tesseract\tessdata\configs\hocr; DestDir: {app}\Tesseract\tessdata\configs; Flags: ignoreversion; Components: main -Source: ..\Tesseract\tessdata\eng.traineddata; DestDir: {app}\Tesseract\tessdata; Flags: ignoreversion; Components: main -Source: ..\Tesseract\tessdata\music.traineddata; DestDir: {app}\Tesseract\tessdata; Flags: ignoreversion; Components: main -Source: ..\Tesseract\tesseract.exe; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main +Source: ..\Tesseract4\tessdata\configs\hocr; DestDir: {app}\Tesseract4\tessdata\configs; Flags: ignoreversion; Components: main +Source: ..\Tesseract4\tessdata\eng.traineddata; DestDir: {app}\Tesseract4\tessdata; Flags: ignoreversion; Components: main +Source: ..\Tesseract4\tesseract.exe; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main Source: Icons\uninstall.ico; DestDir: {app}\Icons; Flags: ignoreversion; Components: main diff --git a/libse/Configuration.cs b/libse/Configuration.cs index 53a07ce8f..99fd5b577 100644 --- a/libse/Configuration.cs +++ b/libse/Configuration.cs @@ -23,7 +23,7 @@ namespace Nikse.SubtitleEdit.Core public static readonly string SceneChangesDirectory = DataDirectory + "SceneChanges" + Path.DirectorySeparatorChar; public static readonly string AutoBackupDirectory = DataDirectory + "AutoBackup" + Path.DirectorySeparatorChar; public static readonly string VobSubCompareDirectory = DataDirectory + "VobSub" + Path.DirectorySeparatorChar; - public static readonly string TesseractDirectory = DataDirectory + "Tesseract" + Path.DirectorySeparatorChar; + public static readonly string TesseractDirectory = DataDirectory + "Tesseract4" + Path.DirectorySeparatorChar; public static readonly string WaveformsDirectory = DataDirectory + "Waveforms" + Path.DirectorySeparatorChar; public static readonly string PluginsDirectory = DataDirectory + "Plugins" + Path.DirectorySeparatorChar; public static readonly string IconsDirectory = BaseDirectory + "Icons" + Path.DirectorySeparatorChar; @@ -54,21 +54,9 @@ namespace Nikse.SubtitleEdit.Core } - public static Settings Settings - { - get - { - return Instance.Value._settings.Value; - } - } + public static Settings Settings => Instance.Value._settings.Value; - public static IEnumerable AvailableEncodings - { - get - { - return Instance.Value._encodings; - } - } + public static IEnumerable AvailableEncodings => Instance.Value._encodings; private static string GetInstallerPath() { @@ -117,6 +105,7 @@ namespace Nikse.SubtitleEdit.Core } catch { + // ignored } } Directory.CreateDirectory(Path.Combine(appDataRoamingPath, "Dictionaries")); diff --git a/libse/Language.cs b/libse/Language.cs index 26c2f4802..4e66cd37d 100644 --- a/libse/Language.cs +++ b/libse/Language.cs @@ -2649,7 +2649,6 @@ Keep changes?", SaveAllSubtitleImagesAsBdnXml = "Save all images (png/bdn xml)...", SaveAllSubtitleImagesWithHtml = "Save all images with HTML index...", XImagesSavedInY = "{0} images saved in {1}", - TryModiForUnknownWords = "Try Microsoft MODI OCR for unknown words", DictionaryX = "Dictionary: {0}", RightToLeft = "Right to left", ShowOnlyForcedSubtitles = "Show only forced subtitles", diff --git a/libse/LanguageDeserializer.cs b/libse/LanguageDeserializer.cs index 7c95b546a..13e91c43d 100644 --- a/libse/LanguageDeserializer.cs +++ b/libse/LanguageDeserializer.cs @@ -6259,9 +6259,6 @@ namespace Nikse.SubtitleEdit.Core case "VobSubOcr/XImagesSavedInY": language.VobSubOcr.XImagesSavedInY = reader.Value; break; - case "VobSubOcr/TryModiForUnknownWords": - language.VobSubOcr.TryModiForUnknownWords = reader.Value; - break; case "VobSubOcr/DictionaryX": language.VobSubOcr.DictionaryX = reader.Value; break; diff --git a/libse/LanguageStructure.cs b/libse/LanguageStructure.cs index a66cb0549..c41ba9b0c 100644 --- a/libse/LanguageStructure.cs +++ b/libse/LanguageStructure.cs @@ -2524,7 +2524,6 @@ public string SaveAllSubtitleImagesAsBdnXml { get; set; } public string SaveAllSubtitleImagesWithHtml { get; set; } public string XImagesSavedInY { get; set; } - public string TryModiForUnknownWords { get; set; } public string DictionaryX { get; set; } public string RightToLeft { get; set; } public string ShowOnlyForcedSubtitles { get; set; } diff --git a/libse/LibSE.csproj b/libse/LibSE.csproj index 86dce6706..168b0cebd 100644 --- a/libse/LibSE.csproj +++ b/libse/LibSE.csproj @@ -531,6 +531,7 @@ + diff --git a/libse/Settings.cs b/libse/Settings.cs index adb00eac9..8b225ac3b 100644 --- a/libse/Settings.cs +++ b/libse/Settings.cs @@ -865,8 +865,8 @@ namespace Nikse.SubtitleEdit.Core public int LastModiLanguageId { get; set; } public string LastOcrMethod { get; set; } public string TesseractLastLanguage { get; set; } - public bool UseModiInTesseractForUnknownWords { get; set; } public bool UseItalicsInTesseract { get; set; } + public int TesseractEngineMode { get; set; } public bool UseMusicSymbolsInTesseract { get; set; } public bool RightToLeft { get; set; } public bool TopToBottom { get; set; } @@ -2680,12 +2680,12 @@ namespace Nikse.SubtitleEdit.Core subNode = node.SelectSingleNode("TesseractLastLanguage"); if (subNode != null) settings.VobSubOcr.TesseractLastLanguage = subNode.InnerText; - subNode = node.SelectSingleNode("UseModiInTesseractForUnknownWords"); - if (subNode != null) - settings.VobSubOcr.UseModiInTesseractForUnknownWords = Convert.ToBoolean(subNode.InnerText); subNode = node.SelectSingleNode("UseItalicsInTesseract"); if (subNode != null) settings.VobSubOcr.UseItalicsInTesseract = Convert.ToBoolean(subNode.InnerText); + subNode = node.SelectSingleNode("TesseractEngineMode"); + if (subNode != null) + settings.VobSubOcr.TesseractEngineMode = Convert.ToInt32(subNode.InnerText); subNode = node.SelectSingleNode("UseMusicSymbolsInTesseract"); if (subNode != null) settings.VobSubOcr.UseMusicSymbolsInTesseract = Convert.ToBoolean(subNode.InnerText); @@ -3808,8 +3808,8 @@ namespace Nikse.SubtitleEdit.Core textWriter.WriteElementString("LastModiLanguageId", settings.VobSubOcr.LastModiLanguageId.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("LastOcrMethod", settings.VobSubOcr.LastOcrMethod); textWriter.WriteElementString("TesseractLastLanguage", settings.VobSubOcr.TesseractLastLanguage); - textWriter.WriteElementString("UseModiInTesseractForUnknownWords", settings.VobSubOcr.UseModiInTesseractForUnknownWords.ToString()); textWriter.WriteElementString("UseItalicsInTesseract", settings.VobSubOcr.UseItalicsInTesseract.ToString()); + textWriter.WriteElementString("TesseractEngineMode", settings.VobSubOcr.TesseractEngineMode.ToString()); textWriter.WriteElementString("UseMusicSymbolsInTesseract", settings.VobSubOcr.UseMusicSymbolsInTesseract.ToString()); textWriter.WriteElementString("RightToLeft", settings.VobSubOcr.RightToLeft.ToString()); textWriter.WriteElementString("TopToBottom", settings.VobSubOcr.TopToBottom.ToString()); diff --git a/libse/TesseractDictionary.cs b/libse/TesseractDictionary.cs new file mode 100644 index 000000000..fded8fc24 --- /dev/null +++ b/libse/TesseractDictionary.cs @@ -0,0 +1,195 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; + +namespace Nikse.SubtitleEdit.Core +{ + public class TesseractDictionary + { + + private const string DownloadUrlTemplate = "https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/{0}.traineddata"; + + /// + /// Dictionaries containing both 3.5 + 4.0 data - see https://github.com/tesseract-ocr/tessdata + /// + private static readonly string[] Dictionaries = + { + "afr", + "amh", + "ara", + "asm", + "aze", + "aze_cyrl", + "bel", + "ben", + "bod", + "bos", + "bre", + "bul", + "cat", + "ceb", + "ces", + "chi_sim", + "chi_sim_vert", + "chi_tra", + "chi_tra_vert", + "chr", + "cos", + "cym", + "dan", + "deu", + "div", + "dzo", + "ell", + "eng", + "enm", + "epo", + "equ", + "est", + "eus", + "fao", + "fas", + "fil", + "fin", + "fra", + "frk", + "frm", + "fry", + "gla", + "gle", + "glg", + "grc", + "guj", + "hat", + "heb", + "hin", + "hrv", + "hun", + "hye", + "iku", + "ind", + "isl", + "ita", + "jav", + "jpn", + "jpn_vert", + "kan", + "kat", + "kaz", + "khm", + "kir", + "kor", + "kor_vert", + "kur", + "kur_ara", + "lao", + "lat", + "lav", + "lit", + "ltz", + "mal", + "mar", + "mkd", + "mlt", + "mon", + "mri", + "msa", + "mya", + "nep", + "nld", + "nor", + "oci", + "ori", + "osd", + "pan", + "pol", + "por", + "pus", + "que", + "ron", + "rus", + "san", + "sin", + "slk", + "slv", + "snd", + "spa", + "sqi", + "srp", + "srp_latn", + "sun", + "swa", + "swe", + "syr", + "tam", + "tat", + "tel", + "tgk", + "tgl", + "tha", + "tir", + "ton", + "tur", + "uig", + "ukr", + "urd", + "uzb", + "uzb_cyrl", + "vie", + "yid", + "yor" + }; + + public string Code { get; set; } + public string Name { get; set; } + public string Url { get; set; } + + public static List List() + { + var list = new List(); + var cultures = CultureInfo.GetCultures(CultureTypes.NeutralCultures); + foreach (var dictionary in Dictionaries) + { + list.Add(new TesseractDictionary + { + Name = MakeName(dictionary, cultures), + Code = dictionary, + Url = string.Format(DownloadUrlTemplate, dictionary) + }); + } + return list; + } + + private static string MakeName(string dictionary, CultureInfo[] cultures) + { + string code = dictionary; + string post = string.Empty; + var idx = code.IndexOf('_'); + if (idx > 0) + { + post = $" ({code.Substring(idx).Trim('_')})"; + code = code.Substring(0, idx).Trim('_'); + } + + try + { + var cultureInfo = cultures.FirstOrDefault(ci => string.Equals(ci.ThreeLetterISOLanguageName, code, StringComparison.OrdinalIgnoreCase)); + if (cultureInfo != null) + code = cultureInfo.EnglishName; + } + catch + { + // ignore + } + + return code + post; + } + + public override string ToString() + { + return Name; + } + + } +} diff --git a/src/Forms/GetTesseractDictionaries.Designer.cs b/src/Forms/GetTesseractDictionaries.Designer.cs index fc58eb661..1c7b7b218 100644 --- a/src/Forms/GetTesseractDictionaries.Designer.cs +++ b/src/Forms/GetTesseractDictionaries.Designer.cs @@ -1,6 +1,6 @@ namespace Nikse.SubtitleEdit.Forms { - partial class GetTesseractDictionaries + sealed partial class GetTesseractDictionaries { /// /// Required designer variable. diff --git a/src/Forms/GetTesseractDictionaries.cs b/src/Forms/GetTesseractDictionaries.cs index 3d949f133..27be6fed6 100644 --- a/src/Forms/GetTesseractDictionaries.cs +++ b/src/Forms/GetTesseractDictionaries.cs @@ -6,17 +6,14 @@ using System.IO; using System.IO.Compression; using System.Net; using System.Windows.Forms; -using System.Xml; namespace Nikse.SubtitleEdit.Forms { - public partial class GetTesseractDictionaries : Form + public sealed partial class GetTesseractDictionaries : Form { - private List _dictionaryDownloadLinks = new List(); - private List _descriptions = new List(); - private string _xmlName = null; - private string _dictionaryFileName = null; + private string _dictionaryFileName; internal string ChosenLanguage { get; private set; } + private readonly List _dictionaries; public GetTesseractDictionaries() { @@ -31,57 +28,24 @@ namespace Nikse.SubtitleEdit.Forms buttonDownload.Text = Configuration.Settings.Language.GetTesseractDictionaries.Download; labelPleaseWait.Text = string.Empty; buttonOK.Text = Configuration.Settings.Language.General.Ok; - LoadDictionaryList("Nikse.SubtitleEdit.Resources.TesseractDictionaries.xml.gz"); FixLargeFonts(); + _dictionaries = TesseractDictionary.List(); + LoadDictionaryList(); } - private void LoadDictionaryList(string xmlRessourceName) + private void LoadDictionaryList() { - _dictionaryDownloadLinks = new List(); - _descriptions = new List(); - _xmlName = xmlRessourceName; - System.Reflection.Assembly asm = System.Reflection.Assembly.GetExecutingAssembly(); - Stream strm = asm.GetManifestResourceStream(_xmlName); - if (strm != null) + comboBoxDictionaries.BeginUpdate(); + comboBoxDictionaries.Items.Clear(); + foreach (var d in _dictionaries) { - comboBoxDictionaries.Items.Clear(); - XmlDocument doc = new XmlDocument(); - using (var rdr = new StreamReader(strm)) - using (var zip = new GZipStream(rdr.BaseStream, CompressionMode.Decompress)) + if (!string.IsNullOrEmpty(d.Url)) { - byte[] data = new byte[195000]; - int bytesRead = zip.Read(data, 0, data.Length); - var s = System.Text.Encoding.UTF8.GetString(data, 0, bytesRead).Trim(); - try - { - doc.LoadXml(s); - } - catch (Exception exception) - { - MessageBox.Show(exception.Message); - } - } - - foreach (XmlNode node in doc.DocumentElement.SelectNodes("Dictionary")) - { - string englishName = node.SelectSingleNode("EnglishName").InnerText; - string downloadLink = node.SelectSingleNode("DownloadLink").InnerText; - - string description = string.Empty; - if (node.SelectSingleNode("Description") != null) - description = node.SelectSingleNode("Description").InnerText; - - if (!string.IsNullOrEmpty(downloadLink)) - { - string name = englishName; - - comboBoxDictionaries.Items.Add(name); - _dictionaryDownloadLinks.Add(downloadLink); - _descriptions.Add(description); - } - comboBoxDictionaries.SelectedIndex = 0; + comboBoxDictionaries.Items.Add(d); } } + comboBoxDictionaries.SelectedIndex = 0; + comboBoxDictionaries.EndUpdate(); comboBoxDictionaries.AutoCompleteSource = AutoCompleteSource.ListItems; comboBoxDictionaries.AutoCompleteMode = AutoCompleteMode.Append; } @@ -105,7 +69,7 @@ namespace Nikse.SubtitleEdit.Forms Cursor = Cursors.WaitCursor; int index = comboBoxDictionaries.SelectedIndex; - string url = _dictionaryDownloadLinks[index]; + string url = _dictionaries[index].Url; ChosenLanguage = comboBoxDictionaries.Items[index].ToString(); var wc = new WebClient { Proxy = Utilities.GetProxy() }; diff --git a/src/Forms/Ocr/VobSubOcr.Designer.cs b/src/Forms/Ocr/VobSubOcr.Designer.cs index 5f05bc890..adcd44f41 100644 --- a/src/Forms/Ocr/VobSubOcr.Designer.cs +++ b/src/Forms/Ocr/VobSubOcr.Designer.cs @@ -39,6 +39,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.nOcrTrainingToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.toolStripSeparator4 = new System.Windows.Forms.ToolStripSeparator(); this.toolStripMenuItemSetUnItalicFactor = new System.Windows.Forms.ToolStripMenuItem(); + this.setForecolorThresholdToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.toolStripSeparator3 = new System.Windows.Forms.ToolStripSeparator(); this.deleteToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.labelSubtitleText = new System.Windows.Forms.Label(); @@ -48,23 +49,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.buttonCancel = new System.Windows.Forms.Button(); this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox(); this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox(); - this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox(); - this.labelMinLineSplitHeight = new System.Windows.Forms.Label(); - this.comboBoxLineSplitMinLineHeight = new System.Windows.Forms.ComboBox(); - this.labelMaxErrorPercent = new System.Windows.Forms.Label(); - this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown(); - this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox(); - this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown(); - this.buttonEditCharacterDatabase = new System.Windows.Forms.Button(); - this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label(); - this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox(); - this.labelImageDatabase = new System.Windows.Forms.Label(); - this.buttonNewCharacterDatabase = new System.Windows.Forms.Button(); this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox(); this.buttonGetTesseractDictionaries = new System.Windows.Forms.Button(); this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox(); this.checkBoxTesseractItalicsOn = new System.Windows.Forms.CheckBox(); - this.checkBoxUseModiInTesseractForUnknownWords = new System.Windows.Forms.CheckBox(); this.labelTesseractLanguage = new System.Windows.Forms.Label(); this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox(); this.groupBoxModiMethod = new System.Windows.Forms.GroupBox(); @@ -80,6 +68,18 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.checkBoxRightToLeftNOCR = new System.Windows.Forms.CheckBox(); this.numericUpDownNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.NumericUpDown(); this.labelNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.Label(); + this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox(); + this.labelMinLineSplitHeight = new System.Windows.Forms.Label(); + this.comboBoxLineSplitMinLineHeight = new System.Windows.Forms.ComboBox(); + this.labelMaxErrorPercent = new System.Windows.Forms.Label(); + this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown(); + this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox(); + this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown(); + this.buttonEditCharacterDatabase = new System.Windows.Forms.Button(); + this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label(); + this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox(); + this.labelImageDatabase = new System.Windows.Forms.Label(); + this.buttonNewCharacterDatabase = new System.Windows.Forms.Button(); this.groupBoxOCRControls = new System.Windows.Forms.GroupBox(); this.labelStartFrom = new System.Windows.Forms.Label(); this.numericUpDownStartNumber = new System.Windows.Forms.NumericUpDown(); @@ -141,16 +141,17 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.textBoxCurrentText = new Nikse.SubtitleEdit.Controls.SETextBox(); this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); this.timerHideStatus = new System.Windows.Forms.Timer(this.components); - this.setForecolorThresholdToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.comboBoxTesseractEngineMode = new System.Windows.Forms.ComboBox(); + this.labelTesseractEngineMode = new System.Windows.Forms.Label(); this.contextMenuStripListview.SuspendLayout(); this.groupBoxOcrMethod.SuspendLayout(); - this.groupBoxImageCompareMethod.SuspendLayout(); - ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit(); - ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit(); this.GroupBoxTesseractMethod.SuspendLayout(); this.groupBoxModiMethod.SuspendLayout(); this.groupBoxNOCR.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit(); + this.groupBoxImageCompareMethod.SuspendLayout(); + ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit(); this.groupBoxOCRControls.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).BeginInit(); this.groupBoxOcrAutoFix.SuspendLayout(); @@ -200,7 +201,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.toolStripSeparator3, this.deleteToolStripMenuItem}); this.contextMenuStripListview.Name = "contextMenuStripListview"; - this.contextMenuStripListview.Size = new System.Drawing.Size(306, 364); + this.contextMenuStripListview.Size = new System.Drawing.Size(306, 342); this.contextMenuStripListview.Opening += new System.ComponentModel.CancelEventHandler(this.ContextMenuStripListviewOpening); // // normalToolStripMenuItem @@ -339,6 +340,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.toolStripMenuItemSetUnItalicFactor.Text = "Set un-italic factor..."; this.toolStripMenuItemSetUnItalicFactor.Click += new System.EventHandler(this.toolStripMenuItemSetUnItalicFactor_Click); // + // setForecolorThresholdToolStripMenuItem + // + this.setForecolorThresholdToolStripMenuItem.Name = "setForecolorThresholdToolStripMenuItem"; + this.setForecolorThresholdToolStripMenuItem.Size = new System.Drawing.Size(305, 22); + this.setForecolorThresholdToolStripMenuItem.Text = "Set forecolor threshold..."; + this.setForecolorThresholdToolStripMenuItem.Click += new System.EventHandler(this.setForecolorThresholdToolStripMenuItem_Click); + // // toolStripSeparator3 // this.toolStripSeparator3.Name = "toolStripSeparator3"; @@ -406,10 +414,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr // groupBoxOcrMethod // this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod); - this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod); this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod); this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod); this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR); + this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod); this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5); this.groupBoxOcrMethod.Name = "groupBoxOcrMethod"; this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192); @@ -432,6 +440,223 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.comboBoxOcrMethod.TabIndex = 0; this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged); // + // GroupBoxTesseractMethod + // + this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractEngineMode); + this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractEngineMode); + this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries); + this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn); + this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn); + this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage); + this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages); + this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31); + this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod"; + this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131); + this.GroupBoxTesseractMethod.TabIndex = 1; + this.GroupBoxTesseractMethod.TabStop = false; + this.GroupBoxTesseractMethod.Text = "Tesseract"; + // + // buttonGetTesseractDictionaries + // + this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 28); + this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries"; + this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23); + this.buttonGetTesseractDictionaries.TabIndex = 2; + this.buttonGetTesseractDictionaries.Text = "..."; + this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true; + this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click); + // + // checkBoxTesseractMusicOn + // + this.checkBoxTesseractMusicOn.AutoSize = true; + this.checkBoxTesseractMusicOn.Checked = true; + this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked; + this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 69); + this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn"; + this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(93, 17); + this.checkBoxTesseractMusicOn.TabIndex = 4; + this.checkBoxTesseractMusicOn.Text = "Music symbols"; + this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true; + // + // checkBoxTesseractItalicsOn + // + this.checkBoxTesseractItalicsOn.AutoSize = true; + this.checkBoxTesseractItalicsOn.Checked = true; + this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked; + this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 69); + this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn"; + this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17); + this.checkBoxTesseractItalicsOn.TabIndex = 3; + this.checkBoxTesseractItalicsOn.Text = "Italics"; + this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true; + // + // labelTesseractLanguage + // + this.labelTesseractLanguage.AutoSize = true; + this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 32); + this.labelTesseractLanguage.Name = "labelTesseractLanguage"; + this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13); + this.labelTesseractLanguage.TabIndex = 0; + this.labelTesseractLanguage.Text = "Language"; + // + // comboBoxTesseractLanguages + // + this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxTesseractLanguages.FormattingEnabled = true; + this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 29); + this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages"; + this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21); + this.comboBoxTesseractLanguages.TabIndex = 1; + this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged); + // + // groupBoxModiMethod + // + this.groupBoxModiMethod.Controls.Add(this.label1); + this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage); + this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50); + this.groupBoxModiMethod.Name = "groupBoxModiMethod"; + this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131); + this.groupBoxModiMethod.TabIndex = 3; + this.groupBoxModiMethod.TabStop = false; + this.groupBoxModiMethod.Text = "MODI"; + // + // label1 + // + this.label1.AutoSize = true; + this.label1.Location = new System.Drawing.Point(11, 58); + this.label1.Name = "label1"; + this.label1.Size = new System.Drawing.Size(54, 13); + this.label1.TabIndex = 33; + this.label1.Text = "Language"; + // + // comboBoxModiLanguage + // + this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxModiLanguage.FormattingEnabled = true; + this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55); + this.comboBoxModiLanguage.Name = "comboBoxModiLanguage"; + this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21); + this.comboBoxModiLanguage.TabIndex = 0; + this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged); + // + // groupBoxNOCR + // + this.groupBoxNOCR.Controls.Add(this.buttonLineOcrEditLanguage); + this.groupBoxNOCR.Controls.Add(this.buttonLineOcrNewLanguage); + this.groupBoxNOCR.Controls.Add(this.label2); + this.groupBoxNOCR.Controls.Add(this.comboBoxNOcrLanguage); + this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic); + this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrCorrect); + this.groupBoxNOCR.Controls.Add(this.checkBoxRightToLeftNOCR); + this.groupBoxNOCR.Controls.Add(this.numericUpDownNumberOfPixelsIsSpaceNOCR); + this.groupBoxNOCR.Controls.Add(this.labelNumberOfPixelsIsSpaceNOCR); + this.groupBoxNOCR.Location = new System.Drawing.Point(7, 38); + this.groupBoxNOCR.Name = "groupBoxNOCR"; + this.groupBoxNOCR.Size = new System.Drawing.Size(366, 131); + this.groupBoxNOCR.TabIndex = 7; + this.groupBoxNOCR.TabStop = false; + this.groupBoxNOCR.Text = "nOCR"; + // + // buttonLineOcrEditLanguage + // + this.buttonLineOcrEditLanguage.Location = new System.Drawing.Point(210, 97); + this.buttonLineOcrEditLanguage.Name = "buttonLineOcrEditLanguage"; + this.buttonLineOcrEditLanguage.Size = new System.Drawing.Size(68, 21); + this.buttonLineOcrEditLanguage.TabIndex = 41; + this.buttonLineOcrEditLanguage.Text = "Edit"; + this.buttonLineOcrEditLanguage.UseVisualStyleBackColor = true; + this.buttonLineOcrEditLanguage.Click += new System.EventHandler(this.buttonLineOcrEditLanguage_Click); + // + // buttonLineOcrNewLanguage + // + this.buttonLineOcrNewLanguage.Location = new System.Drawing.Point(283, 97); + this.buttonLineOcrNewLanguage.Name = "buttonLineOcrNewLanguage"; + this.buttonLineOcrNewLanguage.Size = new System.Drawing.Size(68, 21); + this.buttonLineOcrNewLanguage.TabIndex = 40; + this.buttonLineOcrNewLanguage.Text = "New"; + this.buttonLineOcrNewLanguage.UseVisualStyleBackColor = true; + this.buttonLineOcrNewLanguage.Click += new System.EventHandler(this.buttonLineOcrNewLanguage_Click); + // + // label2 + // + this.label2.AutoSize = true; + this.label2.Location = new System.Drawing.Point(11, 101); + this.label2.Name = "label2"; + this.label2.Size = new System.Drawing.Size(54, 13); + this.label2.TabIndex = 35; + this.label2.Text = "Language"; + // + // comboBoxNOcrLanguage + // + this.comboBoxNOcrLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxNOcrLanguage.FormattingEnabled = true; + this.comboBoxNOcrLanguage.Location = new System.Drawing.Point(74, 97); + this.comboBoxNOcrLanguage.Name = "comboBoxNOcrLanguage"; + this.comboBoxNOcrLanguage.Size = new System.Drawing.Size(130, 21); + this.comboBoxNOcrLanguage.TabIndex = 34; + this.comboBoxNOcrLanguage.SelectedIndexChanged += new System.EventHandler(this.comboBoxNOcrLanguage_SelectedIndexChanged); + // + // checkBoxNOcrItalic + // + this.checkBoxNOcrItalic.AutoSize = true; + this.checkBoxNOcrItalic.Location = new System.Drawing.Point(15, 42); + this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic"; + this.checkBoxNOcrItalic.Size = new System.Drawing.Size(92, 17); + this.checkBoxNOcrItalic.TabIndex = 8; + this.checkBoxNOcrItalic.Text = "Contains italic"; + this.checkBoxNOcrItalic.UseVisualStyleBackColor = true; + // + // checkBoxNOcrCorrect + // + this.checkBoxNOcrCorrect.AutoSize = true; + this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(235, 17); + this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect"; + this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17); + this.checkBoxNOcrCorrect.TabIndex = 7; + this.checkBoxNOcrCorrect.Text = "Draw missing texts"; + this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true; + // + // checkBoxRightToLeftNOCR + // + this.checkBoxRightToLeftNOCR.AutoSize = true; + this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(235, 40); + this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR"; + this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17); + this.checkBoxRightToLeftNOCR.TabIndex = 6; + this.checkBoxRightToLeftNOCR.Text = "Right to left"; + this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true; + // + // numericUpDownNumberOfPixelsIsSpaceNOCR + // + this.numericUpDownNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(122, 17); + this.numericUpDownNumberOfPixelsIsSpaceNOCR.Maximum = new decimal(new int[] { + 50, + 0, + 0, + 0}); + this.numericUpDownNumberOfPixelsIsSpaceNOCR.Minimum = new decimal(new int[] { + 1, + 0, + 0, + 0}); + this.numericUpDownNumberOfPixelsIsSpaceNOCR.Name = "numericUpDownNumberOfPixelsIsSpaceNOCR"; + this.numericUpDownNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(50, 21); + this.numericUpDownNumberOfPixelsIsSpaceNOCR.TabIndex = 5; + this.numericUpDownNumberOfPixelsIsSpaceNOCR.Value = new decimal(new int[] { + 12, + 0, + 0, + 0}); + // + // labelNumberOfPixelsIsSpaceNOCR + // + this.labelNumberOfPixelsIsSpaceNOCR.AutoSize = true; + this.labelNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(12, 20); + this.labelNumberOfPixelsIsSpaceNOCR.Name = "labelNumberOfPixelsIsSpaceNOCR"; + this.labelNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(104, 13); + this.labelNumberOfPixelsIsSpaceNOCR.TabIndex = 4; + this.labelNumberOfPixelsIsSpaceNOCR.Text = "No of pixels is space"; + // // groupBoxImageCompareMethod // this.groupBoxImageCompareMethod.Controls.Add(this.labelMinLineSplitHeight); @@ -660,235 +885,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.buttonNewCharacterDatabase.UseVisualStyleBackColor = true; this.buttonNewCharacterDatabase.Click += new System.EventHandler(this.ButtonNewCharacterDatabaseClick); // - // GroupBoxTesseractMethod - // - this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries); - this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn); - this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn); - this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxUseModiInTesseractForUnknownWords); - this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage); - this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages); - this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31); - this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod"; - this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131); - this.GroupBoxTesseractMethod.TabIndex = 1; - this.GroupBoxTesseractMethod.TabStop = false; - this.GroupBoxTesseractMethod.Text = "Tesseract"; - // - // buttonGetTesseractDictionaries - // - this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 30); - this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries"; - this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23); - this.buttonGetTesseractDictionaries.TabIndex = 2; - this.buttonGetTesseractDictionaries.Text = "..."; - this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true; - this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click); - // - // checkBoxTesseractMusicOn - // - this.checkBoxTesseractMusicOn.AutoSize = true; - this.checkBoxTesseractMusicOn.Checked = true; - this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked; - this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 101); - this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn"; - this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(93, 17); - this.checkBoxTesseractMusicOn.TabIndex = 4; - this.checkBoxTesseractMusicOn.Text = "Music symbols"; - this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true; - // - // checkBoxTesseractItalicsOn - // - this.checkBoxTesseractItalicsOn.AutoSize = true; - this.checkBoxTesseractItalicsOn.Checked = true; - this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked; - this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 101); - this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn"; - this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17); - this.checkBoxTesseractItalicsOn.TabIndex = 3; - this.checkBoxTesseractItalicsOn.Text = "Italics"; - this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true; - // - // checkBoxUseModiInTesseractForUnknownWords - // - this.checkBoxUseModiInTesseractForUnknownWords.AutoSize = true; - this.checkBoxUseModiInTesseractForUnknownWords.Checked = true; - this.checkBoxUseModiInTesseractForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked; - this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false; - this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74); - this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords"; - this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(167, 17); - this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 2; - this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words"; - this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true; - // - // labelTesseractLanguage - // - this.labelTesseractLanguage.AutoSize = true; - this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 34); - this.labelTesseractLanguage.Name = "labelTesseractLanguage"; - this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13); - this.labelTesseractLanguage.TabIndex = 0; - this.labelTesseractLanguage.Text = "Language"; - // - // comboBoxTesseractLanguages - // - this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; - this.comboBoxTesseractLanguages.FormattingEnabled = true; - this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 31); - this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages"; - this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21); - this.comboBoxTesseractLanguages.TabIndex = 1; - this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged); - // - // groupBoxModiMethod - // - this.groupBoxModiMethod.Controls.Add(this.label1); - this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage); - this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50); - this.groupBoxModiMethod.Name = "groupBoxModiMethod"; - this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131); - this.groupBoxModiMethod.TabIndex = 3; - this.groupBoxModiMethod.TabStop = false; - this.groupBoxModiMethod.Text = "MODI"; - // - // label1 - // - this.label1.AutoSize = true; - this.label1.Location = new System.Drawing.Point(11, 58); - this.label1.Name = "label1"; - this.label1.Size = new System.Drawing.Size(54, 13); - this.label1.TabIndex = 33; - this.label1.Text = "Language"; - // - // comboBoxModiLanguage - // - this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; - this.comboBoxModiLanguage.FormattingEnabled = true; - this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55); - this.comboBoxModiLanguage.Name = "comboBoxModiLanguage"; - this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21); - this.comboBoxModiLanguage.TabIndex = 0; - this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged); - // - // groupBoxNOCR - // - this.groupBoxNOCR.Controls.Add(this.buttonLineOcrEditLanguage); - this.groupBoxNOCR.Controls.Add(this.buttonLineOcrNewLanguage); - this.groupBoxNOCR.Controls.Add(this.label2); - this.groupBoxNOCR.Controls.Add(this.comboBoxNOcrLanguage); - this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic); - this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrCorrect); - this.groupBoxNOCR.Controls.Add(this.checkBoxRightToLeftNOCR); - this.groupBoxNOCR.Controls.Add(this.numericUpDownNumberOfPixelsIsSpaceNOCR); - this.groupBoxNOCR.Controls.Add(this.labelNumberOfPixelsIsSpaceNOCR); - this.groupBoxNOCR.Location = new System.Drawing.Point(7, 38); - this.groupBoxNOCR.Name = "groupBoxNOCR"; - this.groupBoxNOCR.Size = new System.Drawing.Size(366, 131); - this.groupBoxNOCR.TabIndex = 7; - this.groupBoxNOCR.TabStop = false; - this.groupBoxNOCR.Text = "nOCR"; - // - // buttonLineOcrEditLanguage - // - this.buttonLineOcrEditLanguage.Location = new System.Drawing.Point(210, 97); - this.buttonLineOcrEditLanguage.Name = "buttonLineOcrEditLanguage"; - this.buttonLineOcrEditLanguage.Size = new System.Drawing.Size(68, 21); - this.buttonLineOcrEditLanguage.TabIndex = 41; - this.buttonLineOcrEditLanguage.Text = "Edit"; - this.buttonLineOcrEditLanguage.UseVisualStyleBackColor = true; - this.buttonLineOcrEditLanguage.Click += new System.EventHandler(this.buttonLineOcrEditLanguage_Click); - // - // buttonLineOcrNewLanguage - // - this.buttonLineOcrNewLanguage.Location = new System.Drawing.Point(283, 97); - this.buttonLineOcrNewLanguage.Name = "buttonLineOcrNewLanguage"; - this.buttonLineOcrNewLanguage.Size = new System.Drawing.Size(68, 21); - this.buttonLineOcrNewLanguage.TabIndex = 40; - this.buttonLineOcrNewLanguage.Text = "New"; - this.buttonLineOcrNewLanguage.UseVisualStyleBackColor = true; - this.buttonLineOcrNewLanguage.Click += new System.EventHandler(this.buttonLineOcrNewLanguage_Click); - // - // label2 - // - this.label2.AutoSize = true; - this.label2.Location = new System.Drawing.Point(11, 101); - this.label2.Name = "label2"; - this.label2.Size = new System.Drawing.Size(54, 13); - this.label2.TabIndex = 35; - this.label2.Text = "Language"; - // - // comboBoxNOcrLanguage - // - this.comboBoxNOcrLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; - this.comboBoxNOcrLanguage.FormattingEnabled = true; - this.comboBoxNOcrLanguage.Location = new System.Drawing.Point(74, 97); - this.comboBoxNOcrLanguage.Name = "comboBoxNOcrLanguage"; - this.comboBoxNOcrLanguage.Size = new System.Drawing.Size(130, 21); - this.comboBoxNOcrLanguage.TabIndex = 34; - this.comboBoxNOcrLanguage.SelectedIndexChanged += new System.EventHandler(this.comboBoxNOcrLanguage_SelectedIndexChanged); - // - // checkBoxNOcrItalic - // - this.checkBoxNOcrItalic.AutoSize = true; - this.checkBoxNOcrItalic.Location = new System.Drawing.Point(15, 42); - this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic"; - this.checkBoxNOcrItalic.Size = new System.Drawing.Size(92, 17); - this.checkBoxNOcrItalic.TabIndex = 8; - this.checkBoxNOcrItalic.Text = "Contains italic"; - this.checkBoxNOcrItalic.UseVisualStyleBackColor = true; - // - // checkBoxNOcrCorrect - // - this.checkBoxNOcrCorrect.AutoSize = true; - this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(235, 17); - this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect"; - this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17); - this.checkBoxNOcrCorrect.TabIndex = 7; - this.checkBoxNOcrCorrect.Text = "Draw missing texts"; - this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true; - // - // checkBoxRightToLeftNOCR - // - this.checkBoxRightToLeftNOCR.AutoSize = true; - this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(235, 40); - this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR"; - this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17); - this.checkBoxRightToLeftNOCR.TabIndex = 6; - this.checkBoxRightToLeftNOCR.Text = "Right to left"; - this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true; - // - // numericUpDownNumberOfPixelsIsSpaceNOCR - // - this.numericUpDownNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(122, 17); - this.numericUpDownNumberOfPixelsIsSpaceNOCR.Maximum = new decimal(new int[] { - 50, - 0, - 0, - 0}); - this.numericUpDownNumberOfPixelsIsSpaceNOCR.Minimum = new decimal(new int[] { - 1, - 0, - 0, - 0}); - this.numericUpDownNumberOfPixelsIsSpaceNOCR.Name = "numericUpDownNumberOfPixelsIsSpaceNOCR"; - this.numericUpDownNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(50, 21); - this.numericUpDownNumberOfPixelsIsSpaceNOCR.TabIndex = 5; - this.numericUpDownNumberOfPixelsIsSpaceNOCR.Value = new decimal(new int[] { - 12, - 0, - 0, - 0}); - // - // labelNumberOfPixelsIsSpaceNOCR - // - this.labelNumberOfPixelsIsSpaceNOCR.AutoSize = true; - this.labelNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(12, 20); - this.labelNumberOfPixelsIsSpaceNOCR.Name = "labelNumberOfPixelsIsSpaceNOCR"; - this.labelNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(104, 13); - this.labelNumberOfPixelsIsSpaceNOCR.TabIndex = 4; - this.labelNumberOfPixelsIsSpaceNOCR.Text = "No of pixels is space"; - // // groupBoxOCRControls // this.groupBoxOCRControls.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); @@ -1595,12 +1591,28 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.timerHideStatus.Interval = 2000; this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick); // - // setForecolorThresholdToolStripMenuItem + // comboBoxTesseractEngineMode // - this.setForecolorThresholdToolStripMenuItem.Name = "setForecolorThresholdToolStripMenuItem"; - this.setForecolorThresholdToolStripMenuItem.Size = new System.Drawing.Size(305, 22); - this.setForecolorThresholdToolStripMenuItem.Text = "Set forecolor threshold..."; - this.setForecolorThresholdToolStripMenuItem.Click += new System.EventHandler(this.setForecolorThresholdToolStripMenuItem_Click); + this.comboBoxTesseractEngineMode.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxTesseractEngineMode.FormattingEnabled = true; + this.comboBoxTesseractEngineMode.Items.AddRange(new object[] { + "Original Tesseract only (can detect italic)", + "Neural nets LSTM only ", + "Tesseract + LSTM", + "Default, based on what is available"}); + this.comboBoxTesseractEngineMode.Location = new System.Drawing.Point(98, 100); + this.comboBoxTesseractEngineMode.Name = "comboBoxTesseractEngineMode"; + this.comboBoxTesseractEngineMode.Size = new System.Drawing.Size(195, 21); + this.comboBoxTesseractEngineMode.TabIndex = 5; + // + // labelTesseractEngineMode + // + this.labelTesseractEngineMode.AutoSize = true; + this.labelTesseractEngineMode.Location = new System.Drawing.Point(18, 103); + this.labelTesseractEngineMode.Name = "labelTesseractEngineMode"; + this.labelTesseractEngineMode.Size = new System.Drawing.Size(68, 13); + this.labelTesseractEngineMode.TabIndex = 6; + this.labelTesseractEngineMode.Text = "Engine mode"; // // VobSubOcr // @@ -1629,10 +1641,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.Resize += new System.EventHandler(this.VobSubOcr_Resize); this.contextMenuStripListview.ResumeLayout(false); this.groupBoxOcrMethod.ResumeLayout(false); - this.groupBoxImageCompareMethod.ResumeLayout(false); - this.groupBoxImageCompareMethod.PerformLayout(); - ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit(); - ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit(); this.GroupBoxTesseractMethod.ResumeLayout(false); this.GroupBoxTesseractMethod.PerformLayout(); this.groupBoxModiMethod.ResumeLayout(false); @@ -1640,6 +1648,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.groupBoxNOCR.ResumeLayout(false); this.groupBoxNOCR.PerformLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).EndInit(); + this.groupBoxImageCompareMethod.ResumeLayout(false); + this.groupBoxImageCompareMethod.PerformLayout(); + ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit(); this.groupBoxOCRControls.ResumeLayout(false); this.groupBoxOCRControls.PerformLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).EndInit(); @@ -1710,7 +1722,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private System.Windows.Forms.Label label1; private System.Windows.Forms.GroupBox groupBoxModiMethod; private System.Windows.Forms.GroupBox GroupBoxTesseractMethod; - private System.Windows.Forms.CheckBox checkBoxUseModiInTesseractForUnknownWords; private System.Windows.Forms.Label labelTesseractLanguage; private System.Windows.Forms.ComboBox comboBoxTesseractLanguages; private System.Windows.Forms.ContextMenuStrip contextMenuStripListview; @@ -1801,5 +1812,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private System.Windows.Forms.Label labelMinLineSplitHeight; private System.Windows.Forms.ComboBox comboBoxLineSplitMinLineHeight; private System.Windows.Forms.ToolStripMenuItem setForecolorThresholdToolStripMenuItem; + private System.Windows.Forms.Label labelTesseractEngineMode; + private System.Windows.Forms.ComboBox comboBoxTesseractEngineMode; } } \ No newline at end of file diff --git a/src/Forms/Ocr/VobSubOcr.cs b/src/Forms/Ocr/VobSubOcr.cs index 6c379a2e3..43609c95d 100644 --- a/src/Forms/Ocr/VobSubOcr.cs +++ b/src/Forms/Ocr/VobSubOcr.cs @@ -302,6 +302,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private string[] _tesseractAsyncStrings; private int _tesseractAsyncIndex; private BackgroundWorker _tesseractThread; + private int _tesseractEngineMode; private readonly DateTime _windowStartTime = DateTime.Now; private int _linesOcred; @@ -426,9 +427,15 @@ namespace Nikse.SubtitleEdit.Forms.Ocr _ocrMethodImageCompare = 4; } - checkBoxUseModiInTesseractForUnknownWords.Text = language.TryModiForUnknownWords; checkBoxTesseractItalicsOn.Checked = Configuration.Settings.VobSubOcr.UseItalicsInTesseract; checkBoxTesseractItalicsOn.Text = Configuration.Settings.Language.General.Italic; + if (Configuration.Settings.VobSubOcr.TesseractEngineMode >= 0 && + Configuration.Settings.VobSubOcr.TesseractEngineMode < comboBoxTesseractEngineMode.Items.Count) + { + comboBoxTesseractEngineMode.SelectedIndex = Configuration.Settings.VobSubOcr.TesseractEngineMode; + } + comboBoxTesseractEngineMode.Left = labelTesseractEngineMode.Left + labelTesseractEngineMode.Width + 5; + comboBoxTesseractEngineMode.Width = GroupBoxTesseractMethod.Width - comboBoxTesseractEngineMode.Left - 10; checkBoxTesseractMusicOn.Checked = Configuration.Settings.VobSubOcr.UseMusicSymbolsInTesseract; checkBoxTesseractMusicOn.Text = Configuration.Settings.Language.Settings.MusicSymbol; @@ -5314,7 +5321,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void FormVobSubOcr_Shown(object sender, EventArgs e) { - checkBoxUseModiInTesseractForUnknownWords.Checked = Configuration.Settings.VobSubOcr.UseModiInTesseractForUnknownWords; if (_mp4List != null) { checkBoxShowOnlyForced.Visible = false; @@ -5841,6 +5847,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void ButtonStartOcrClick(object sender, EventArgs e) { + _tesseractEngineMode = comboBoxTesseractEngineMode.SelectedIndex; _isLatinDb = comboBoxCharacterDatabase.SelectedItem != null && comboBoxCharacterDatabase.SelectedItem.ToString().Equals("Latin", StringComparison.Ordinal); Configuration.Settings.VobSubOcr.RightToLeft = checkBoxRightToLeft.Checked; _lastLine = null; @@ -6144,11 +6151,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr var nbmp = new NikseBitmap(bmp); nbmp.ReplaceYellowWithWhite(); // optimized replace - string tempTiffFileName = Path.GetTempPath() + Guid.NewGuid() + ".png"; + string pngFileName = Path.GetTempPath() + Guid.NewGuid() + ".png"; string tempTextFileName; using (var b = nbmp.GetBitmap()) { - b.Save(tempTiffFileName, System.Drawing.Imaging.ImageFormat.Png); + b.Save(pngFileName, System.Drawing.Imaging.ImageFormat.Png); tempTextFileName = Path.GetTempPath() + Guid.NewGuid(); } @@ -6156,10 +6163,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { process.StartInfo = new ProcessStartInfo(Configuration.TesseractDirectory + "tesseract.exe"); process.StartInfo.UseShellExecute = true; - process.StartInfo.Arguments = "\"" + tempTiffFileName + "\" \"" + tempTextFileName + "\" -l " + language; - - if (checkBoxTesseractMusicOn.Checked) - process.StartInfo.Arguments += "+music"; + process.StartInfo.Arguments = "\"" + pngFileName + "\" \"" + tempTextFileName + "\" --oem " + _tesseractEngineMode + " -l " + language; if (!string.IsNullOrEmpty(psmMode)) process.StartInfo.Arguments += " " + psmMode.Trim(); @@ -6186,11 +6190,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { if (Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac()) { - MessageBox.Show("Unable to start 'Tesseract' - make sure tesseract-ocr 3.x is installed!"); + MessageBox.Show("Unable to start 'Tesseract' - make sure tesseract-ocr 4.x is installed!"); } else { - MessageBox.Show("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure tesseract-ocr 3.x is installed!"); + MessageBox.Show("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure tesseract-ocr 4.x is installed!"); } throw; } @@ -6209,7 +6213,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr result = ParseHocr(result); File.Delete(outputFileName); } - File.Delete(tempTiffFileName); + File.Delete(pngFileName); } catch { @@ -6292,7 +6296,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr const int badWords = 0; string textWithOutFixes; - if (_tesseractAsyncStrings != null && !string.IsNullOrEmpty(_tesseractAsyncStrings[index])) + if (!string.IsNullOrEmpty(_tesseractAsyncStrings?[index])) { textWithOutFixes = _tesseractAsyncStrings[index]; } @@ -6819,53 +6823,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { _ocrFixEngine.AutoGuessesUsed.Clear(); _ocrFixEngine.UnknownWordsFound.Clear(); - - if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked) - { - // which is best - modi or Tesseract - we find out here - string modiText = CallModi(index); - - if (modiText.Length == 0) - modiText = CallModi(index); // retry... strange MODI - if (modiText.Length == 0) - modiText = CallModi(index); // retry... strange MODI - - if (modiText.Length > 1 && - !modiText.Contains("CD") && - (!modiText.Contains('0') || line.Contains('0')) && - (!modiText.Contains('2') || line.Contains('2')) && - (!modiText.Contains('3') || line.Contains('4')) && - (!modiText.Contains('5') || line.Contains('5')) && - (!modiText.Contains('9') || line.Contains('9')) && - (!modiText.Contains('•') || line.Contains('•')) && - (!modiText.Contains(')') || line.Contains(')')) && - Utilities.CountTagInText(modiText, '(') < 2 && Utilities.CountTagInText(modiText, ')') < 2 && - Utilities.GetNumberOfLines(modiText) < 4) - { - int modiWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiText, out correctWords); - //if (modiWordsNotFound > 0) - { - string modiTextOcrFixed = modiText; - if (checkBoxAutoFixCommonErrors.Checked) - modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(modiText, index, _lastLine, false, GetAutoGuessLevel()); - int modiOcrCorrectedWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiTextOcrFixed, out correctWords); - if (modiOcrCorrectedWordsNotFound <= modiWordsNotFound) - modiText = modiTextOcrFixed; - } - - if (modiWordsNotFound < wordsNotFound || (textWithOutFixes.Length == 1 && modiWordsNotFound == 0)) - line = modiText; // use the modi OCR'ed text - else if (wordsNotFound == modiWordsNotFound && modiText.EndsWith('!') && (line.EndsWith('l') || line.EndsWith('fl'))) - line = modiText; - } - - // take the best option - before OCR fixing, which we do again to save suggestions and prompt for user input - line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel()); - } - else - { // fix some error manually (modi not available) - line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel()); - } + line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel()); } if (_ocrFixEngine.Abort) @@ -6875,39 +6833,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr return string.Empty; } - //check Tesseract... find an other way to do this... - //string tmp = HtmlUtil.RemoveHtmlTags(line).Trim(); - //if (!tmp.TrimEnd().EndsWith("...")) - //{ - // tmp = tmp.TrimEnd('.').TrimEnd(); - // if (tmp.Length > 2 && Utilities.LowercaseLetters.Contains(tmp[tmp.Length - 1])) - // { - // if (_nocrChars == null) - // _nocrChars = LoadNOcrForTesseract("Nikse.SubtitleEdit.Resources.nOCR_TesseractHelper.xml.zip"); - // string text = HtmlUtil.RemoveHtmlTags(NocrFastCheck(bitmap).TrimEnd()); - // string post = string.Empty; - // if (line.EndsWith("")) - // { - // post = ""; - // line = line.Remove(line.Length - 4, 4).Trim(); - // } - // if (text.EndsWith('.')) - // { - // line = line.TrimEnd('.').Trim(); - // while (text.EndsWith('.') || text.EndsWith(' ')) - // { - // line += text.Substring(text.Length - 1).Trim(); - // text = text.Remove(text.Length - 1, 1); - // } - // } - // else if (text.EndsWith('l') && text.EndsWith('!') && !text.EndsWith("l!")) - // { - // line = line.Remove(line.Length - 1, 1) + "!"; - // } - // line += post; - // } - //} - // Log used word guesses (via word replace list) foreach (string guess in _ocrFixEngine.AutoGuessesUsed) listBoxLogSuggestions.Items.Add(guess); @@ -6938,7 +6863,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr if (textWithOutFixes.Trim() != line.Trim()) { _tesseractOcrAutoFixes++; - labelFixesMade.Text = string.Format(" - {0}", _tesseractOcrAutoFixes); + labelFixesMade.Text = $" - {_tesseractOcrAutoFixes}"; LogOcrFix(index, textWithOutFixes, line); } @@ -6990,9 +6915,20 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private string TesseractResizeAndRetry(Bitmap bitmap) { - string result = Tesseract3DoOcrViaExe(ResizeBitmap(bitmap, bitmap.Width * 3, bitmap.Height * 2), _languageId, null); + string result; + using (var b = ResizeBitmap(bitmap, bitmap.Width * 3, bitmap.Height * 2)) + { + result = Tesseract3DoOcrViaExe(b, _languageId, null); + } + if (string.IsNullOrWhiteSpace(result)) - result = Tesseract3DoOcrViaExe(ResizeBitmap(bitmap, bitmap.Width * 4, bitmap.Height * 2), _languageId, "-psm 7"); + { + using (var b = ResizeBitmap(bitmap, bitmap.Width * 4, bitmap.Height * 2)) + { + result = Tesseract3DoOcrViaExe(b, _languageId, "-psm 7"); + } + } + return result.TrimEnd(); } @@ -7108,7 +7044,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void InitializeModi() { _modiEnabled = false; - checkBoxUseModiInTesseractForUnknownWords.Enabled = false; comboBoxModiLanguage.Enabled = false; try { @@ -7119,7 +7054,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr _modiEnabled = _modiDoc != null; comboBoxModiLanguage.Enabled = _modiEnabled; - checkBoxUseModiInTesseractForUnknownWords.Enabled = _modiEnabled; } catch { @@ -7523,17 +7457,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr comboBoxDictionaries_SelectedIndexChanged(null, null); } - if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked) - { - string tesseractLanguageText = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Text; - int i = 0; - foreach (var modiLanguage in comboBoxModiLanguage.Items) - { - if ((modiLanguage as ModiLanguage).Text == tesseractLanguageText) - comboBoxModiLanguage.SelectedIndex = i; - i++; - } - } comboBoxModiLanguage.SelectedIndex = -1; } @@ -8430,8 +8353,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr DisposeImageCompareBitmaps(); Configuration.Settings.VobSubOcr.UseItalicsInTesseract = checkBoxTesseractItalicsOn.Checked; + if (comboBoxTesseractEngineMode.SelectedIndex != -1) + Configuration.Settings.VobSubOcr.TesseractEngineMode = comboBoxTesseractEngineMode.SelectedIndex; Configuration.Settings.VobSubOcr.ItalicFactor = _unItalicFactor; - Configuration.Settings.VobSubOcr.UseModiInTesseractForUnknownWords = checkBoxUseModiInTesseractForUnknownWords.Checked; Configuration.Settings.VobSubOcr.PromptForUnknownWords = checkBoxPromptForUnknownWords.Checked; Configuration.Settings.VobSubOcr.GuessUnknownWords = checkBoxGuessUnknownWords.Checked; Configuration.Settings.VobSubOcr.AutoBreakSubtitleIfMoreThanTwoLines = checkBoxAutoBreakLines.Checked;