diff --git a/Changelog.txt b/Changelog.txt
index 9f2b64730..eb339cc27 100644
--- a/Changelog.txt
+++ b/Changelog.txt
@@ -21,6 +21,7 @@
* Update Romanian translation - thx Mircea
* Update Basque translation - thx Xabier
* Update Portuguese translation - thx moob
+ * Update Tesseract OCR from 3.02 to 4.0 (alpha)
* Ctrl+a/ctrl+d/ctrl+shift+i works in more lists - thx tormento
* Remember OCR spell check dictionary for tesseract - thx raymondjpg
* FIXED:
@@ -37,6 +38,8 @@
* Do not allow navigating before zero in video - thx darnn
* Fix issue with nested tags in "Remove text for HI" - thx darnn
* Fix image render issue regarding italic/font - thx Cemal
+ * Fix bottom margin in batch image export - thx Cemal
+ * Fix possible crash in list view - thx lambdacore12
3.5.6 (27th February 2018)
* NEW:
diff --git a/LanguageMaster.xml b/LanguageMaster.xml
index d36b971f2..b0ba52372 100644
--- a/LanguageMaster.xml
+++ b/LanguageMaster.xml
@@ -2318,7 +2318,6 @@ Keep changes?
Save all images (png/bdn xml)...
Save all images with HTML index...
{0} images saved in {1}
- Try Microsoft MODI OCR for unknown words
Dictionary: {0}
Right to left
Show only forced subtitles
diff --git a/Tesseract4/tessdata/configs/hocr b/Tesseract4/tessdata/configs/hocr
new file mode 100644
index 000000000..9f63e41eb
--- /dev/null
+++ b/Tesseract4/tessdata/configs/hocr
@@ -0,0 +1,3 @@
+tessedit_create_hocr 1
+tessedit_pageseg_mode 1
+hocr_font_info 0
diff --git a/Tesseract4/tessdata/eng.traineddata b/Tesseract4/tessdata/eng.traineddata
new file mode 100644
index 000000000..bbef46750
Binary files /dev/null and b/Tesseract4/tessdata/eng.traineddata differ
diff --git a/Tesseract4/tesseract.exe b/Tesseract4/tesseract.exe
new file mode 100644
index 000000000..b64dc7ecd
Binary files /dev/null and b/Tesseract4/tesseract.exe differ
diff --git a/build.bat b/build.bat
index 196474ef2..b674a9b85 100644
--- a/build.bat
+++ b/build.bat
@@ -91,9 +91,9 @@ PUSHD "src\bin\Release"
IF EXIST "temp_zip" RD /S /Q "temp_zip"
IF NOT EXIST "temp_zip" MD "temp_zip"
IF NOT EXIST "temp_zip\Languages" MD "temp_zip\Languages"
-IF NOT EXIST "temp_zip\Tesseract" MD "temp_zip\Tesseract"
-IF NOT EXIST "temp_zip\Tesseract\tessdata" MD "temp_zip\Tesseract\tessdata"
-IF NOT EXIST "temp_zip\Tesseract\tessdata\configs" MD "temp_zip\Tesseract\tessdata\configs"
+IF NOT EXIST "temp_zip\Tesseract4" MD "temp_zip\Tesseract4"
+IF NOT EXIST "temp_zip\Tesseract4\tessdata" MD "temp_zip\Tesseract4\tessdata"
+IF NOT EXIST "temp_zip\Tesseract4\tessdata\configs" MD "temp_zip\Tesseract4\tessdata\configs"
COPY /Y /V "..\..\..\LICENSE.txt" "temp_zip\"
COPY /Y /V "..\..\..\Changelog.txt" "temp_zip\"
@@ -101,11 +101,9 @@ COPY /Y /V "Hunspellx86.dll" "temp_zip\"
COPY /Y /V "Hunspellx64.dll" "temp_zip\"
COPY /Y /V "SubtitleEdit.exe" "temp_zip\"
COPY /Y /V "Languages\*.xml" "temp_zip\Languages\"
-COPY /Y /V "..\..\..\Tesseract\msvcp90.dll" "temp_zip\Tesseract\"
-COPY /Y /V "..\..\..\Tesseract\msvcr90.dll" "temp_zip\Tesseract\"
-COPY /Y /V "..\..\..\Tesseract\tesseract.exe" "temp_zip\Tesseract\"
-COPY /Y /V "..\..\..\Tesseract\tessdata\configs\hocr" "temp_zip\Tesseract\tessdata\configs\"
-COPY /Y /V "..\..\..\Tesseract\tessdata\*.traineddata" "temp_zip\Tesseract\tessdata\"
+COPY /Y /V "..\..\..\Tesseract4\tesseract.exe" "temp_zip\Tesseract4\"
+COPY /Y /V "..\..\..\Tesseract4\tessdata\configs\hocr" "temp_zip\Tesseract4\tessdata\configs\"
+COPY /Y /V "..\..\..\Tesseract4\tessdata\*.traineddata" "temp_zip\Tesseract4\tessdata\"
PUSHD "temp_zip"
START "" /B /WAIT "%SEVENZIP%" a -tzip -mx=9 "SE%VERSION%.zip" * >NUL
diff --git a/installer/Subtitle_Edit_installer.iss b/installer/Subtitle_Edit_installer.iss
index cf9f84295..e6f5e3966 100644
--- a/installer/Subtitle_Edit_installer.iss
+++ b/installer/Subtitle_Edit_installer.iss
@@ -266,12 +266,9 @@ Source: {#bindir}\SubtitleEdit.exe; DestDir: {app};
Source: {#bindir}\SubtitleEdit.resources.dll; DestDir: {app}; Flags: ignoreversion; Components: main
Source: ..\Changelog.txt; DestDir: {app}; Flags: ignoreversion; Components: main
Source: ..\LICENSE.txt; DestDir: {app}; Flags: ignoreversion; Components: main
-Source: ..\Tesseract\msvcp90.dll; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main
-Source: ..\Tesseract\msvcr90.dll; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main
-Source: ..\Tesseract\tessdata\configs\hocr; DestDir: {app}\Tesseract\tessdata\configs; Flags: ignoreversion; Components: main
-Source: ..\Tesseract\tessdata\eng.traineddata; DestDir: {app}\Tesseract\tessdata; Flags: ignoreversion; Components: main
-Source: ..\Tesseract\tessdata\music.traineddata; DestDir: {app}\Tesseract\tessdata; Flags: ignoreversion; Components: main
-Source: ..\Tesseract\tesseract.exe; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main
+Source: ..\Tesseract4\tessdata\configs\hocr; DestDir: {app}\Tesseract4\tessdata\configs; Flags: ignoreversion; Components: main
+Source: ..\Tesseract4\tessdata\eng.traineddata; DestDir: {app}\Tesseract4\tessdata; Flags: ignoreversion; Components: main
+Source: ..\Tesseract4\tesseract.exe; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
Source: Icons\uninstall.ico; DestDir: {app}\Icons; Flags: ignoreversion; Components: main
diff --git a/libse/Configuration.cs b/libse/Configuration.cs
index 53a07ce8f..99fd5b577 100644
--- a/libse/Configuration.cs
+++ b/libse/Configuration.cs
@@ -23,7 +23,7 @@ namespace Nikse.SubtitleEdit.Core
public static readonly string SceneChangesDirectory = DataDirectory + "SceneChanges" + Path.DirectorySeparatorChar;
public static readonly string AutoBackupDirectory = DataDirectory + "AutoBackup" + Path.DirectorySeparatorChar;
public static readonly string VobSubCompareDirectory = DataDirectory + "VobSub" + Path.DirectorySeparatorChar;
- public static readonly string TesseractDirectory = DataDirectory + "Tesseract" + Path.DirectorySeparatorChar;
+ public static readonly string TesseractDirectory = DataDirectory + "Tesseract4" + Path.DirectorySeparatorChar;
public static readonly string WaveformsDirectory = DataDirectory + "Waveforms" + Path.DirectorySeparatorChar;
public static readonly string PluginsDirectory = DataDirectory + "Plugins" + Path.DirectorySeparatorChar;
public static readonly string IconsDirectory = BaseDirectory + "Icons" + Path.DirectorySeparatorChar;
@@ -54,21 +54,9 @@ namespace Nikse.SubtitleEdit.Core
}
- public static Settings Settings
- {
- get
- {
- return Instance.Value._settings.Value;
- }
- }
+ public static Settings Settings => Instance.Value._settings.Value;
- public static IEnumerable AvailableEncodings
- {
- get
- {
- return Instance.Value._encodings;
- }
- }
+ public static IEnumerable AvailableEncodings => Instance.Value._encodings;
private static string GetInstallerPath()
{
@@ -117,6 +105,7 @@ namespace Nikse.SubtitleEdit.Core
}
catch
{
+ // ignored
}
}
Directory.CreateDirectory(Path.Combine(appDataRoamingPath, "Dictionaries"));
diff --git a/libse/Language.cs b/libse/Language.cs
index 26c2f4802..4e66cd37d 100644
--- a/libse/Language.cs
+++ b/libse/Language.cs
@@ -2649,7 +2649,6 @@ Keep changes?",
SaveAllSubtitleImagesAsBdnXml = "Save all images (png/bdn xml)...",
SaveAllSubtitleImagesWithHtml = "Save all images with HTML index...",
XImagesSavedInY = "{0} images saved in {1}",
- TryModiForUnknownWords = "Try Microsoft MODI OCR for unknown words",
DictionaryX = "Dictionary: {0}",
RightToLeft = "Right to left",
ShowOnlyForcedSubtitles = "Show only forced subtitles",
diff --git a/libse/LanguageDeserializer.cs b/libse/LanguageDeserializer.cs
index 7c95b546a..13e91c43d 100644
--- a/libse/LanguageDeserializer.cs
+++ b/libse/LanguageDeserializer.cs
@@ -6259,9 +6259,6 @@ namespace Nikse.SubtitleEdit.Core
case "VobSubOcr/XImagesSavedInY":
language.VobSubOcr.XImagesSavedInY = reader.Value;
break;
- case "VobSubOcr/TryModiForUnknownWords":
- language.VobSubOcr.TryModiForUnknownWords = reader.Value;
- break;
case "VobSubOcr/DictionaryX":
language.VobSubOcr.DictionaryX = reader.Value;
break;
diff --git a/libse/LanguageStructure.cs b/libse/LanguageStructure.cs
index a66cb0549..c41ba9b0c 100644
--- a/libse/LanguageStructure.cs
+++ b/libse/LanguageStructure.cs
@@ -2524,7 +2524,6 @@
public string SaveAllSubtitleImagesAsBdnXml { get; set; }
public string SaveAllSubtitleImagesWithHtml { get; set; }
public string XImagesSavedInY { get; set; }
- public string TryModiForUnknownWords { get; set; }
public string DictionaryX { get; set; }
public string RightToLeft { get; set; }
public string ShowOnlyForcedSubtitles { get; set; }
diff --git a/libse/LibSE.csproj b/libse/LibSE.csproj
index 86dce6706..168b0cebd 100644
--- a/libse/LibSE.csproj
+++ b/libse/LibSE.csproj
@@ -531,6 +531,7 @@
+
diff --git a/libse/Settings.cs b/libse/Settings.cs
index adb00eac9..8b225ac3b 100644
--- a/libse/Settings.cs
+++ b/libse/Settings.cs
@@ -865,8 +865,8 @@ namespace Nikse.SubtitleEdit.Core
public int LastModiLanguageId { get; set; }
public string LastOcrMethod { get; set; }
public string TesseractLastLanguage { get; set; }
- public bool UseModiInTesseractForUnknownWords { get; set; }
public bool UseItalicsInTesseract { get; set; }
+ public int TesseractEngineMode { get; set; }
public bool UseMusicSymbolsInTesseract { get; set; }
public bool RightToLeft { get; set; }
public bool TopToBottom { get; set; }
@@ -2680,12 +2680,12 @@ namespace Nikse.SubtitleEdit.Core
subNode = node.SelectSingleNode("TesseractLastLanguage");
if (subNode != null)
settings.VobSubOcr.TesseractLastLanguage = subNode.InnerText;
- subNode = node.SelectSingleNode("UseModiInTesseractForUnknownWords");
- if (subNode != null)
- settings.VobSubOcr.UseModiInTesseractForUnknownWords = Convert.ToBoolean(subNode.InnerText);
subNode = node.SelectSingleNode("UseItalicsInTesseract");
if (subNode != null)
settings.VobSubOcr.UseItalicsInTesseract = Convert.ToBoolean(subNode.InnerText);
+ subNode = node.SelectSingleNode("TesseractEngineMode");
+ if (subNode != null)
+ settings.VobSubOcr.TesseractEngineMode = Convert.ToInt32(subNode.InnerText, CultureInfo.InvariantCulture); // culture-independent parse of the persisted value
subNode = node.SelectSingleNode("UseMusicSymbolsInTesseract");
if (subNode != null)
settings.VobSubOcr.UseMusicSymbolsInTesseract = Convert.ToBoolean(subNode.InnerText);
@@ -3808,8 +3808,8 @@ namespace Nikse.SubtitleEdit.Core
textWriter.WriteElementString("LastModiLanguageId", settings.VobSubOcr.LastModiLanguageId.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("LastOcrMethod", settings.VobSubOcr.LastOcrMethod);
textWriter.WriteElementString("TesseractLastLanguage", settings.VobSubOcr.TesseractLastLanguage);
- textWriter.WriteElementString("UseModiInTesseractForUnknownWords", settings.VobSubOcr.UseModiInTesseractForUnknownWords.ToString());
textWriter.WriteElementString("UseItalicsInTesseract", settings.VobSubOcr.UseItalicsInTesseract.ToString());
+ textWriter.WriteElementString("TesseractEngineMode", settings.VobSubOcr.TesseractEngineMode.ToString(CultureInfo.InvariantCulture)); // invariant, like LastModiLanguageId
textWriter.WriteElementString("UseMusicSymbolsInTesseract", settings.VobSubOcr.UseMusicSymbolsInTesseract.ToString());
textWriter.WriteElementString("RightToLeft", settings.VobSubOcr.RightToLeft.ToString());
textWriter.WriteElementString("TopToBottom", settings.VobSubOcr.TopToBottom.ToString());
diff --git a/libse/TesseractDictionary.cs b/libse/TesseractDictionary.cs
new file mode 100644
index 000000000..fded8fc24
--- /dev/null
+++ b/libse/TesseractDictionary.cs
@@ -0,0 +1,195 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+
+namespace Nikse.SubtitleEdit.Core
+{
+ public class TesseractDictionary
+ {
+
+ private const string DownloadUrlTemplate = "https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/{0}.traineddata";
+
+ /// <summary>
+ /// Dictionary codes for trained data containing both 3.5 + 4.0 data - see https://github.com/tesseract-ocr/tessdata
+ /// </summary>
+ private static readonly string[] Dictionaries =
+ {
+ "afr",
+ "amh",
+ "ara",
+ "asm",
+ "aze",
+ "aze_cyrl",
+ "bel",
+ "ben",
+ "bod",
+ "bos",
+ "bre",
+ "bul",
+ "cat",
+ "ceb",
+ "ces",
+ "chi_sim",
+ "chi_sim_vert",
+ "chi_tra",
+ "chi_tra_vert",
+ "chr",
+ "cos",
+ "cym",
+ "dan",
+ "deu",
+ "div",
+ "dzo",
+ "ell",
+ "eng",
+ "enm",
+ "epo",
+ "equ",
+ "est",
+ "eus",
+ "fao",
+ "fas",
+ "fil",
+ "fin",
+ "fra",
+ "frk",
+ "frm",
+ "fry",
+ "gla",
+ "gle",
+ "glg",
+ "grc",
+ "guj",
+ "hat",
+ "heb",
+ "hin",
+ "hrv",
+ "hun",
+ "hye",
+ "iku",
+ "ind",
+ "isl",
+ "ita",
+ "jav",
+ "jpn",
+ "jpn_vert",
+ "kan",
+ "kat",
+ "kaz",
+ "khm",
+ "kir",
+ "kor",
+ "kor_vert",
+ "kur",
+ "kur_ara",
+ "lao",
+ "lat",
+ "lav",
+ "lit",
+ "ltz",
+ "mal",
+ "mar",
+ "mkd",
+ "mlt",
+ "mon",
+ "mri",
+ "msa",
+ "mya",
+ "nep",
+ "nld",
+ "nor",
+ "oci",
+ "ori",
+ "osd",
+ "pan",
+ "pol",
+ "por",
+ "pus",
+ "que",
+ "ron",
+ "rus",
+ "san",
+ "sin",
+ "slk",
+ "slv",
+ "snd",
+ "spa",
+ "sqi",
+ "srp",
+ "srp_latn",
+ "sun",
+ "swa",
+ "swe",
+ "syr",
+ "tam",
+ "tat",
+ "tel",
+ "tgk",
+ "tgl",
+ "tha",
+ "tir",
+ "ton",
+ "tur",
+ "uig",
+ "ukr",
+ "urd",
+ "uzb",
+ "uzb_cyrl",
+ "vie",
+ "yid",
+ "yor"
+ };
+
+ public string Code { get; set; }
+ public string Name { get; set; }
+ public string Url { get; set; }
+
+        /// <summary>
+        /// Builds one entry (display name, code and download url) for every code in <see cref="Dictionaries"/>.
+        /// </summary>
+        /// <returns>A new list with the entries in the same order as <see cref="Dictionaries"/>.</returns>
+        public static List<TesseractDictionary> List()
+        {
+            // The neutral cultures are fetched once and handed to every MakeName call.
+            var cultures = CultureInfo.GetCultures(CultureTypes.NeutralCultures);
+            return Dictionaries.Select(code => new TesseractDictionary
+            {
+                Name = MakeName(code, cultures),
+                Code = code,
+                Url = string.Format(DownloadUrlTemplate, code)
+            }).ToList();
+        }
+
+        /// <summary>
+        /// Makes a display name from a dictionary code: the part before the first '_' is replaced by the
+        /// English name of the first culture in <paramref name="cultures"/> whose three-letter ISO language
+        /// name equals it (ignoring case), if any; the part after it is appended in parentheses.
+        /// </summary>
+        private static string MakeName(string dictionary, CultureInfo[] cultures)
+        {
+            var separatorIndex = dictionary.IndexOf('_');
+            var hasVariant = separatorIndex > 0;
+            var language = hasVariant ? dictionary.Substring(0, separatorIndex).Trim('_') : dictionary;
+            var variant = hasVariant ? $" ({dictionary.Substring(separatorIndex).Trim('_')})" : string.Empty;
+            var displayName = language;
+            try
+            {
+                var match = cultures.FirstOrDefault(c => string.Equals(c.ThreeLetterISOLanguageName, language, StringComparison.OrdinalIgnoreCase));
+                displayName = match != null ? match.EnglishName : language;
+            }
+            catch
+            {
+                // best effort: keep the raw code when the culture lookup fails
+            }
+
+            return displayName + variant;
+        }
+
+        /// <summary>
+        /// Returns the display name (<see cref="Name"/>) of this dictionary.
+        /// </summary>
+        public override string ToString() => Name;
+
+ }
+}
diff --git a/src/Forms/GetTesseractDictionaries.Designer.cs b/src/Forms/GetTesseractDictionaries.Designer.cs
index fc58eb661..1c7b7b218 100644
--- a/src/Forms/GetTesseractDictionaries.Designer.cs
+++ b/src/Forms/GetTesseractDictionaries.Designer.cs
@@ -1,6 +1,6 @@
namespace Nikse.SubtitleEdit.Forms
{
- partial class GetTesseractDictionaries
+ sealed partial class GetTesseractDictionaries
{
///
/// Required designer variable.
diff --git a/src/Forms/GetTesseractDictionaries.cs b/src/Forms/GetTesseractDictionaries.cs
index 3d949f133..27be6fed6 100644
--- a/src/Forms/GetTesseractDictionaries.cs
+++ b/src/Forms/GetTesseractDictionaries.cs
@@ -6,17 +6,14 @@ using System.IO;
using System.IO.Compression;
using System.Net;
using System.Windows.Forms;
-using System.Xml;
namespace Nikse.SubtitleEdit.Forms
{
- public partial class GetTesseractDictionaries : Form
+ public sealed partial class GetTesseractDictionaries : Form
{
- private List _dictionaryDownloadLinks = new List();
- private List _descriptions = new List();
- private string _xmlName = null;
- private string _dictionaryFileName = null;
+ private string _dictionaryFileName;
internal string ChosenLanguage { get; private set; }
+ private readonly List<TesseractDictionary> _dictionaries;
public GetTesseractDictionaries()
{
@@ -31,57 +28,24 @@ namespace Nikse.SubtitleEdit.Forms
buttonDownload.Text = Configuration.Settings.Language.GetTesseractDictionaries.Download;
labelPleaseWait.Text = string.Empty;
buttonOK.Text = Configuration.Settings.Language.General.Ok;
- LoadDictionaryList("Nikse.SubtitleEdit.Resources.TesseractDictionaries.xml.gz");
FixLargeFonts();
+ _dictionaries = TesseractDictionary.List();
+ LoadDictionaryList();
}
- private void LoadDictionaryList(string xmlRessourceName)
+ private void LoadDictionaryList()
{
- _dictionaryDownloadLinks = new List();
- _descriptions = new List();
- _xmlName = xmlRessourceName;
- System.Reflection.Assembly asm = System.Reflection.Assembly.GetExecutingAssembly();
- Stream strm = asm.GetManifestResourceStream(_xmlName);
- if (strm != null)
+ comboBoxDictionaries.BeginUpdate();
+ comboBoxDictionaries.Items.Clear();
+ foreach (var d in _dictionaries)
{
- comboBoxDictionaries.Items.Clear();
- XmlDocument doc = new XmlDocument();
- using (var rdr = new StreamReader(strm))
- using (var zip = new GZipStream(rdr.BaseStream, CompressionMode.Decompress))
+ if (!string.IsNullOrEmpty(d.Url))
{
- byte[] data = new byte[195000];
- int bytesRead = zip.Read(data, 0, data.Length);
- var s = System.Text.Encoding.UTF8.GetString(data, 0, bytesRead).Trim();
- try
- {
- doc.LoadXml(s);
- }
- catch (Exception exception)
- {
- MessageBox.Show(exception.Message);
- }
- }
-
- foreach (XmlNode node in doc.DocumentElement.SelectNodes("Dictionary"))
- {
- string englishName = node.SelectSingleNode("EnglishName").InnerText;
- string downloadLink = node.SelectSingleNode("DownloadLink").InnerText;
-
- string description = string.Empty;
- if (node.SelectSingleNode("Description") != null)
- description = node.SelectSingleNode("Description").InnerText;
-
- if (!string.IsNullOrEmpty(downloadLink))
- {
- string name = englishName;
-
- comboBoxDictionaries.Items.Add(name);
- _dictionaryDownloadLinks.Add(downloadLink);
- _descriptions.Add(description);
- }
- comboBoxDictionaries.SelectedIndex = 0;
+ comboBoxDictionaries.Items.Add(d);
}
}
+ comboBoxDictionaries.SelectedIndex = 0;
+ comboBoxDictionaries.EndUpdate();
comboBoxDictionaries.AutoCompleteSource = AutoCompleteSource.ListItems;
comboBoxDictionaries.AutoCompleteMode = AutoCompleteMode.Append;
}
@@ -105,7 +69,7 @@ namespace Nikse.SubtitleEdit.Forms
Cursor = Cursors.WaitCursor;
int index = comboBoxDictionaries.SelectedIndex;
- string url = _dictionaryDownloadLinks[index];
+ string url = ((TesseractDictionary)comboBoxDictionaries.Items[index]).Url; // read from the selected item: the combo box is filled through a filter, so its index need not match _dictionaries
ChosenLanguage = comboBoxDictionaries.Items[index].ToString();
var wc = new WebClient { Proxy = Utilities.GetProxy() };
diff --git a/src/Forms/Ocr/VobSubOcr.Designer.cs b/src/Forms/Ocr/VobSubOcr.Designer.cs
index 5f05bc890..adcd44f41 100644
--- a/src/Forms/Ocr/VobSubOcr.Designer.cs
+++ b/src/Forms/Ocr/VobSubOcr.Designer.cs
@@ -39,6 +39,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.nOcrTrainingToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.toolStripSeparator4 = new System.Windows.Forms.ToolStripSeparator();
this.toolStripMenuItemSetUnItalicFactor = new System.Windows.Forms.ToolStripMenuItem();
+ this.setForecolorThresholdToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.toolStripSeparator3 = new System.Windows.Forms.ToolStripSeparator();
this.deleteToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.labelSubtitleText = new System.Windows.Forms.Label();
@@ -48,23 +49,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.buttonCancel = new System.Windows.Forms.Button();
this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox();
this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox();
- this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox();
- this.labelMinLineSplitHeight = new System.Windows.Forms.Label();
- this.comboBoxLineSplitMinLineHeight = new System.Windows.Forms.ComboBox();
- this.labelMaxErrorPercent = new System.Windows.Forms.Label();
- this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown();
- this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox();
- this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown();
- this.buttonEditCharacterDatabase = new System.Windows.Forms.Button();
- this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label();
- this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox();
- this.labelImageDatabase = new System.Windows.Forms.Label();
- this.buttonNewCharacterDatabase = new System.Windows.Forms.Button();
this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox();
this.buttonGetTesseractDictionaries = new System.Windows.Forms.Button();
this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox();
this.checkBoxTesseractItalicsOn = new System.Windows.Forms.CheckBox();
- this.checkBoxUseModiInTesseractForUnknownWords = new System.Windows.Forms.CheckBox();
this.labelTesseractLanguage = new System.Windows.Forms.Label();
this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox();
this.groupBoxModiMethod = new System.Windows.Forms.GroupBox();
@@ -80,6 +68,18 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.checkBoxRightToLeftNOCR = new System.Windows.Forms.CheckBox();
this.numericUpDownNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.NumericUpDown();
this.labelNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.Label();
+ this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox();
+ this.labelMinLineSplitHeight = new System.Windows.Forms.Label();
+ this.comboBoxLineSplitMinLineHeight = new System.Windows.Forms.ComboBox();
+ this.labelMaxErrorPercent = new System.Windows.Forms.Label();
+ this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown();
+ this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox();
+ this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown();
+ this.buttonEditCharacterDatabase = new System.Windows.Forms.Button();
+ this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label();
+ this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox();
+ this.labelImageDatabase = new System.Windows.Forms.Label();
+ this.buttonNewCharacterDatabase = new System.Windows.Forms.Button();
this.groupBoxOCRControls = new System.Windows.Forms.GroupBox();
this.labelStartFrom = new System.Windows.Forms.Label();
this.numericUpDownStartNumber = new System.Windows.Forms.NumericUpDown();
@@ -141,16 +141,17 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.textBoxCurrentText = new Nikse.SubtitleEdit.Controls.SETextBox();
this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView();
this.timerHideStatus = new System.Windows.Forms.Timer(this.components);
- this.setForecolorThresholdToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
+ this.comboBoxTesseractEngineMode = new System.Windows.Forms.ComboBox();
+ this.labelTesseractEngineMode = new System.Windows.Forms.Label();
this.contextMenuStripListview.SuspendLayout();
this.groupBoxOcrMethod.SuspendLayout();
- this.groupBoxImageCompareMethod.SuspendLayout();
- ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit();
- ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit();
this.GroupBoxTesseractMethod.SuspendLayout();
this.groupBoxModiMethod.SuspendLayout();
this.groupBoxNOCR.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit();
+ this.groupBoxImageCompareMethod.SuspendLayout();
+ ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit();
+ ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit();
this.groupBoxOCRControls.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).BeginInit();
this.groupBoxOcrAutoFix.SuspendLayout();
@@ -200,7 +201,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.toolStripSeparator3,
this.deleteToolStripMenuItem});
this.contextMenuStripListview.Name = "contextMenuStripListview";
- this.contextMenuStripListview.Size = new System.Drawing.Size(306, 364);
+ this.contextMenuStripListview.Size = new System.Drawing.Size(306, 342);
this.contextMenuStripListview.Opening += new System.ComponentModel.CancelEventHandler(this.ContextMenuStripListviewOpening);
//
// normalToolStripMenuItem
@@ -339,6 +340,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.toolStripMenuItemSetUnItalicFactor.Text = "Set un-italic factor...";
this.toolStripMenuItemSetUnItalicFactor.Click += new System.EventHandler(this.toolStripMenuItemSetUnItalicFactor_Click);
//
+ // setForecolorThresholdToolStripMenuItem
+ //
+ this.setForecolorThresholdToolStripMenuItem.Name = "setForecolorThresholdToolStripMenuItem";
+ this.setForecolorThresholdToolStripMenuItem.Size = new System.Drawing.Size(305, 22);
+ this.setForecolorThresholdToolStripMenuItem.Text = "Set forecolor threshold...";
+ this.setForecolorThresholdToolStripMenuItem.Click += new System.EventHandler(this.setForecolorThresholdToolStripMenuItem_Click);
+ //
// toolStripSeparator3
//
this.toolStripSeparator3.Name = "toolStripSeparator3";
@@ -406,10 +414,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
// groupBoxOcrMethod
//
this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod);
- this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
+ this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5);
this.groupBoxOcrMethod.Name = "groupBoxOcrMethod";
this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192);
@@ -432,6 +440,223 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.comboBoxOcrMethod.TabIndex = 0;
this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged);
//
+ // GroupBoxTesseractMethod
+ //
+ this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractEngineMode);
+ this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractEngineMode);
+ this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries);
+ this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn);
+ this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn);
+ this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage);
+ this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages);
+ this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31);
+ this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod";
+ this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131);
+ this.GroupBoxTesseractMethod.TabIndex = 1;
+ this.GroupBoxTesseractMethod.TabStop = false;
+ this.GroupBoxTesseractMethod.Text = "Tesseract";
+ //
+ // buttonGetTesseractDictionaries
+ //
+ this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 28);
+ this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries";
+ this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23);
+ this.buttonGetTesseractDictionaries.TabIndex = 2;
+ this.buttonGetTesseractDictionaries.Text = "...";
+ this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true;
+ this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click);
+ //
+ // checkBoxTesseractMusicOn
+ //
+ this.checkBoxTesseractMusicOn.AutoSize = true;
+ this.checkBoxTesseractMusicOn.Checked = true;
+ this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked;
+ this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 69);
+ this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn";
+ this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(93, 17);
+ this.checkBoxTesseractMusicOn.TabIndex = 4;
+ this.checkBoxTesseractMusicOn.Text = "Music symbols";
+ this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true;
+ //
+ // checkBoxTesseractItalicsOn
+ //
+ this.checkBoxTesseractItalicsOn.AutoSize = true;
+ this.checkBoxTesseractItalicsOn.Checked = true;
+ this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked;
+ this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 69);
+ this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn";
+ this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17);
+ this.checkBoxTesseractItalicsOn.TabIndex = 3;
+ this.checkBoxTesseractItalicsOn.Text = "Italics";
+ this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true;
+ //
+ // labelTesseractLanguage
+ //
+ this.labelTesseractLanguage.AutoSize = true;
+ this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 32);
+ this.labelTesseractLanguage.Name = "labelTesseractLanguage";
+ this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13);
+ this.labelTesseractLanguage.TabIndex = 0;
+ this.labelTesseractLanguage.Text = "Language";
+ //
+ // comboBoxTesseractLanguages
+ //
+ this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
+ this.comboBoxTesseractLanguages.FormattingEnabled = true;
+ this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 29);
+ this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages";
+ this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21);
+ this.comboBoxTesseractLanguages.TabIndex = 1;
+ this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged);
+ //
+ // groupBoxModiMethod
+ //
+ this.groupBoxModiMethod.Controls.Add(this.label1);
+ this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage);
+ this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50);
+ this.groupBoxModiMethod.Name = "groupBoxModiMethod";
+ this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131);
+ this.groupBoxModiMethod.TabIndex = 3;
+ this.groupBoxModiMethod.TabStop = false;
+ this.groupBoxModiMethod.Text = "MODI";
+ //
+ // label1
+ //
+ this.label1.AutoSize = true;
+ this.label1.Location = new System.Drawing.Point(11, 58);
+ this.label1.Name = "label1";
+ this.label1.Size = new System.Drawing.Size(54, 13);
+ this.label1.TabIndex = 33;
+ this.label1.Text = "Language";
+ //
+ // comboBoxModiLanguage
+ //
+ this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
+ this.comboBoxModiLanguage.FormattingEnabled = true;
+ this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55);
+ this.comboBoxModiLanguage.Name = "comboBoxModiLanguage";
+ this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21);
+ this.comboBoxModiLanguage.TabIndex = 0;
+ this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged);
+ //
+ // groupBoxNOCR
+ //
+ this.groupBoxNOCR.Controls.Add(this.buttonLineOcrEditLanguage);
+ this.groupBoxNOCR.Controls.Add(this.buttonLineOcrNewLanguage);
+ this.groupBoxNOCR.Controls.Add(this.label2);
+ this.groupBoxNOCR.Controls.Add(this.comboBoxNOcrLanguage);
+ this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic);
+ this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrCorrect);
+ this.groupBoxNOCR.Controls.Add(this.checkBoxRightToLeftNOCR);
+ this.groupBoxNOCR.Controls.Add(this.numericUpDownNumberOfPixelsIsSpaceNOCR);
+ this.groupBoxNOCR.Controls.Add(this.labelNumberOfPixelsIsSpaceNOCR);
+ this.groupBoxNOCR.Location = new System.Drawing.Point(7, 38);
+ this.groupBoxNOCR.Name = "groupBoxNOCR";
+ this.groupBoxNOCR.Size = new System.Drawing.Size(366, 131);
+ this.groupBoxNOCR.TabIndex = 7;
+ this.groupBoxNOCR.TabStop = false;
+ this.groupBoxNOCR.Text = "nOCR";
+ //
+ // buttonLineOcrEditLanguage
+ //
+ this.buttonLineOcrEditLanguage.Location = new System.Drawing.Point(210, 97);
+ this.buttonLineOcrEditLanguage.Name = "buttonLineOcrEditLanguage";
+ this.buttonLineOcrEditLanguage.Size = new System.Drawing.Size(68, 21);
+ this.buttonLineOcrEditLanguage.TabIndex = 41;
+ this.buttonLineOcrEditLanguage.Text = "Edit";
+ this.buttonLineOcrEditLanguage.UseVisualStyleBackColor = true;
+ this.buttonLineOcrEditLanguage.Click += new System.EventHandler(this.buttonLineOcrEditLanguage_Click);
+ //
+ // buttonLineOcrNewLanguage
+ //
+ this.buttonLineOcrNewLanguage.Location = new System.Drawing.Point(283, 97);
+ this.buttonLineOcrNewLanguage.Name = "buttonLineOcrNewLanguage";
+ this.buttonLineOcrNewLanguage.Size = new System.Drawing.Size(68, 21);
+ this.buttonLineOcrNewLanguage.TabIndex = 40;
+ this.buttonLineOcrNewLanguage.Text = "New";
+ this.buttonLineOcrNewLanguage.UseVisualStyleBackColor = true;
+ this.buttonLineOcrNewLanguage.Click += new System.EventHandler(this.buttonLineOcrNewLanguage_Click);
+ //
+ // label2
+ //
+ this.label2.AutoSize = true;
+ this.label2.Location = new System.Drawing.Point(11, 101);
+ this.label2.Name = "label2";
+ this.label2.Size = new System.Drawing.Size(54, 13);
+ this.label2.TabIndex = 35;
+ this.label2.Text = "Language";
+ //
+ // comboBoxNOcrLanguage
+ //
+ this.comboBoxNOcrLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
+ this.comboBoxNOcrLanguage.FormattingEnabled = true;
+ this.comboBoxNOcrLanguage.Location = new System.Drawing.Point(74, 97);
+ this.comboBoxNOcrLanguage.Name = "comboBoxNOcrLanguage";
+ this.comboBoxNOcrLanguage.Size = new System.Drawing.Size(130, 21);
+ this.comboBoxNOcrLanguage.TabIndex = 34;
+ this.comboBoxNOcrLanguage.SelectedIndexChanged += new System.EventHandler(this.comboBoxNOcrLanguage_SelectedIndexChanged);
+ //
+ // checkBoxNOcrItalic
+ //
+ this.checkBoxNOcrItalic.AutoSize = true;
+ this.checkBoxNOcrItalic.Location = new System.Drawing.Point(15, 42);
+ this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic";
+ this.checkBoxNOcrItalic.Size = new System.Drawing.Size(92, 17);
+ this.checkBoxNOcrItalic.TabIndex = 8;
+ this.checkBoxNOcrItalic.Text = "Contains italic";
+ this.checkBoxNOcrItalic.UseVisualStyleBackColor = true;
+ //
+ // checkBoxNOcrCorrect
+ //
+ this.checkBoxNOcrCorrect.AutoSize = true;
+ this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(235, 17);
+ this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect";
+ this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17);
+ this.checkBoxNOcrCorrect.TabIndex = 7;
+ this.checkBoxNOcrCorrect.Text = "Draw missing texts";
+ this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true;
+ //
+ // checkBoxRightToLeftNOCR
+ //
+ this.checkBoxRightToLeftNOCR.AutoSize = true;
+ this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(235, 40);
+ this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR";
+ this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17);
+ this.checkBoxRightToLeftNOCR.TabIndex = 6;
+ this.checkBoxRightToLeftNOCR.Text = "Right to left";
+ this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true;
+ //
+ // numericUpDownNumberOfPixelsIsSpaceNOCR
+ //
+ this.numericUpDownNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(122, 17);
+ this.numericUpDownNumberOfPixelsIsSpaceNOCR.Maximum = new decimal(new int[] {
+ 50,
+ 0,
+ 0,
+ 0});
+ this.numericUpDownNumberOfPixelsIsSpaceNOCR.Minimum = new decimal(new int[] {
+ 1,
+ 0,
+ 0,
+ 0});
+ this.numericUpDownNumberOfPixelsIsSpaceNOCR.Name = "numericUpDownNumberOfPixelsIsSpaceNOCR";
+ this.numericUpDownNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(50, 21);
+ this.numericUpDownNumberOfPixelsIsSpaceNOCR.TabIndex = 5;
+ this.numericUpDownNumberOfPixelsIsSpaceNOCR.Value = new decimal(new int[] {
+ 12,
+ 0,
+ 0,
+ 0});
+ //
+ // labelNumberOfPixelsIsSpaceNOCR
+ //
+ this.labelNumberOfPixelsIsSpaceNOCR.AutoSize = true;
+ this.labelNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(12, 20);
+ this.labelNumberOfPixelsIsSpaceNOCR.Name = "labelNumberOfPixelsIsSpaceNOCR";
+ this.labelNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(104, 13);
+ this.labelNumberOfPixelsIsSpaceNOCR.TabIndex = 4;
+ this.labelNumberOfPixelsIsSpaceNOCR.Text = "No of pixels is space";
+ //
// groupBoxImageCompareMethod
//
this.groupBoxImageCompareMethod.Controls.Add(this.labelMinLineSplitHeight);
@@ -660,235 +885,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.buttonNewCharacterDatabase.UseVisualStyleBackColor = true;
this.buttonNewCharacterDatabase.Click += new System.EventHandler(this.ButtonNewCharacterDatabaseClick);
//
- // GroupBoxTesseractMethod
- //
- this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries);
- this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn);
- this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn);
- this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxUseModiInTesseractForUnknownWords);
- this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage);
- this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages);
- this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31);
- this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod";
- this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131);
- this.GroupBoxTesseractMethod.TabIndex = 1;
- this.GroupBoxTesseractMethod.TabStop = false;
- this.GroupBoxTesseractMethod.Text = "Tesseract";
- //
- // buttonGetTesseractDictionaries
- //
- this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 30);
- this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries";
- this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23);
- this.buttonGetTesseractDictionaries.TabIndex = 2;
- this.buttonGetTesseractDictionaries.Text = "...";
- this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true;
- this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click);
- //
- // checkBoxTesseractMusicOn
- //
- this.checkBoxTesseractMusicOn.AutoSize = true;
- this.checkBoxTesseractMusicOn.Checked = true;
- this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked;
- this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 101);
- this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn";
- this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(93, 17);
- this.checkBoxTesseractMusicOn.TabIndex = 4;
- this.checkBoxTesseractMusicOn.Text = "Music symbols";
- this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true;
- //
- // checkBoxTesseractItalicsOn
- //
- this.checkBoxTesseractItalicsOn.AutoSize = true;
- this.checkBoxTesseractItalicsOn.Checked = true;
- this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked;
- this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 101);
- this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn";
- this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17);
- this.checkBoxTesseractItalicsOn.TabIndex = 3;
- this.checkBoxTesseractItalicsOn.Text = "Italics";
- this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true;
- //
- // checkBoxUseModiInTesseractForUnknownWords
- //
- this.checkBoxUseModiInTesseractForUnknownWords.AutoSize = true;
- this.checkBoxUseModiInTesseractForUnknownWords.Checked = true;
- this.checkBoxUseModiInTesseractForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked;
- this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
- this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74);
- this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords";
- this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(167, 17);
- this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 2;
- this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words";
- this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true;
- //
- // labelTesseractLanguage
- //
- this.labelTesseractLanguage.AutoSize = true;
- this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 34);
- this.labelTesseractLanguage.Name = "labelTesseractLanguage";
- this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13);
- this.labelTesseractLanguage.TabIndex = 0;
- this.labelTesseractLanguage.Text = "Language";
- //
- // comboBoxTesseractLanguages
- //
- this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
- this.comboBoxTesseractLanguages.FormattingEnabled = true;
- this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 31);
- this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages";
- this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21);
- this.comboBoxTesseractLanguages.TabIndex = 1;
- this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged);
- //
- // groupBoxModiMethod
- //
- this.groupBoxModiMethod.Controls.Add(this.label1);
- this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage);
- this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50);
- this.groupBoxModiMethod.Name = "groupBoxModiMethod";
- this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131);
- this.groupBoxModiMethod.TabIndex = 3;
- this.groupBoxModiMethod.TabStop = false;
- this.groupBoxModiMethod.Text = "MODI";
- //
- // label1
- //
- this.label1.AutoSize = true;
- this.label1.Location = new System.Drawing.Point(11, 58);
- this.label1.Name = "label1";
- this.label1.Size = new System.Drawing.Size(54, 13);
- this.label1.TabIndex = 33;
- this.label1.Text = "Language";
- //
- // comboBoxModiLanguage
- //
- this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
- this.comboBoxModiLanguage.FormattingEnabled = true;
- this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55);
- this.comboBoxModiLanguage.Name = "comboBoxModiLanguage";
- this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21);
- this.comboBoxModiLanguage.TabIndex = 0;
- this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged);
- //
- // groupBoxNOCR
- //
- this.groupBoxNOCR.Controls.Add(this.buttonLineOcrEditLanguage);
- this.groupBoxNOCR.Controls.Add(this.buttonLineOcrNewLanguage);
- this.groupBoxNOCR.Controls.Add(this.label2);
- this.groupBoxNOCR.Controls.Add(this.comboBoxNOcrLanguage);
- this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic);
- this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrCorrect);
- this.groupBoxNOCR.Controls.Add(this.checkBoxRightToLeftNOCR);
- this.groupBoxNOCR.Controls.Add(this.numericUpDownNumberOfPixelsIsSpaceNOCR);
- this.groupBoxNOCR.Controls.Add(this.labelNumberOfPixelsIsSpaceNOCR);
- this.groupBoxNOCR.Location = new System.Drawing.Point(7, 38);
- this.groupBoxNOCR.Name = "groupBoxNOCR";
- this.groupBoxNOCR.Size = new System.Drawing.Size(366, 131);
- this.groupBoxNOCR.TabIndex = 7;
- this.groupBoxNOCR.TabStop = false;
- this.groupBoxNOCR.Text = "nOCR";
- //
- // buttonLineOcrEditLanguage
- //
- this.buttonLineOcrEditLanguage.Location = new System.Drawing.Point(210, 97);
- this.buttonLineOcrEditLanguage.Name = "buttonLineOcrEditLanguage";
- this.buttonLineOcrEditLanguage.Size = new System.Drawing.Size(68, 21);
- this.buttonLineOcrEditLanguage.TabIndex = 41;
- this.buttonLineOcrEditLanguage.Text = "Edit";
- this.buttonLineOcrEditLanguage.UseVisualStyleBackColor = true;
- this.buttonLineOcrEditLanguage.Click += new System.EventHandler(this.buttonLineOcrEditLanguage_Click);
- //
- // buttonLineOcrNewLanguage
- //
- this.buttonLineOcrNewLanguage.Location = new System.Drawing.Point(283, 97);
- this.buttonLineOcrNewLanguage.Name = "buttonLineOcrNewLanguage";
- this.buttonLineOcrNewLanguage.Size = new System.Drawing.Size(68, 21);
- this.buttonLineOcrNewLanguage.TabIndex = 40;
- this.buttonLineOcrNewLanguage.Text = "New";
- this.buttonLineOcrNewLanguage.UseVisualStyleBackColor = true;
- this.buttonLineOcrNewLanguage.Click += new System.EventHandler(this.buttonLineOcrNewLanguage_Click);
- //
- // label2
- //
- this.label2.AutoSize = true;
- this.label2.Location = new System.Drawing.Point(11, 101);
- this.label2.Name = "label2";
- this.label2.Size = new System.Drawing.Size(54, 13);
- this.label2.TabIndex = 35;
- this.label2.Text = "Language";
- //
- // comboBoxNOcrLanguage
- //
- this.comboBoxNOcrLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
- this.comboBoxNOcrLanguage.FormattingEnabled = true;
- this.comboBoxNOcrLanguage.Location = new System.Drawing.Point(74, 97);
- this.comboBoxNOcrLanguage.Name = "comboBoxNOcrLanguage";
- this.comboBoxNOcrLanguage.Size = new System.Drawing.Size(130, 21);
- this.comboBoxNOcrLanguage.TabIndex = 34;
- this.comboBoxNOcrLanguage.SelectedIndexChanged += new System.EventHandler(this.comboBoxNOcrLanguage_SelectedIndexChanged);
- //
- // checkBoxNOcrItalic
- //
- this.checkBoxNOcrItalic.AutoSize = true;
- this.checkBoxNOcrItalic.Location = new System.Drawing.Point(15, 42);
- this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic";
- this.checkBoxNOcrItalic.Size = new System.Drawing.Size(92, 17);
- this.checkBoxNOcrItalic.TabIndex = 8;
- this.checkBoxNOcrItalic.Text = "Contains italic";
- this.checkBoxNOcrItalic.UseVisualStyleBackColor = true;
- //
- // checkBoxNOcrCorrect
- //
- this.checkBoxNOcrCorrect.AutoSize = true;
- this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(235, 17);
- this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect";
- this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17);
- this.checkBoxNOcrCorrect.TabIndex = 7;
- this.checkBoxNOcrCorrect.Text = "Draw missing texts";
- this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true;
- //
- // checkBoxRightToLeftNOCR
- //
- this.checkBoxRightToLeftNOCR.AutoSize = true;
- this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(235, 40);
- this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR";
- this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17);
- this.checkBoxRightToLeftNOCR.TabIndex = 6;
- this.checkBoxRightToLeftNOCR.Text = "Right to left";
- this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true;
- //
- // numericUpDownNumberOfPixelsIsSpaceNOCR
- //
- this.numericUpDownNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(122, 17);
- this.numericUpDownNumberOfPixelsIsSpaceNOCR.Maximum = new decimal(new int[] {
- 50,
- 0,
- 0,
- 0});
- this.numericUpDownNumberOfPixelsIsSpaceNOCR.Minimum = new decimal(new int[] {
- 1,
- 0,
- 0,
- 0});
- this.numericUpDownNumberOfPixelsIsSpaceNOCR.Name = "numericUpDownNumberOfPixelsIsSpaceNOCR";
- this.numericUpDownNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(50, 21);
- this.numericUpDownNumberOfPixelsIsSpaceNOCR.TabIndex = 5;
- this.numericUpDownNumberOfPixelsIsSpaceNOCR.Value = new decimal(new int[] {
- 12,
- 0,
- 0,
- 0});
- //
- // labelNumberOfPixelsIsSpaceNOCR
- //
- this.labelNumberOfPixelsIsSpaceNOCR.AutoSize = true;
- this.labelNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(12, 20);
- this.labelNumberOfPixelsIsSpaceNOCR.Name = "labelNumberOfPixelsIsSpaceNOCR";
- this.labelNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(104, 13);
- this.labelNumberOfPixelsIsSpaceNOCR.TabIndex = 4;
- this.labelNumberOfPixelsIsSpaceNOCR.Text = "No of pixels is space";
- //
// groupBoxOCRControls
//
this.groupBoxOCRControls.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
@@ -1595,12 +1591,28 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.timerHideStatus.Interval = 2000;
this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick);
//
- // setForecolorThresholdToolStripMenuItem
+ // comboBoxTesseractEngineMode
//
- this.setForecolorThresholdToolStripMenuItem.Name = "setForecolorThresholdToolStripMenuItem";
- this.setForecolorThresholdToolStripMenuItem.Size = new System.Drawing.Size(305, 22);
- this.setForecolorThresholdToolStripMenuItem.Text = "Set forecolor threshold...";
- this.setForecolorThresholdToolStripMenuItem.Click += new System.EventHandler(this.setForecolorThresholdToolStripMenuItem_Click);
+ this.comboBoxTesseractEngineMode.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
+ this.comboBoxTesseractEngineMode.FormattingEnabled = true;
+ this.comboBoxTesseractEngineMode.Items.AddRange(new object[] {
+ "Original Tesseract only (can detect italic)",
+ "Neural nets LSTM only ",
+ "Tesseract + LSTM",
+ "Default, based on what is available"});
+ this.comboBoxTesseractEngineMode.Location = new System.Drawing.Point(98, 100);
+ this.comboBoxTesseractEngineMode.Name = "comboBoxTesseractEngineMode";
+ this.comboBoxTesseractEngineMode.Size = new System.Drawing.Size(195, 21);
+ this.comboBoxTesseractEngineMode.TabIndex = 5;
+ //
+ // labelTesseractEngineMode
+ //
+ this.labelTesseractEngineMode.AutoSize = true;
+ this.labelTesseractEngineMode.Location = new System.Drawing.Point(18, 103);
+ this.labelTesseractEngineMode.Name = "labelTesseractEngineMode";
+ this.labelTesseractEngineMode.Size = new System.Drawing.Size(68, 13);
+ this.labelTesseractEngineMode.TabIndex = 6;
+ this.labelTesseractEngineMode.Text = "Engine mode";
//
// VobSubOcr
//
@@ -1629,10 +1641,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.Resize += new System.EventHandler(this.VobSubOcr_Resize);
this.contextMenuStripListview.ResumeLayout(false);
this.groupBoxOcrMethod.ResumeLayout(false);
- this.groupBoxImageCompareMethod.ResumeLayout(false);
- this.groupBoxImageCompareMethod.PerformLayout();
- ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit();
- ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit();
this.GroupBoxTesseractMethod.ResumeLayout(false);
this.GroupBoxTesseractMethod.PerformLayout();
this.groupBoxModiMethod.ResumeLayout(false);
@@ -1640,6 +1648,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.groupBoxNOCR.ResumeLayout(false);
this.groupBoxNOCR.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).EndInit();
+ this.groupBoxImageCompareMethod.ResumeLayout(false);
+ this.groupBoxImageCompareMethod.PerformLayout();
+ ((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit();
+ ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit();
this.groupBoxOCRControls.ResumeLayout(false);
this.groupBoxOCRControls.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).EndInit();
@@ -1710,7 +1722,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private System.Windows.Forms.Label label1;
private System.Windows.Forms.GroupBox groupBoxModiMethod;
private System.Windows.Forms.GroupBox GroupBoxTesseractMethod;
- private System.Windows.Forms.CheckBox checkBoxUseModiInTesseractForUnknownWords;
private System.Windows.Forms.Label labelTesseractLanguage;
private System.Windows.Forms.ComboBox comboBoxTesseractLanguages;
private System.Windows.Forms.ContextMenuStrip contextMenuStripListview;
@@ -1801,5 +1812,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private System.Windows.Forms.Label labelMinLineSplitHeight;
private System.Windows.Forms.ComboBox comboBoxLineSplitMinLineHeight;
private System.Windows.Forms.ToolStripMenuItem setForecolorThresholdToolStripMenuItem;
+ private System.Windows.Forms.Label labelTesseractEngineMode;
+ private System.Windows.Forms.ComboBox comboBoxTesseractEngineMode;
}
}
\ No newline at end of file
diff --git a/src/Forms/Ocr/VobSubOcr.cs b/src/Forms/Ocr/VobSubOcr.cs
index 6c379a2e3..43609c95d 100644
--- a/src/Forms/Ocr/VobSubOcr.cs
+++ b/src/Forms/Ocr/VobSubOcr.cs
@@ -302,6 +302,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private string[] _tesseractAsyncStrings;
private int _tesseractAsyncIndex;
private BackgroundWorker _tesseractThread;
+ private int _tesseractEngineMode;
private readonly DateTime _windowStartTime = DateTime.Now;
private int _linesOcred;
@@ -426,9 +427,15 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_ocrMethodImageCompare = 4;
}
- checkBoxUseModiInTesseractForUnknownWords.Text = language.TryModiForUnknownWords;
checkBoxTesseractItalicsOn.Checked = Configuration.Settings.VobSubOcr.UseItalicsInTesseract;
checkBoxTesseractItalicsOn.Text = Configuration.Settings.Language.General.Italic;
+ if (Configuration.Settings.VobSubOcr.TesseractEngineMode >= 0 &&
+ Configuration.Settings.VobSubOcr.TesseractEngineMode < comboBoxTesseractEngineMode.Items.Count)
+ {
+ comboBoxTesseractEngineMode.SelectedIndex = Configuration.Settings.VobSubOcr.TesseractEngineMode;
+ }
+ comboBoxTesseractEngineMode.Left = labelTesseractEngineMode.Left + labelTesseractEngineMode.Width + 5;
+ comboBoxTesseractEngineMode.Width = GroupBoxTesseractMethod.Width - comboBoxTesseractEngineMode.Left - 10;
checkBoxTesseractMusicOn.Checked = Configuration.Settings.VobSubOcr.UseMusicSymbolsInTesseract;
checkBoxTesseractMusicOn.Text = Configuration.Settings.Language.Settings.MusicSymbol;
@@ -5314,7 +5321,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void FormVobSubOcr_Shown(object sender, EventArgs e)
{
- checkBoxUseModiInTesseractForUnknownWords.Checked = Configuration.Settings.VobSubOcr.UseModiInTesseractForUnknownWords;
if (_mp4List != null)
{
checkBoxShowOnlyForced.Visible = false;
@@ -5841,6 +5847,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void ButtonStartOcrClick(object sender, EventArgs e)
{
+ _tesseractEngineMode = comboBoxTesseractEngineMode.SelectedIndex >= 0 ? comboBoxTesseractEngineMode.SelectedIndex : 3; // no selection (-1) would end up as "--oem -1" on the command line; 3 = "Default, based on what is available"
_isLatinDb = comboBoxCharacterDatabase.SelectedItem != null && comboBoxCharacterDatabase.SelectedItem.ToString().Equals("Latin", StringComparison.Ordinal);
Configuration.Settings.VobSubOcr.RightToLeft = checkBoxRightToLeft.Checked;
_lastLine = null;
@@ -6144,11 +6151,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
var nbmp = new NikseBitmap(bmp);
nbmp.ReplaceYellowWithWhite(); // optimized replace
- string tempTiffFileName = Path.GetTempPath() + Guid.NewGuid() + ".png";
+ string pngFileName = Path.GetTempPath() + Guid.NewGuid() + ".png";
string tempTextFileName;
using (var b = nbmp.GetBitmap())
{
- b.Save(tempTiffFileName, System.Drawing.Imaging.ImageFormat.Png);
+ b.Save(pngFileName, System.Drawing.Imaging.ImageFormat.Png);
tempTextFileName = Path.GetTempPath() + Guid.NewGuid();
}
@@ -6156,10 +6163,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
process.StartInfo = new ProcessStartInfo(Configuration.TesseractDirectory + "tesseract.exe");
process.StartInfo.UseShellExecute = true;
- process.StartInfo.Arguments = "\"" + tempTiffFileName + "\" \"" + tempTextFileName + "\" -l " + language;
-
- if (checkBoxTesseractMusicOn.Checked)
- process.StartInfo.Arguments += "+music";
+ process.StartInfo.Arguments = "\"" + pngFileName + "\" \"" + tempTextFileName + "\" --oem " + _tesseractEngineMode + " -l " + language;
if (!string.IsNullOrEmpty(psmMode))
process.StartInfo.Arguments += " " + psmMode.Trim();
@@ -6186,11 +6190,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
if (Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac())
{
- MessageBox.Show("Unable to start 'Tesseract' - make sure tesseract-ocr 3.x is installed!");
+ MessageBox.Show("Unable to start 'Tesseract' - make sure tesseract-ocr 4.x is installed!");
}
else
{
- MessageBox.Show("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure tesseract-ocr 3.x is installed!");
+ MessageBox.Show("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure tesseract-ocr 4.x is installed!");
}
throw;
}
@@ -6209,7 +6213,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
result = ParseHocr(result);
File.Delete(outputFileName);
}
- File.Delete(tempTiffFileName);
+ File.Delete(pngFileName);
}
catch
{
@@ -6292,7 +6296,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
const int badWords = 0;
string textWithOutFixes;
- if (_tesseractAsyncStrings != null && !string.IsNullOrEmpty(_tesseractAsyncStrings[index]))
+ if (!string.IsNullOrEmpty(_tesseractAsyncStrings?[index]))
{
textWithOutFixes = _tesseractAsyncStrings[index];
}
@@ -6819,53 +6823,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
_ocrFixEngine.AutoGuessesUsed.Clear();
_ocrFixEngine.UnknownWordsFound.Clear();
-
- if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked)
- {
- // which is best - modi or Tesseract - we find out here
- string modiText = CallModi(index);
-
- if (modiText.Length == 0)
- modiText = CallModi(index); // retry... strange MODI
- if (modiText.Length == 0)
- modiText = CallModi(index); // retry... strange MODI
-
- if (modiText.Length > 1 &&
- !modiText.Contains("CD") &&
- (!modiText.Contains('0') || line.Contains('0')) &&
- (!modiText.Contains('2') || line.Contains('2')) &&
- (!modiText.Contains('3') || line.Contains('4')) &&
- (!modiText.Contains('5') || line.Contains('5')) &&
- (!modiText.Contains('9') || line.Contains('9')) &&
- (!modiText.Contains('•') || line.Contains('•')) &&
- (!modiText.Contains(')') || line.Contains(')')) &&
- Utilities.CountTagInText(modiText, '(') < 2 && Utilities.CountTagInText(modiText, ')') < 2 &&
- Utilities.GetNumberOfLines(modiText) < 4)
- {
- int modiWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiText, out correctWords);
- //if (modiWordsNotFound > 0)
- {
- string modiTextOcrFixed = modiText;
- if (checkBoxAutoFixCommonErrors.Checked)
- modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(modiText, index, _lastLine, false, GetAutoGuessLevel());
- int modiOcrCorrectedWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiTextOcrFixed, out correctWords);
- if (modiOcrCorrectedWordsNotFound <= modiWordsNotFound)
- modiText = modiTextOcrFixed;
- }
-
- if (modiWordsNotFound < wordsNotFound || (textWithOutFixes.Length == 1 && modiWordsNotFound == 0))
- line = modiText; // use the modi OCR'ed text
- else if (wordsNotFound == modiWordsNotFound && modiText.EndsWith('!') && (line.EndsWith('l') || line.EndsWith('fl')))
- line = modiText;
- }
-
- // take the best option - before OCR fixing, which we do again to save suggestions and prompt for user input
- line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
- }
- else
- { // fix some error manually (modi not available)
- line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
- }
+ line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
}
if (_ocrFixEngine.Abort)
@@ -6875,39 +6833,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
return string.Empty;
}
- //check Tesseract... find an other way to do this...
- //string tmp = HtmlUtil.RemoveHtmlTags(line).Trim();
- //if (!tmp.TrimEnd().EndsWith("..."))
- //{
- // tmp = tmp.TrimEnd('.').TrimEnd();
- // if (tmp.Length > 2 && Utilities.LowercaseLetters.Contains(tmp[tmp.Length - 1]))
- // {
- // if (_nocrChars == null)
- // _nocrChars = LoadNOcrForTesseract("Nikse.SubtitleEdit.Resources.nOCR_TesseractHelper.xml.zip");
- // string text = HtmlUtil.RemoveHtmlTags(NocrFastCheck(bitmap).TrimEnd());
- // string post = string.Empty;
- // if (line.EndsWith(""))
- // {
- // post = "";
- // line = line.Remove(line.Length - 4, 4).Trim();
- // }
- // if (text.EndsWith('.'))
- // {
- // line = line.TrimEnd('.').Trim();
- // while (text.EndsWith('.') || text.EndsWith(' '))
- // {
- // line += text.Substring(text.Length - 1).Trim();
- // text = text.Remove(text.Length - 1, 1);
- // }
- // }
- // else if (text.EndsWith('l') && text.EndsWith('!') && !text.EndsWith("l!"))
- // {
- // line = line.Remove(line.Length - 1, 1) + "!";
- // }
- // line += post;
- // }
- //}
-
// Log used word guesses (via word replace list)
foreach (string guess in _ocrFixEngine.AutoGuessesUsed)
listBoxLogSuggestions.Items.Add(guess);
@@ -6938,7 +6863,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
if (textWithOutFixes.Trim() != line.Trim())
{
_tesseractOcrAutoFixes++;
- labelFixesMade.Text = string.Format(" - {0}", _tesseractOcrAutoFixes);
+ labelFixesMade.Text = $" - {_tesseractOcrAutoFixes}";
LogOcrFix(index, textWithOutFixes, line);
}
@@ -6990,9 +6915,20 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private string TesseractResizeAndRetry(Bitmap bitmap)
{
- string result = Tesseract3DoOcrViaExe(ResizeBitmap(bitmap, bitmap.Width * 3, bitmap.Height * 2), _languageId, null);
+ string result;
+ using (var b = ResizeBitmap(bitmap, bitmap.Width * 3, bitmap.Height * 2))
+ {
+ result = Tesseract3DoOcrViaExe(b, _languageId, null);
+ }
+
if (string.IsNullOrWhiteSpace(result))
- result = Tesseract3DoOcrViaExe(ResizeBitmap(bitmap, bitmap.Width * 4, bitmap.Height * 2), _languageId, "-psm 7");
+ {
+ using (var b = ResizeBitmap(bitmap, bitmap.Width * 4, bitmap.Height * 2))
+ {
+ result = Tesseract3DoOcrViaExe(b, _languageId, "--psm 7"); // retry on a wider image; Tesseract 4 spells the page segmentation option "--psm", matching the "--oem" switch used on the same command line
+ }
+ }
+
return result.TrimEnd();
}
@@ -7108,7 +7044,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void InitializeModi()
{
_modiEnabled = false;
- checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
comboBoxModiLanguage.Enabled = false;
try
{
@@ -7119,7 +7054,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_modiEnabled = _modiDoc != null;
comboBoxModiLanguage.Enabled = _modiEnabled;
- checkBoxUseModiInTesseractForUnknownWords.Enabled = _modiEnabled;
}
catch
{
@@ -7523,17 +7457,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
comboBoxDictionaries_SelectedIndexChanged(null, null);
}
- if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked)
- {
- string tesseractLanguageText = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Text;
- int i = 0;
- foreach (var modiLanguage in comboBoxModiLanguage.Items)
- {
- if ((modiLanguage as ModiLanguage).Text == tesseractLanguageText)
- comboBoxModiLanguage.SelectedIndex = i;
- i++;
- }
- }
comboBoxModiLanguage.SelectedIndex = -1;
}
@@ -8430,8 +8353,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
DisposeImageCompareBitmaps();
Configuration.Settings.VobSubOcr.UseItalicsInTesseract = checkBoxTesseractItalicsOn.Checked;
+ if (comboBoxTesseractEngineMode.SelectedIndex != -1)
+ Configuration.Settings.VobSubOcr.TesseractEngineMode = comboBoxTesseractEngineMode.SelectedIndex;
Configuration.Settings.VobSubOcr.ItalicFactor = _unItalicFactor;
- Configuration.Settings.VobSubOcr.UseModiInTesseractForUnknownWords = checkBoxUseModiInTesseractForUnknownWords.Checked;
Configuration.Settings.VobSubOcr.PromptForUnknownWords = checkBoxPromptForUnknownWords.Checked;
Configuration.Settings.VobSubOcr.GuessUnknownWords = checkBoxGuessUnknownWords.Checked;
Configuration.Settings.VobSubOcr.AutoBreakSubtitleIfMoreThanTwoLines = checkBoxAutoBreakLines.Checked;