Update Tesseract OCR from 3.02 to 4.0 (alpha)

This commit is contained in:
Nikolaj Olsson 2018-03-27 17:24:47 +02:00
parent 8361d7664e
commit 696b529c37
18 changed files with 539 additions and 458 deletions

View File

@ -21,6 +21,7 @@
* Update Romanian translation - thx Mircea * Update Romanian translation - thx Mircea
* Update Basque translation - thx Xabier * Update Basque translation - thx Xabier
* Update Portuguese translation - thx moob * Update Portuguese translation - thx moob
* Update Tesseract OCR from 3.02 to 4.0 (alpha)
* Ctrl+a/ctrl+d/ctrl+shift+i works in more lists - thx tormento * Ctrl+a/ctrl+d/ctrl+shift+i works in more lists - thx tormento
* Remember OCR spell check dictionary for tesseract - thx raymondjpg * Remember OCR spell check dictionary for tesseract - thx raymondjpg
* FIXED: * FIXED:
@ -37,6 +38,8 @@
* Do not allow navigating before zero in video - thx darnn * Do not allow navigating before zero in video - thx darnn
* Fix issue with nested tags in "Remove text for HI" - thx darnn * Fix issue with nested tags in "Remove text for HI" - thx darnn
* Fix image render issue regarding italic/font - thx Cemal * Fix image render issue regarding italic/font - thx Cemal
* Fix bottom margin in batch image export - thx Cemal
* Fix possible crash in list view - thx lambdacore12
3.5.6 (27th February 2018) 3.5.6 (27th February 2018)
* NEW: * NEW:

View File

@ -2318,7 +2318,6 @@ Keep changes?</KeepChangesMessage>
<SaveAllSubtitleImagesAsBdnXml>Save all images (png/bdn xml)...</SaveAllSubtitleImagesAsBdnXml> <SaveAllSubtitleImagesAsBdnXml>Save all images (png/bdn xml)...</SaveAllSubtitleImagesAsBdnXml>
<SaveAllSubtitleImagesWithHtml>Save all images with HTML index...</SaveAllSubtitleImagesWithHtml> <SaveAllSubtitleImagesWithHtml>Save all images with HTML index...</SaveAllSubtitleImagesWithHtml>
<XImagesSavedInY>{0} images saved in {1}</XImagesSavedInY> <XImagesSavedInY>{0} images saved in {1}</XImagesSavedInY>
<TryModiForUnknownWords>Try Microsoft MODI OCR for unknown words</TryModiForUnknownWords>
<DictionaryX>Dictionary: {0}</DictionaryX> <DictionaryX>Dictionary: {0}</DictionaryX>
<RightToLeft>Right to left</RightToLeft> <RightToLeft>Right to left</RightToLeft>
<ShowOnlyForcedSubtitles>Show only forced subtitles</ShowOnlyForcedSubtitles> <ShowOnlyForcedSubtitles>Show only forced subtitles</ShowOnlyForcedSubtitles>

View File

@ -0,0 +1,3 @@
tessedit_create_hocr 1
tessedit_pageseg_mode 1
hocr_font_info 0

Binary file not shown.

BIN
Tesseract4/tesseract.exe Normal file

Binary file not shown.

View File

@ -91,9 +91,9 @@ PUSHD "src\bin\Release"
IF EXIST "temp_zip" RD /S /Q "temp_zip" IF EXIST "temp_zip" RD /S /Q "temp_zip"
IF NOT EXIST "temp_zip" MD "temp_zip" IF NOT EXIST "temp_zip" MD "temp_zip"
IF NOT EXIST "temp_zip\Languages" MD "temp_zip\Languages" IF NOT EXIST "temp_zip\Languages" MD "temp_zip\Languages"
IF NOT EXIST "temp_zip\Tesseract" MD "temp_zip\Tesseract" IF NOT EXIST "temp_zip\Tesseract4" MD "temp_zip\Tesseract4"
IF NOT EXIST "temp_zip\Tesseract\tessdata" MD "temp_zip\Tesseract\tessdata" IF NOT EXIST "temp_zip\Tesseract4\tessdata" MD "temp_zip\Tesseract4\tessdata"
IF NOT EXIST "temp_zip\Tesseract\tessdata\configs" MD "temp_zip\Tesseract\tessdata\configs" IF NOT EXIST "temp_zip\Tesseract4\tessdata\configs" MD "temp_zip\Tesseract4\tessdata\configs"
COPY /Y /V "..\..\..\LICENSE.txt" "temp_zip\" COPY /Y /V "..\..\..\LICENSE.txt" "temp_zip\"
COPY /Y /V "..\..\..\Changelog.txt" "temp_zip\" COPY /Y /V "..\..\..\Changelog.txt" "temp_zip\"
@ -101,11 +101,9 @@ COPY /Y /V "Hunspellx86.dll" "temp_zip\"
COPY /Y /V "Hunspellx64.dll" "temp_zip\" COPY /Y /V "Hunspellx64.dll" "temp_zip\"
COPY /Y /V "SubtitleEdit.exe" "temp_zip\" COPY /Y /V "SubtitleEdit.exe" "temp_zip\"
COPY /Y /V "Languages\*.xml" "temp_zip\Languages\" COPY /Y /V "Languages\*.xml" "temp_zip\Languages\"
COPY /Y /V "..\..\..\Tesseract\msvcp90.dll" "temp_zip\Tesseract\" COPY /Y /V "..\..\..\Tesseract4\tesseract.exe" "temp_zip\Tesseract4\"
COPY /Y /V "..\..\..\Tesseract\msvcr90.dll" "temp_zip\Tesseract\" COPY /Y /V "..\..\..\Tesseract4\tessdata\configs\hocr" "temp_zip\Tesseract4\tessdata\configs\"
COPY /Y /V "..\..\..\Tesseract\tesseract.exe" "temp_zip\Tesseract\" COPY /Y /V "..\..\..\Tesseract4\tessdata\*.traineddata" "temp_zip\Tesseract4\tessdata\"
COPY /Y /V "..\..\..\Tesseract\tessdata\configs\hocr" "temp_zip\Tesseract\tessdata\configs\"
COPY /Y /V "..\..\..\Tesseract\tessdata\*.traineddata" "temp_zip\Tesseract\tessdata\"
PUSHD "temp_zip" PUSHD "temp_zip"
START "" /B /WAIT "%SEVENZIP%" a -tzip -mx=9 "SE%VERSION%.zip" * >NUL START "" /B /WAIT "%SEVENZIP%" a -tzip -mx=9 "SE%VERSION%.zip" * >NUL

View File

@ -266,12 +266,9 @@ Source: {#bindir}\SubtitleEdit.exe; DestDir: {app};
Source: {#bindir}\SubtitleEdit.resources.dll; DestDir: {app}; Flags: ignoreversion; Components: main Source: {#bindir}\SubtitleEdit.resources.dll; DestDir: {app}; Flags: ignoreversion; Components: main
Source: ..\Changelog.txt; DestDir: {app}; Flags: ignoreversion; Components: main Source: ..\Changelog.txt; DestDir: {app}; Flags: ignoreversion; Components: main
Source: ..\LICENSE.txt; DestDir: {app}; Flags: ignoreversion; Components: main Source: ..\LICENSE.txt; DestDir: {app}; Flags: ignoreversion; Components: main
Source: ..\Tesseract\msvcp90.dll; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main Source: ..\Tesseract4\tessdata\configs\hocr; DestDir: {app}\Tesseract4\tessdata\configs; Flags: ignoreversion; Components: main
Source: ..\Tesseract\msvcr90.dll; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main Source: ..\Tesseract4\tessdata\eng.traineddata; DestDir: {app}\Tesseract4\tessdata; Flags: ignoreversion; Components: main
Source: ..\Tesseract\tessdata\configs\hocr; DestDir: {app}\Tesseract\tessdata\configs; Flags: ignoreversion; Components: main Source: ..\Tesseract4\tesseract.exe; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
Source: ..\Tesseract\tessdata\eng.traineddata; DestDir: {app}\Tesseract\tessdata; Flags: ignoreversion; Components: main
Source: ..\Tesseract\tessdata\music.traineddata; DestDir: {app}\Tesseract\tessdata; Flags: ignoreversion; Components: main
Source: ..\Tesseract\tesseract.exe; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main
Source: Icons\uninstall.ico; DestDir: {app}\Icons; Flags: ignoreversion; Components: main Source: Icons\uninstall.ico; DestDir: {app}\Icons; Flags: ignoreversion; Components: main

View File

@ -23,7 +23,7 @@ namespace Nikse.SubtitleEdit.Core
public static readonly string SceneChangesDirectory = DataDirectory + "SceneChanges" + Path.DirectorySeparatorChar; public static readonly string SceneChangesDirectory = DataDirectory + "SceneChanges" + Path.DirectorySeparatorChar;
public static readonly string AutoBackupDirectory = DataDirectory + "AutoBackup" + Path.DirectorySeparatorChar; public static readonly string AutoBackupDirectory = DataDirectory + "AutoBackup" + Path.DirectorySeparatorChar;
public static readonly string VobSubCompareDirectory = DataDirectory + "VobSub" + Path.DirectorySeparatorChar; public static readonly string VobSubCompareDirectory = DataDirectory + "VobSub" + Path.DirectorySeparatorChar;
public static readonly string TesseractDirectory = DataDirectory + "Tesseract" + Path.DirectorySeparatorChar; public static readonly string TesseractDirectory = DataDirectory + "Tesseract4" + Path.DirectorySeparatorChar;
public static readonly string WaveformsDirectory = DataDirectory + "Waveforms" + Path.DirectorySeparatorChar; public static readonly string WaveformsDirectory = DataDirectory + "Waveforms" + Path.DirectorySeparatorChar;
public static readonly string PluginsDirectory = DataDirectory + "Plugins" + Path.DirectorySeparatorChar; public static readonly string PluginsDirectory = DataDirectory + "Plugins" + Path.DirectorySeparatorChar;
public static readonly string IconsDirectory = BaseDirectory + "Icons" + Path.DirectorySeparatorChar; public static readonly string IconsDirectory = BaseDirectory + "Icons" + Path.DirectorySeparatorChar;
@ -54,21 +54,9 @@ namespace Nikse.SubtitleEdit.Core
} }
public static Settings Settings public static Settings Settings => Instance.Value._settings.Value;
{
get
{
return Instance.Value._settings.Value;
}
}
public static IEnumerable<Encoding> AvailableEncodings public static IEnumerable<Encoding> AvailableEncodings => Instance.Value._encodings;
{
get
{
return Instance.Value._encodings;
}
}
private static string GetInstallerPath() private static string GetInstallerPath()
{ {
@ -117,6 +105,7 @@ namespace Nikse.SubtitleEdit.Core
} }
catch catch
{ {
// ignored
} }
} }
Directory.CreateDirectory(Path.Combine(appDataRoamingPath, "Dictionaries")); Directory.CreateDirectory(Path.Combine(appDataRoamingPath, "Dictionaries"));

View File

@ -2649,7 +2649,6 @@ Keep changes?",
SaveAllSubtitleImagesAsBdnXml = "Save all images (png/bdn xml)...", SaveAllSubtitleImagesAsBdnXml = "Save all images (png/bdn xml)...",
SaveAllSubtitleImagesWithHtml = "Save all images with HTML index...", SaveAllSubtitleImagesWithHtml = "Save all images with HTML index...",
XImagesSavedInY = "{0} images saved in {1}", XImagesSavedInY = "{0} images saved in {1}",
TryModiForUnknownWords = "Try Microsoft MODI OCR for unknown words",
DictionaryX = "Dictionary: {0}", DictionaryX = "Dictionary: {0}",
RightToLeft = "Right to left", RightToLeft = "Right to left",
ShowOnlyForcedSubtitles = "Show only forced subtitles", ShowOnlyForcedSubtitles = "Show only forced subtitles",

View File

@ -6259,9 +6259,6 @@ namespace Nikse.SubtitleEdit.Core
case "VobSubOcr/XImagesSavedInY": case "VobSubOcr/XImagesSavedInY":
language.VobSubOcr.XImagesSavedInY = reader.Value; language.VobSubOcr.XImagesSavedInY = reader.Value;
break; break;
case "VobSubOcr/TryModiForUnknownWords":
language.VobSubOcr.TryModiForUnknownWords = reader.Value;
break;
case "VobSubOcr/DictionaryX": case "VobSubOcr/DictionaryX":
language.VobSubOcr.DictionaryX = reader.Value; language.VobSubOcr.DictionaryX = reader.Value;
break; break;

View File

@ -2524,7 +2524,6 @@
public string SaveAllSubtitleImagesAsBdnXml { get; set; } public string SaveAllSubtitleImagesAsBdnXml { get; set; }
public string SaveAllSubtitleImagesWithHtml { get; set; } public string SaveAllSubtitleImagesWithHtml { get; set; }
public string XImagesSavedInY { get; set; } public string XImagesSavedInY { get; set; }
public string TryModiForUnknownWords { get; set; }
public string DictionaryX { get; set; } public string DictionaryX { get; set; }
public string RightToLeft { get; set; } public string RightToLeft { get; set; }
public string ShowOnlyForcedSubtitles { get; set; } public string ShowOnlyForcedSubtitles { get; set; }

View File

@ -531,6 +531,7 @@
<Compile Include="TarHeader.cs" /> <Compile Include="TarHeader.cs" />
<Compile Include="TarReader.cs" /> <Compile Include="TarReader.cs" />
<Compile Include="TaskbarList.cs" /> <Compile Include="TaskbarList.cs" />
<Compile Include="TesseractDictionary.cs" />
<Compile Include="TextDraw.cs" /> <Compile Include="TextDraw.cs" />
<Compile Include="TextEncodingExtensions.cs" /> <Compile Include="TextEncodingExtensions.cs" />
<Compile Include="TimeCode.cs" /> <Compile Include="TimeCode.cs" />

View File

@ -865,8 +865,8 @@ namespace Nikse.SubtitleEdit.Core
public int LastModiLanguageId { get; set; } public int LastModiLanguageId { get; set; }
public string LastOcrMethod { get; set; } public string LastOcrMethod { get; set; }
public string TesseractLastLanguage { get; set; } public string TesseractLastLanguage { get; set; }
public bool UseModiInTesseractForUnknownWords { get; set; }
public bool UseItalicsInTesseract { get; set; } public bool UseItalicsInTesseract { get; set; }
public int TesseractEngineMode { get; set; }
public bool UseMusicSymbolsInTesseract { get; set; } public bool UseMusicSymbolsInTesseract { get; set; }
public bool RightToLeft { get; set; } public bool RightToLeft { get; set; }
public bool TopToBottom { get; set; } public bool TopToBottom { get; set; }
@ -2680,12 +2680,12 @@ namespace Nikse.SubtitleEdit.Core
subNode = node.SelectSingleNode("TesseractLastLanguage"); subNode = node.SelectSingleNode("TesseractLastLanguage");
if (subNode != null) if (subNode != null)
settings.VobSubOcr.TesseractLastLanguage = subNode.InnerText; settings.VobSubOcr.TesseractLastLanguage = subNode.InnerText;
subNode = node.SelectSingleNode("UseModiInTesseractForUnknownWords");
if (subNode != null)
settings.VobSubOcr.UseModiInTesseractForUnknownWords = Convert.ToBoolean(subNode.InnerText);
subNode = node.SelectSingleNode("UseItalicsInTesseract"); subNode = node.SelectSingleNode("UseItalicsInTesseract");
if (subNode != null) if (subNode != null)
settings.VobSubOcr.UseItalicsInTesseract = Convert.ToBoolean(subNode.InnerText); settings.VobSubOcr.UseItalicsInTesseract = Convert.ToBoolean(subNode.InnerText);
subNode = node.SelectSingleNode("TesseractEngineMode");
if (subNode != null)
settings.VobSubOcr.TesseractEngineMode = Convert.ToInt32(subNode.InnerText);
subNode = node.SelectSingleNode("UseMusicSymbolsInTesseract"); subNode = node.SelectSingleNode("UseMusicSymbolsInTesseract");
if (subNode != null) if (subNode != null)
settings.VobSubOcr.UseMusicSymbolsInTesseract = Convert.ToBoolean(subNode.InnerText); settings.VobSubOcr.UseMusicSymbolsInTesseract = Convert.ToBoolean(subNode.InnerText);
@ -3808,8 +3808,8 @@ namespace Nikse.SubtitleEdit.Core
textWriter.WriteElementString("LastModiLanguageId", settings.VobSubOcr.LastModiLanguageId.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("LastModiLanguageId", settings.VobSubOcr.LastModiLanguageId.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("LastOcrMethod", settings.VobSubOcr.LastOcrMethod); textWriter.WriteElementString("LastOcrMethod", settings.VobSubOcr.LastOcrMethod);
textWriter.WriteElementString("TesseractLastLanguage", settings.VobSubOcr.TesseractLastLanguage); textWriter.WriteElementString("TesseractLastLanguage", settings.VobSubOcr.TesseractLastLanguage);
textWriter.WriteElementString("UseModiInTesseractForUnknownWords", settings.VobSubOcr.UseModiInTesseractForUnknownWords.ToString());
textWriter.WriteElementString("UseItalicsInTesseract", settings.VobSubOcr.UseItalicsInTesseract.ToString()); textWriter.WriteElementString("UseItalicsInTesseract", settings.VobSubOcr.UseItalicsInTesseract.ToString());
textWriter.WriteElementString("TesseractEngineMode", settings.VobSubOcr.TesseractEngineMode.ToString());
textWriter.WriteElementString("UseMusicSymbolsInTesseract", settings.VobSubOcr.UseMusicSymbolsInTesseract.ToString()); textWriter.WriteElementString("UseMusicSymbolsInTesseract", settings.VobSubOcr.UseMusicSymbolsInTesseract.ToString());
textWriter.WriteElementString("RightToLeft", settings.VobSubOcr.RightToLeft.ToString()); textWriter.WriteElementString("RightToLeft", settings.VobSubOcr.RightToLeft.ToString());
textWriter.WriteElementString("TopToBottom", settings.VobSubOcr.TopToBottom.ToString()); textWriter.WriteElementString("TopToBottom", settings.VobSubOcr.TopToBottom.ToString());

View File

@ -0,0 +1,195 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
namespace Nikse.SubtitleEdit.Core
{
/// <summary>
/// Describes one downloadable Tesseract language data file: its language code,
/// a display name and the URL of its ".traineddata" file.
/// </summary>
public class TesseractDictionary
{
    // "{0}" is replaced with the language code of the dictionary.
    private const string DownloadUrlTemplate = "https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/{0}.traineddata";

    /// <summary>
    /// Language codes of the known dictionaries (data for both 3.5 + 4.0) -
    /// see https://github.com/tesseract-ocr/tessdata
    /// </summary>
    private static readonly string[] Dictionaries =
    {
        "afr", "amh", "ara", "asm", "aze", "aze_cyrl",
        "bel", "ben", "bod", "bos", "bre", "bul",
        "cat", "ceb", "ces", "chi_sim", "chi_sim_vert", "chi_tra", "chi_tra_vert", "chr", "cos", "cym",
        "dan", "deu", "div", "dzo",
        "ell", "eng", "enm", "epo", "equ", "est", "eus",
        "fao", "fas", "fil", "fin", "fra", "frk", "frm", "fry",
        "gla", "gle", "glg", "grc", "guj",
        "hat", "heb", "hin", "hrv", "hun", "hye",
        "iku", "ind", "isl", "ita",
        "jav", "jpn", "jpn_vert",
        "kan", "kat", "kaz", "khm", "kir", "kor", "kor_vert", "kur", "kur_ara",
        "lao", "lat", "lav", "lit", "ltz",
        "mal", "mar", "mkd", "mlt", "mon", "mri", "msa", "mya",
        "nep", "nld", "nor",
        "oci", "ori", "osd",
        "pan", "pol", "por", "pus",
        "que",
        "ron", "rus",
        "san", "sin", "slk", "slv", "snd", "spa", "sqi", "srp", "srp_latn", "sun", "swa", "swe", "syr",
        "tam", "tat", "tel", "tgk", "tgl", "tha", "tir", "ton", "tur",
        "uig", "ukr", "urd", "uzb", "uzb_cyrl",
        "vie",
        "yid", "yor"
    };

    /// <summary>Language code of the dictionary, e.g. "eng" or "chi_sim".</summary>
    public string Code { get; set; }

    /// <summary>Display name; this is also what <see cref="ToString"/> returns.</summary>
    public string Name { get; set; }

    /// <summary>Download address of the ".traineddata" file for <see cref="Code"/>.</summary>
    public string Url { get; set; }

    /// <summary>
    /// Builds a new list with one entry per known dictionary, in the order of the
    /// internal code table.
    /// </summary>
    /// <returns>A freshly allocated list; every entry has Name, Code and Url set.</returns>
    public static List<TesseractDictionary> List()
    {
        // Fetch the neutral cultures once and share them across all name lookups.
        var neutralCultures = CultureInfo.GetCultures(CultureTypes.NeutralCultures);
        return Dictionaries
            .Select(languageCode => new TesseractDictionary
            {
                Name = MakeName(languageCode, neutralCultures),
                Code = languageCode,
                Url = string.Format(DownloadUrlTemplate, languageCode)
            })
            .ToList();
    }

    /// <summary>
    /// Creates the display name for a dictionary code. Anything after the first
    /// underscore becomes a parenthesized suffix ("aze_cyrl" gets " (cyrl)"), and the
    /// leading part is replaced by the English name of the first culture whose
    /// three-letter ISO language name equals it (ignoring case). When no culture
    /// matches, the leading part is used as it is.
    /// </summary>
    /// <param name="dictionary">Dictionary code, e.g. "eng" or "chi_sim_vert".</param>
    /// <param name="cultures">Cultures to search for a matching language name.</param>
    /// <returns>The language part followed by the optional suffix.</returns>
    private static string MakeName(string dictionary, CultureInfo[] cultures)
    {
        var separatorIndex = dictionary.IndexOf('_');
        var hasSuffix = separatorIndex > 0;

        var suffix = hasSuffix
            ? $" ({dictionary.Substring(separatorIndex).Trim('_')})"
            : string.Empty;
        var language = hasSuffix
            ? dictionary.Substring(0, separatorIndex).Trim('_')
            : dictionary;

        try
        {
            foreach (var culture in cultures)
            {
                if (string.Equals(culture.ThreeLetterISOLanguageName, language, StringComparison.OrdinalIgnoreCase))
                {
                    language = culture.EnglishName;
                    break;
                }
            }
        }
        catch
        {
            // Best effort only: if the culture lookup fails, keep the raw code.
        }

        return language + suffix;
    }

    /// <summary>Returns <see cref="Name"/>.</summary>
    public override string ToString() => Name;
}
}

View File

@ -1,6 +1,6 @@
namespace Nikse.SubtitleEdit.Forms namespace Nikse.SubtitleEdit.Forms
{ {
partial class GetTesseractDictionaries sealed partial class GetTesseractDictionaries
{ {
/// <summary> /// <summary>
/// Required designer variable. /// Required designer variable.

View File

@ -6,17 +6,14 @@ using System.IO;
using System.IO.Compression; using System.IO.Compression;
using System.Net; using System.Net;
using System.Windows.Forms; using System.Windows.Forms;
using System.Xml;
namespace Nikse.SubtitleEdit.Forms namespace Nikse.SubtitleEdit.Forms
{ {
public partial class GetTesseractDictionaries : Form public sealed partial class GetTesseractDictionaries : Form
{ {
private List<string> _dictionaryDownloadLinks = new List<string>(); private string _dictionaryFileName;
private List<string> _descriptions = new List<string>();
private string _xmlName = null;
private string _dictionaryFileName = null;
internal string ChosenLanguage { get; private set; } internal string ChosenLanguage { get; private set; }
private readonly List<TesseractDictionary> _dictionaries;
public GetTesseractDictionaries() public GetTesseractDictionaries()
{ {
@ -31,57 +28,24 @@ namespace Nikse.SubtitleEdit.Forms
buttonDownload.Text = Configuration.Settings.Language.GetTesseractDictionaries.Download; buttonDownload.Text = Configuration.Settings.Language.GetTesseractDictionaries.Download;
labelPleaseWait.Text = string.Empty; labelPleaseWait.Text = string.Empty;
buttonOK.Text = Configuration.Settings.Language.General.Ok; buttonOK.Text = Configuration.Settings.Language.General.Ok;
LoadDictionaryList("Nikse.SubtitleEdit.Resources.TesseractDictionaries.xml.gz");
FixLargeFonts(); FixLargeFonts();
_dictionaries = TesseractDictionary.List();
LoadDictionaryList();
} }
private void LoadDictionaryList(string xmlRessourceName) private void LoadDictionaryList()
{ {
_dictionaryDownloadLinks = new List<string>(); comboBoxDictionaries.BeginUpdate();
_descriptions = new List<string>(); comboBoxDictionaries.Items.Clear();
_xmlName = xmlRessourceName; foreach (var d in _dictionaries)
System.Reflection.Assembly asm = System.Reflection.Assembly.GetExecutingAssembly();
Stream strm = asm.GetManifestResourceStream(_xmlName);
if (strm != null)
{ {
comboBoxDictionaries.Items.Clear(); if (!string.IsNullOrEmpty(d.Url))
XmlDocument doc = new XmlDocument();
using (var rdr = new StreamReader(strm))
using (var zip = new GZipStream(rdr.BaseStream, CompressionMode.Decompress))
{ {
byte[] data = new byte[195000]; comboBoxDictionaries.Items.Add(d);
int bytesRead = zip.Read(data, 0, data.Length);
var s = System.Text.Encoding.UTF8.GetString(data, 0, bytesRead).Trim();
try
{
doc.LoadXml(s);
}
catch (Exception exception)
{
MessageBox.Show(exception.Message);
}
}
foreach (XmlNode node in doc.DocumentElement.SelectNodes("Dictionary"))
{
string englishName = node.SelectSingleNode("EnglishName").InnerText;
string downloadLink = node.SelectSingleNode("DownloadLink").InnerText;
string description = string.Empty;
if (node.SelectSingleNode("Description") != null)
description = node.SelectSingleNode("Description").InnerText;
if (!string.IsNullOrEmpty(downloadLink))
{
string name = englishName;
comboBoxDictionaries.Items.Add(name);
_dictionaryDownloadLinks.Add(downloadLink);
_descriptions.Add(description);
}
comboBoxDictionaries.SelectedIndex = 0;
} }
} }
comboBoxDictionaries.SelectedIndex = 0;
comboBoxDictionaries.EndUpdate();
comboBoxDictionaries.AutoCompleteSource = AutoCompleteSource.ListItems; comboBoxDictionaries.AutoCompleteSource = AutoCompleteSource.ListItems;
comboBoxDictionaries.AutoCompleteMode = AutoCompleteMode.Append; comboBoxDictionaries.AutoCompleteMode = AutoCompleteMode.Append;
} }
@ -105,7 +69,7 @@ namespace Nikse.SubtitleEdit.Forms
Cursor = Cursors.WaitCursor; Cursor = Cursors.WaitCursor;
int index = comboBoxDictionaries.SelectedIndex; int index = comboBoxDictionaries.SelectedIndex;
string url = _dictionaryDownloadLinks[index]; string url = _dictionaries[index].Url;
ChosenLanguage = comboBoxDictionaries.Items[index].ToString(); ChosenLanguage = comboBoxDictionaries.Items[index].ToString();
var wc = new WebClient { Proxy = Utilities.GetProxy() }; var wc = new WebClient { Proxy = Utilities.GetProxy() };

View File

@ -39,6 +39,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.nOcrTrainingToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.nOcrTrainingToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.toolStripSeparator4 = new System.Windows.Forms.ToolStripSeparator(); this.toolStripSeparator4 = new System.Windows.Forms.ToolStripSeparator();
this.toolStripMenuItemSetUnItalicFactor = new System.Windows.Forms.ToolStripMenuItem(); this.toolStripMenuItemSetUnItalicFactor = new System.Windows.Forms.ToolStripMenuItem();
this.setForecolorThresholdToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.toolStripSeparator3 = new System.Windows.Forms.ToolStripSeparator(); this.toolStripSeparator3 = new System.Windows.Forms.ToolStripSeparator();
this.deleteToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.deleteToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.labelSubtitleText = new System.Windows.Forms.Label(); this.labelSubtitleText = new System.Windows.Forms.Label();
@ -48,23 +49,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.buttonCancel = new System.Windows.Forms.Button(); this.buttonCancel = new System.Windows.Forms.Button();
this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox(); this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox();
this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox(); this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox();
this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox();
this.labelMinLineSplitHeight = new System.Windows.Forms.Label();
this.comboBoxLineSplitMinLineHeight = new System.Windows.Forms.ComboBox();
this.labelMaxErrorPercent = new System.Windows.Forms.Label();
this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown();
this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox();
this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown();
this.buttonEditCharacterDatabase = new System.Windows.Forms.Button();
this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label();
this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox();
this.labelImageDatabase = new System.Windows.Forms.Label();
this.buttonNewCharacterDatabase = new System.Windows.Forms.Button();
this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox(); this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox();
this.buttonGetTesseractDictionaries = new System.Windows.Forms.Button(); this.buttonGetTesseractDictionaries = new System.Windows.Forms.Button();
this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox(); this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox();
this.checkBoxTesseractItalicsOn = new System.Windows.Forms.CheckBox(); this.checkBoxTesseractItalicsOn = new System.Windows.Forms.CheckBox();
this.checkBoxUseModiInTesseractForUnknownWords = new System.Windows.Forms.CheckBox();
this.labelTesseractLanguage = new System.Windows.Forms.Label(); this.labelTesseractLanguage = new System.Windows.Forms.Label();
this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox(); this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox();
this.groupBoxModiMethod = new System.Windows.Forms.GroupBox(); this.groupBoxModiMethod = new System.Windows.Forms.GroupBox();
@ -80,6 +68,18 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.checkBoxRightToLeftNOCR = new System.Windows.Forms.CheckBox(); this.checkBoxRightToLeftNOCR = new System.Windows.Forms.CheckBox();
this.numericUpDownNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.NumericUpDown(); this.numericUpDownNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.NumericUpDown();
this.labelNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.Label(); this.labelNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.Label();
this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox();
this.labelMinLineSplitHeight = new System.Windows.Forms.Label();
this.comboBoxLineSplitMinLineHeight = new System.Windows.Forms.ComboBox();
this.labelMaxErrorPercent = new System.Windows.Forms.Label();
this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown();
this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox();
this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown();
this.buttonEditCharacterDatabase = new System.Windows.Forms.Button();
this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label();
this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox();
this.labelImageDatabase = new System.Windows.Forms.Label();
this.buttonNewCharacterDatabase = new System.Windows.Forms.Button();
this.groupBoxOCRControls = new System.Windows.Forms.GroupBox(); this.groupBoxOCRControls = new System.Windows.Forms.GroupBox();
this.labelStartFrom = new System.Windows.Forms.Label(); this.labelStartFrom = new System.Windows.Forms.Label();
this.numericUpDownStartNumber = new System.Windows.Forms.NumericUpDown(); this.numericUpDownStartNumber = new System.Windows.Forms.NumericUpDown();
@ -141,16 +141,17 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.textBoxCurrentText = new Nikse.SubtitleEdit.Controls.SETextBox(); this.textBoxCurrentText = new Nikse.SubtitleEdit.Controls.SETextBox();
this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView();
this.timerHideStatus = new System.Windows.Forms.Timer(this.components); this.timerHideStatus = new System.Windows.Forms.Timer(this.components);
this.setForecolorThresholdToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.comboBoxTesseractEngineMode = new System.Windows.Forms.ComboBox();
this.labelTesseractEngineMode = new System.Windows.Forms.Label();
this.contextMenuStripListview.SuspendLayout(); this.contextMenuStripListview.SuspendLayout();
this.groupBoxOcrMethod.SuspendLayout(); this.groupBoxOcrMethod.SuspendLayout();
this.groupBoxImageCompareMethod.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit();
this.GroupBoxTesseractMethod.SuspendLayout(); this.GroupBoxTesseractMethod.SuspendLayout();
this.groupBoxModiMethod.SuspendLayout(); this.groupBoxModiMethod.SuspendLayout();
this.groupBoxNOCR.SuspendLayout(); this.groupBoxNOCR.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit();
this.groupBoxImageCompareMethod.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit();
this.groupBoxOCRControls.SuspendLayout(); this.groupBoxOCRControls.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).BeginInit(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).BeginInit();
this.groupBoxOcrAutoFix.SuspendLayout(); this.groupBoxOcrAutoFix.SuspendLayout();
@ -200,7 +201,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.toolStripSeparator3, this.toolStripSeparator3,
this.deleteToolStripMenuItem}); this.deleteToolStripMenuItem});
this.contextMenuStripListview.Name = "contextMenuStripListview"; this.contextMenuStripListview.Name = "contextMenuStripListview";
this.contextMenuStripListview.Size = new System.Drawing.Size(306, 364); this.contextMenuStripListview.Size = new System.Drawing.Size(306, 342);
this.contextMenuStripListview.Opening += new System.ComponentModel.CancelEventHandler(this.ContextMenuStripListviewOpening); this.contextMenuStripListview.Opening += new System.ComponentModel.CancelEventHandler(this.ContextMenuStripListviewOpening);
// //
// normalToolStripMenuItem // normalToolStripMenuItem
@ -339,6 +340,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.toolStripMenuItemSetUnItalicFactor.Text = "Set un-italic factor..."; this.toolStripMenuItemSetUnItalicFactor.Text = "Set un-italic factor...";
this.toolStripMenuItemSetUnItalicFactor.Click += new System.EventHandler(this.toolStripMenuItemSetUnItalicFactor_Click); this.toolStripMenuItemSetUnItalicFactor.Click += new System.EventHandler(this.toolStripMenuItemSetUnItalicFactor_Click);
// //
// setForecolorThresholdToolStripMenuItem
//
this.setForecolorThresholdToolStripMenuItem.Name = "setForecolorThresholdToolStripMenuItem";
this.setForecolorThresholdToolStripMenuItem.Size = new System.Drawing.Size(305, 22);
this.setForecolorThresholdToolStripMenuItem.Text = "Set forecolor threshold...";
this.setForecolorThresholdToolStripMenuItem.Click += new System.EventHandler(this.setForecolorThresholdToolStripMenuItem_Click);
//
// toolStripSeparator3 // toolStripSeparator3
// //
this.toolStripSeparator3.Name = "toolStripSeparator3"; this.toolStripSeparator3.Name = "toolStripSeparator3";
@ -406,10 +414,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
// groupBoxOcrMethod // groupBoxOcrMethod
// //
this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod); this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod); this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod); this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR); this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5); this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5);
this.groupBoxOcrMethod.Name = "groupBoxOcrMethod"; this.groupBoxOcrMethod.Name = "groupBoxOcrMethod";
this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192); this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192);
@ -432,6 +440,223 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.comboBoxOcrMethod.TabIndex = 0; this.comboBoxOcrMethod.TabIndex = 0;
this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged); this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged);
// //
// GroupBoxTesseractMethod
//
this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractEngineMode);
this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractEngineMode);
this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn);
this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage);
this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages);
this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31);
this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod";
this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131);
this.GroupBoxTesseractMethod.TabIndex = 1;
this.GroupBoxTesseractMethod.TabStop = false;
this.GroupBoxTesseractMethod.Text = "Tesseract";
//
// buttonGetTesseractDictionaries
//
this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 28);
this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries";
this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23);
this.buttonGetTesseractDictionaries.TabIndex = 2;
this.buttonGetTesseractDictionaries.Text = "...";
this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true;
this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click);
//
// checkBoxTesseractMusicOn
//
this.checkBoxTesseractMusicOn.AutoSize = true;
this.checkBoxTesseractMusicOn.Checked = true;
this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 69);
this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn";
this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(93, 17);
this.checkBoxTesseractMusicOn.TabIndex = 4;
this.checkBoxTesseractMusicOn.Text = "Music symbols";
this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true;
//
// checkBoxTesseractItalicsOn
//
this.checkBoxTesseractItalicsOn.AutoSize = true;
this.checkBoxTesseractItalicsOn.Checked = true;
this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 69);
this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn";
this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17);
this.checkBoxTesseractItalicsOn.TabIndex = 3;
this.checkBoxTesseractItalicsOn.Text = "Italics";
this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true;
//
// labelTesseractLanguage
//
this.labelTesseractLanguage.AutoSize = true;
this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 32);
this.labelTesseractLanguage.Name = "labelTesseractLanguage";
this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13);
this.labelTesseractLanguage.TabIndex = 0;
this.labelTesseractLanguage.Text = "Language";
//
// comboBoxTesseractLanguages
//
this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxTesseractLanguages.FormattingEnabled = true;
this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 29);
this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages";
this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21);
this.comboBoxTesseractLanguages.TabIndex = 1;
this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged);
//
// groupBoxModiMethod
//
this.groupBoxModiMethod.Controls.Add(this.label1);
this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage);
this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50);
this.groupBoxModiMethod.Name = "groupBoxModiMethod";
this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131);
this.groupBoxModiMethod.TabIndex = 3;
this.groupBoxModiMethod.TabStop = false;
this.groupBoxModiMethod.Text = "MODI";
//
// label1
//
this.label1.AutoSize = true;
this.label1.Location = new System.Drawing.Point(11, 58);
this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(54, 13);
this.label1.TabIndex = 33;
this.label1.Text = "Language";
//
// comboBoxModiLanguage
//
this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxModiLanguage.FormattingEnabled = true;
this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55);
this.comboBoxModiLanguage.Name = "comboBoxModiLanguage";
this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21);
this.comboBoxModiLanguage.TabIndex = 0;
this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged);
//
// groupBoxNOCR
//
this.groupBoxNOCR.Controls.Add(this.buttonLineOcrEditLanguage);
this.groupBoxNOCR.Controls.Add(this.buttonLineOcrNewLanguage);
this.groupBoxNOCR.Controls.Add(this.label2);
this.groupBoxNOCR.Controls.Add(this.comboBoxNOcrLanguage);
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic);
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrCorrect);
this.groupBoxNOCR.Controls.Add(this.checkBoxRightToLeftNOCR);
this.groupBoxNOCR.Controls.Add(this.numericUpDownNumberOfPixelsIsSpaceNOCR);
this.groupBoxNOCR.Controls.Add(this.labelNumberOfPixelsIsSpaceNOCR);
this.groupBoxNOCR.Location = new System.Drawing.Point(7, 38);
this.groupBoxNOCR.Name = "groupBoxNOCR";
this.groupBoxNOCR.Size = new System.Drawing.Size(366, 131);
this.groupBoxNOCR.TabIndex = 7;
this.groupBoxNOCR.TabStop = false;
this.groupBoxNOCR.Text = "nOCR";
//
// buttonLineOcrEditLanguage
//
this.buttonLineOcrEditLanguage.Location = new System.Drawing.Point(210, 97);
this.buttonLineOcrEditLanguage.Name = "buttonLineOcrEditLanguage";
this.buttonLineOcrEditLanguage.Size = new System.Drawing.Size(68, 21);
this.buttonLineOcrEditLanguage.TabIndex = 41;
this.buttonLineOcrEditLanguage.Text = "Edit";
this.buttonLineOcrEditLanguage.UseVisualStyleBackColor = true;
this.buttonLineOcrEditLanguage.Click += new System.EventHandler(this.buttonLineOcrEditLanguage_Click);
//
// buttonLineOcrNewLanguage
//
this.buttonLineOcrNewLanguage.Location = new System.Drawing.Point(283, 97);
this.buttonLineOcrNewLanguage.Name = "buttonLineOcrNewLanguage";
this.buttonLineOcrNewLanguage.Size = new System.Drawing.Size(68, 21);
this.buttonLineOcrNewLanguage.TabIndex = 40;
this.buttonLineOcrNewLanguage.Text = "New";
this.buttonLineOcrNewLanguage.UseVisualStyleBackColor = true;
this.buttonLineOcrNewLanguage.Click += new System.EventHandler(this.buttonLineOcrNewLanguage_Click);
//
// label2
//
this.label2.AutoSize = true;
this.label2.Location = new System.Drawing.Point(11, 101);
this.label2.Name = "label2";
this.label2.Size = new System.Drawing.Size(54, 13);
this.label2.TabIndex = 35;
this.label2.Text = "Language";
//
// comboBoxNOcrLanguage
//
this.comboBoxNOcrLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxNOcrLanguage.FormattingEnabled = true;
this.comboBoxNOcrLanguage.Location = new System.Drawing.Point(74, 97);
this.comboBoxNOcrLanguage.Name = "comboBoxNOcrLanguage";
this.comboBoxNOcrLanguage.Size = new System.Drawing.Size(130, 21);
this.comboBoxNOcrLanguage.TabIndex = 34;
this.comboBoxNOcrLanguage.SelectedIndexChanged += new System.EventHandler(this.comboBoxNOcrLanguage_SelectedIndexChanged);
//
// checkBoxNOcrItalic
//
this.checkBoxNOcrItalic.AutoSize = true;
this.checkBoxNOcrItalic.Location = new System.Drawing.Point(15, 42);
this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic";
this.checkBoxNOcrItalic.Size = new System.Drawing.Size(92, 17);
this.checkBoxNOcrItalic.TabIndex = 8;
this.checkBoxNOcrItalic.Text = "Contains italic";
this.checkBoxNOcrItalic.UseVisualStyleBackColor = true;
//
// checkBoxNOcrCorrect
//
this.checkBoxNOcrCorrect.AutoSize = true;
this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(235, 17);
this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect";
this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17);
this.checkBoxNOcrCorrect.TabIndex = 7;
this.checkBoxNOcrCorrect.Text = "Draw missing texts";
this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true;
//
// checkBoxRightToLeftNOCR
//
this.checkBoxRightToLeftNOCR.AutoSize = true;
this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(235, 40);
this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR";
this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17);
this.checkBoxRightToLeftNOCR.TabIndex = 6;
this.checkBoxRightToLeftNOCR.Text = "Right to left";
this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true;
//
// numericUpDownNumberOfPixelsIsSpaceNOCR
//
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(122, 17);
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Maximum = new decimal(new int[] {
50,
0,
0,
0});
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Minimum = new decimal(new int[] {
1,
0,
0,
0});
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Name = "numericUpDownNumberOfPixelsIsSpaceNOCR";
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(50, 21);
this.numericUpDownNumberOfPixelsIsSpaceNOCR.TabIndex = 5;
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Value = new decimal(new int[] {
12,
0,
0,
0});
//
// labelNumberOfPixelsIsSpaceNOCR
//
this.labelNumberOfPixelsIsSpaceNOCR.AutoSize = true;
this.labelNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(12, 20);
this.labelNumberOfPixelsIsSpaceNOCR.Name = "labelNumberOfPixelsIsSpaceNOCR";
this.labelNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(104, 13);
this.labelNumberOfPixelsIsSpaceNOCR.TabIndex = 4;
this.labelNumberOfPixelsIsSpaceNOCR.Text = "No of pixels is space";
//
// groupBoxImageCompareMethod // groupBoxImageCompareMethod
// //
this.groupBoxImageCompareMethod.Controls.Add(this.labelMinLineSplitHeight); this.groupBoxImageCompareMethod.Controls.Add(this.labelMinLineSplitHeight);
@ -660,235 +885,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.buttonNewCharacterDatabase.UseVisualStyleBackColor = true; this.buttonNewCharacterDatabase.UseVisualStyleBackColor = true;
this.buttonNewCharacterDatabase.Click += new System.EventHandler(this.ButtonNewCharacterDatabaseClick); this.buttonNewCharacterDatabase.Click += new System.EventHandler(this.ButtonNewCharacterDatabaseClick);
// //
// GroupBoxTesseractMethod
//
this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxUseModiInTesseractForUnknownWords);
this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage);
this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages);
this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31);
this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod";
this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131);
this.GroupBoxTesseractMethod.TabIndex = 1;
this.GroupBoxTesseractMethod.TabStop = false;
this.GroupBoxTesseractMethod.Text = "Tesseract";
//
// buttonGetTesseractDictionaries
//
this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 30);
this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries";
this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23);
this.buttonGetTesseractDictionaries.TabIndex = 2;
this.buttonGetTesseractDictionaries.Text = "...";
this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true;
this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click);
//
// checkBoxTesseractMusicOn
//
this.checkBoxTesseractMusicOn.AutoSize = true;
this.checkBoxTesseractMusicOn.Checked = true;
this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 101);
this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn";
this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(93, 17);
this.checkBoxTesseractMusicOn.TabIndex = 4;
this.checkBoxTesseractMusicOn.Text = "Music symbols";
this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true;
//
// checkBoxTesseractItalicsOn
//
this.checkBoxTesseractItalicsOn.AutoSize = true;
this.checkBoxTesseractItalicsOn.Checked = true;
this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 101);
this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn";
this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17);
this.checkBoxTesseractItalicsOn.TabIndex = 3;
this.checkBoxTesseractItalicsOn.Text = "Italics";
this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true;
//
// checkBoxUseModiInTesseractForUnknownWords
//
this.checkBoxUseModiInTesseractForUnknownWords.AutoSize = true;
this.checkBoxUseModiInTesseractForUnknownWords.Checked = true;
this.checkBoxUseModiInTesseractForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74);
this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords";
this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(167, 17);
this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 2;
this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words";
this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true;
//
// labelTesseractLanguage
//
this.labelTesseractLanguage.AutoSize = true;
this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 34);
this.labelTesseractLanguage.Name = "labelTesseractLanguage";
this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13);
this.labelTesseractLanguage.TabIndex = 0;
this.labelTesseractLanguage.Text = "Language";
//
// comboBoxTesseractLanguages
//
this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxTesseractLanguages.FormattingEnabled = true;
this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 31);
this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages";
this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21);
this.comboBoxTesseractLanguages.TabIndex = 1;
this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged);
//
// groupBoxModiMethod
//
this.groupBoxModiMethod.Controls.Add(this.label1);
this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage);
this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50);
this.groupBoxModiMethod.Name = "groupBoxModiMethod";
this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131);
this.groupBoxModiMethod.TabIndex = 3;
this.groupBoxModiMethod.TabStop = false;
this.groupBoxModiMethod.Text = "MODI";
//
// label1
//
this.label1.AutoSize = true;
this.label1.Location = new System.Drawing.Point(11, 58);
this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(54, 13);
this.label1.TabIndex = 33;
this.label1.Text = "Language";
//
// comboBoxModiLanguage
//
this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxModiLanguage.FormattingEnabled = true;
this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55);
this.comboBoxModiLanguage.Name = "comboBoxModiLanguage";
this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21);
this.comboBoxModiLanguage.TabIndex = 0;
this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged);
//
// groupBoxNOCR
//
this.groupBoxNOCR.Controls.Add(this.buttonLineOcrEditLanguage);
this.groupBoxNOCR.Controls.Add(this.buttonLineOcrNewLanguage);
this.groupBoxNOCR.Controls.Add(this.label2);
this.groupBoxNOCR.Controls.Add(this.comboBoxNOcrLanguage);
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic);
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrCorrect);
this.groupBoxNOCR.Controls.Add(this.checkBoxRightToLeftNOCR);
this.groupBoxNOCR.Controls.Add(this.numericUpDownNumberOfPixelsIsSpaceNOCR);
this.groupBoxNOCR.Controls.Add(this.labelNumberOfPixelsIsSpaceNOCR);
this.groupBoxNOCR.Location = new System.Drawing.Point(7, 38);
this.groupBoxNOCR.Name = "groupBoxNOCR";
this.groupBoxNOCR.Size = new System.Drawing.Size(366, 131);
this.groupBoxNOCR.TabIndex = 7;
this.groupBoxNOCR.TabStop = false;
this.groupBoxNOCR.Text = "nOCR";
//
// buttonLineOcrEditLanguage
//
this.buttonLineOcrEditLanguage.Location = new System.Drawing.Point(210, 97);
this.buttonLineOcrEditLanguage.Name = "buttonLineOcrEditLanguage";
this.buttonLineOcrEditLanguage.Size = new System.Drawing.Size(68, 21);
this.buttonLineOcrEditLanguage.TabIndex = 41;
this.buttonLineOcrEditLanguage.Text = "Edit";
this.buttonLineOcrEditLanguage.UseVisualStyleBackColor = true;
this.buttonLineOcrEditLanguage.Click += new System.EventHandler(this.buttonLineOcrEditLanguage_Click);
//
// buttonLineOcrNewLanguage
//
this.buttonLineOcrNewLanguage.Location = new System.Drawing.Point(283, 97);
this.buttonLineOcrNewLanguage.Name = "buttonLineOcrNewLanguage";
this.buttonLineOcrNewLanguage.Size = new System.Drawing.Size(68, 21);
this.buttonLineOcrNewLanguage.TabIndex = 40;
this.buttonLineOcrNewLanguage.Text = "New";
this.buttonLineOcrNewLanguage.UseVisualStyleBackColor = true;
this.buttonLineOcrNewLanguage.Click += new System.EventHandler(this.buttonLineOcrNewLanguage_Click);
//
// label2
//
this.label2.AutoSize = true;
this.label2.Location = new System.Drawing.Point(11, 101);
this.label2.Name = "label2";
this.label2.Size = new System.Drawing.Size(54, 13);
this.label2.TabIndex = 35;
this.label2.Text = "Language";
//
// comboBoxNOcrLanguage
//
this.comboBoxNOcrLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxNOcrLanguage.FormattingEnabled = true;
this.comboBoxNOcrLanguage.Location = new System.Drawing.Point(74, 97);
this.comboBoxNOcrLanguage.Name = "comboBoxNOcrLanguage";
this.comboBoxNOcrLanguage.Size = new System.Drawing.Size(130, 21);
this.comboBoxNOcrLanguage.TabIndex = 34;
this.comboBoxNOcrLanguage.SelectedIndexChanged += new System.EventHandler(this.comboBoxNOcrLanguage_SelectedIndexChanged);
//
// checkBoxNOcrItalic
//
this.checkBoxNOcrItalic.AutoSize = true;
this.checkBoxNOcrItalic.Location = new System.Drawing.Point(15, 42);
this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic";
this.checkBoxNOcrItalic.Size = new System.Drawing.Size(92, 17);
this.checkBoxNOcrItalic.TabIndex = 8;
this.checkBoxNOcrItalic.Text = "Contains italic";
this.checkBoxNOcrItalic.UseVisualStyleBackColor = true;
//
// checkBoxNOcrCorrect
//
this.checkBoxNOcrCorrect.AutoSize = true;
this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(235, 17);
this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect";
this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17);
this.checkBoxNOcrCorrect.TabIndex = 7;
this.checkBoxNOcrCorrect.Text = "Draw missing texts";
this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true;
//
// checkBoxRightToLeftNOCR
//
this.checkBoxRightToLeftNOCR.AutoSize = true;
this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(235, 40);
this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR";
this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17);
this.checkBoxRightToLeftNOCR.TabIndex = 6;
this.checkBoxRightToLeftNOCR.Text = "Right to left";
this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true;
//
// numericUpDownNumberOfPixelsIsSpaceNOCR
//
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(122, 17);
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Maximum = new decimal(new int[] {
50,
0,
0,
0});
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Minimum = new decimal(new int[] {
1,
0,
0,
0});
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Name = "numericUpDownNumberOfPixelsIsSpaceNOCR";
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(50, 21);
this.numericUpDownNumberOfPixelsIsSpaceNOCR.TabIndex = 5;
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Value = new decimal(new int[] {
12,
0,
0,
0});
//
// labelNumberOfPixelsIsSpaceNOCR
//
this.labelNumberOfPixelsIsSpaceNOCR.AutoSize = true;
this.labelNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(12, 20);
this.labelNumberOfPixelsIsSpaceNOCR.Name = "labelNumberOfPixelsIsSpaceNOCR";
this.labelNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(104, 13);
this.labelNumberOfPixelsIsSpaceNOCR.TabIndex = 4;
this.labelNumberOfPixelsIsSpaceNOCR.Text = "No of pixels is space";
//
// groupBoxOCRControls // groupBoxOCRControls
// //
this.groupBoxOCRControls.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); this.groupBoxOCRControls.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
@ -1595,12 +1591,28 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.timerHideStatus.Interval = 2000; this.timerHideStatus.Interval = 2000;
this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick); this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick);
// //
// setForecolorThresholdToolStripMenuItem // comboBoxTesseractEngineMode
// //
this.setForecolorThresholdToolStripMenuItem.Name = "setForecolorThresholdToolStripMenuItem"; this.comboBoxTesseractEngineMode.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.setForecolorThresholdToolStripMenuItem.Size = new System.Drawing.Size(305, 22); this.comboBoxTesseractEngineMode.FormattingEnabled = true;
this.setForecolorThresholdToolStripMenuItem.Text = "Set forecolor threshold..."; this.comboBoxTesseractEngineMode.Items.AddRange(new object[] {
this.setForecolorThresholdToolStripMenuItem.Click += new System.EventHandler(this.setForecolorThresholdToolStripMenuItem_Click); "Original Tesseract only (can detect italic)",
"Neural nets LSTM only ",
"Tesseract + LSTM",
"Default, based on what is available"});
this.comboBoxTesseractEngineMode.Location = new System.Drawing.Point(98, 100);
this.comboBoxTesseractEngineMode.Name = "comboBoxTesseractEngineMode";
this.comboBoxTesseractEngineMode.Size = new System.Drawing.Size(195, 21);
this.comboBoxTesseractEngineMode.TabIndex = 5;
//
// labelTesseractEngineMode
//
this.labelTesseractEngineMode.AutoSize = true;
this.labelTesseractEngineMode.Location = new System.Drawing.Point(18, 103);
this.labelTesseractEngineMode.Name = "labelTesseractEngineMode";
this.labelTesseractEngineMode.Size = new System.Drawing.Size(68, 13);
this.labelTesseractEngineMode.TabIndex = 6;
this.labelTesseractEngineMode.Text = "Engine mode";
// //
// VobSubOcr // VobSubOcr
// //
@ -1629,10 +1641,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.Resize += new System.EventHandler(this.VobSubOcr_Resize); this.Resize += new System.EventHandler(this.VobSubOcr_Resize);
this.contextMenuStripListview.ResumeLayout(false); this.contextMenuStripListview.ResumeLayout(false);
this.groupBoxOcrMethod.ResumeLayout(false); this.groupBoxOcrMethod.ResumeLayout(false);
this.groupBoxImageCompareMethod.ResumeLayout(false);
this.groupBoxImageCompareMethod.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit();
this.GroupBoxTesseractMethod.ResumeLayout(false); this.GroupBoxTesseractMethod.ResumeLayout(false);
this.GroupBoxTesseractMethod.PerformLayout(); this.GroupBoxTesseractMethod.PerformLayout();
this.groupBoxModiMethod.ResumeLayout(false); this.groupBoxModiMethod.ResumeLayout(false);
@ -1640,6 +1648,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.groupBoxNOCR.ResumeLayout(false); this.groupBoxNOCR.ResumeLayout(false);
this.groupBoxNOCR.PerformLayout(); this.groupBoxNOCR.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).EndInit(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).EndInit();
this.groupBoxImageCompareMethod.ResumeLayout(false);
this.groupBoxImageCompareMethod.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit();
this.groupBoxOCRControls.ResumeLayout(false); this.groupBoxOCRControls.ResumeLayout(false);
this.groupBoxOCRControls.PerformLayout(); this.groupBoxOCRControls.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).EndInit(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).EndInit();
@ -1710,7 +1722,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private System.Windows.Forms.Label label1; private System.Windows.Forms.Label label1;
private System.Windows.Forms.GroupBox groupBoxModiMethod; private System.Windows.Forms.GroupBox groupBoxModiMethod;
private System.Windows.Forms.GroupBox GroupBoxTesseractMethod; private System.Windows.Forms.GroupBox GroupBoxTesseractMethod;
private System.Windows.Forms.CheckBox checkBoxUseModiInTesseractForUnknownWords;
private System.Windows.Forms.Label labelTesseractLanguage; private System.Windows.Forms.Label labelTesseractLanguage;
private System.Windows.Forms.ComboBox comboBoxTesseractLanguages; private System.Windows.Forms.ComboBox comboBoxTesseractLanguages;
private System.Windows.Forms.ContextMenuStrip contextMenuStripListview; private System.Windows.Forms.ContextMenuStrip contextMenuStripListview;
@ -1801,5 +1812,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private System.Windows.Forms.Label labelMinLineSplitHeight; private System.Windows.Forms.Label labelMinLineSplitHeight;
private System.Windows.Forms.ComboBox comboBoxLineSplitMinLineHeight; private System.Windows.Forms.ComboBox comboBoxLineSplitMinLineHeight;
private System.Windows.Forms.ToolStripMenuItem setForecolorThresholdToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem setForecolorThresholdToolStripMenuItem;
private System.Windows.Forms.Label labelTesseractEngineMode;
private System.Windows.Forms.ComboBox comboBoxTesseractEngineMode;
} }
} }

View File

@ -302,6 +302,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private string[] _tesseractAsyncStrings; private string[] _tesseractAsyncStrings;
private int _tesseractAsyncIndex; private int _tesseractAsyncIndex;
private BackgroundWorker _tesseractThread; private BackgroundWorker _tesseractThread;
private int _tesseractEngineMode;
private readonly DateTime _windowStartTime = DateTime.Now; private readonly DateTime _windowStartTime = DateTime.Now;
private int _linesOcred; private int _linesOcred;
@ -426,9 +427,15 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_ocrMethodImageCompare = 4; _ocrMethodImageCompare = 4;
} }
checkBoxUseModiInTesseractForUnknownWords.Text = language.TryModiForUnknownWords;
checkBoxTesseractItalicsOn.Checked = Configuration.Settings.VobSubOcr.UseItalicsInTesseract; checkBoxTesseractItalicsOn.Checked = Configuration.Settings.VobSubOcr.UseItalicsInTesseract;
checkBoxTesseractItalicsOn.Text = Configuration.Settings.Language.General.Italic; checkBoxTesseractItalicsOn.Text = Configuration.Settings.Language.General.Italic;
if (Configuration.Settings.VobSubOcr.TesseractEngineMode >= 0 &&
Configuration.Settings.VobSubOcr.TesseractEngineMode < comboBoxTesseractEngineMode.Items.Count)
{
comboBoxTesseractEngineMode.SelectedIndex = Configuration.Settings.VobSubOcr.TesseractEngineMode;
}
comboBoxTesseractEngineMode.Left = labelTesseractEngineMode.Left + labelTesseractEngineMode.Width + 5;
comboBoxTesseractEngineMode.Width = GroupBoxTesseractMethod.Width - comboBoxTesseractEngineMode.Left - 10;
checkBoxTesseractMusicOn.Checked = Configuration.Settings.VobSubOcr.UseMusicSymbolsInTesseract; checkBoxTesseractMusicOn.Checked = Configuration.Settings.VobSubOcr.UseMusicSymbolsInTesseract;
checkBoxTesseractMusicOn.Text = Configuration.Settings.Language.Settings.MusicSymbol; checkBoxTesseractMusicOn.Text = Configuration.Settings.Language.Settings.MusicSymbol;
@ -5314,7 +5321,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void FormVobSubOcr_Shown(object sender, EventArgs e) private void FormVobSubOcr_Shown(object sender, EventArgs e)
{ {
checkBoxUseModiInTesseractForUnknownWords.Checked = Configuration.Settings.VobSubOcr.UseModiInTesseractForUnknownWords;
if (_mp4List != null) if (_mp4List != null)
{ {
checkBoxShowOnlyForced.Visible = false; checkBoxShowOnlyForced.Visible = false;
@ -5841,6 +5847,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void ButtonStartOcrClick(object sender, EventArgs e) private void ButtonStartOcrClick(object sender, EventArgs e)
{ {
_tesseractEngineMode = comboBoxTesseractEngineMode.SelectedIndex;
_isLatinDb = comboBoxCharacterDatabase.SelectedItem != null && comboBoxCharacterDatabase.SelectedItem.ToString().Equals("Latin", StringComparison.Ordinal); _isLatinDb = comboBoxCharacterDatabase.SelectedItem != null && comboBoxCharacterDatabase.SelectedItem.ToString().Equals("Latin", StringComparison.Ordinal);
Configuration.Settings.VobSubOcr.RightToLeft = checkBoxRightToLeft.Checked; Configuration.Settings.VobSubOcr.RightToLeft = checkBoxRightToLeft.Checked;
_lastLine = null; _lastLine = null;
@ -6144,11 +6151,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
var nbmp = new NikseBitmap(bmp); var nbmp = new NikseBitmap(bmp);
nbmp.ReplaceYellowWithWhite(); // optimized replace nbmp.ReplaceYellowWithWhite(); // optimized replace
string tempTiffFileName = Path.GetTempPath() + Guid.NewGuid() + ".png"; string pngFileName = Path.GetTempPath() + Guid.NewGuid() + ".png";
string tempTextFileName; string tempTextFileName;
using (var b = nbmp.GetBitmap()) using (var b = nbmp.GetBitmap())
{ {
b.Save(tempTiffFileName, System.Drawing.Imaging.ImageFormat.Png); b.Save(pngFileName, System.Drawing.Imaging.ImageFormat.Png);
tempTextFileName = Path.GetTempPath() + Guid.NewGuid(); tempTextFileName = Path.GetTempPath() + Guid.NewGuid();
} }
@ -6156,10 +6163,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{ {
process.StartInfo = new ProcessStartInfo(Configuration.TesseractDirectory + "tesseract.exe"); process.StartInfo = new ProcessStartInfo(Configuration.TesseractDirectory + "tesseract.exe");
process.StartInfo.UseShellExecute = true; process.StartInfo.UseShellExecute = true;
process.StartInfo.Arguments = "\"" + tempTiffFileName + "\" \"" + tempTextFileName + "\" -l " + language; process.StartInfo.Arguments = "\"" + pngFileName + "\" \"" + tempTextFileName + "\" --oem " + _tesseractEngineMode + " -l " + language;
if (checkBoxTesseractMusicOn.Checked)
process.StartInfo.Arguments += "+music";
if (!string.IsNullOrEmpty(psmMode)) if (!string.IsNullOrEmpty(psmMode))
process.StartInfo.Arguments += " " + psmMode.Trim(); process.StartInfo.Arguments += " " + psmMode.Trim();
@ -6186,11 +6190,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{ {
if (Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac()) if (Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac())
{ {
MessageBox.Show("Unable to start 'Tesseract' - make sure tesseract-ocr 3.x is installed!"); MessageBox.Show("Unable to start 'Tesseract' - make sure tesseract-ocr 4.x is installed!");
} }
else else
{ {
MessageBox.Show("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure tesseract-ocr 3.x is installed!"); MessageBox.Show("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure tesseract-ocr 4.x is installed!");
} }
throw; throw;
} }
@ -6209,7 +6213,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
result = ParseHocr(result); result = ParseHocr(result);
File.Delete(outputFileName); File.Delete(outputFileName);
} }
File.Delete(tempTiffFileName); File.Delete(pngFileName);
} }
catch catch
{ {
@ -6292,7 +6296,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
const int badWords = 0; const int badWords = 0;
string textWithOutFixes; string textWithOutFixes;
if (_tesseractAsyncStrings != null && !string.IsNullOrEmpty(_tesseractAsyncStrings[index])) if (!string.IsNullOrEmpty(_tesseractAsyncStrings?[index]))
{ {
textWithOutFixes = _tesseractAsyncStrings[index]; textWithOutFixes = _tesseractAsyncStrings[index];
} }
@ -6819,53 +6823,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{ {
_ocrFixEngine.AutoGuessesUsed.Clear(); _ocrFixEngine.AutoGuessesUsed.Clear();
_ocrFixEngine.UnknownWordsFound.Clear(); _ocrFixEngine.UnknownWordsFound.Clear();
line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked)
{
// which is best - modi or Tesseract - we find out here
string modiText = CallModi(index);
if (modiText.Length == 0)
modiText = CallModi(index); // retry... strange MODI
if (modiText.Length == 0)
modiText = CallModi(index); // retry... strange MODI
if (modiText.Length > 1 &&
!modiText.Contains("CD") &&
(!modiText.Contains('0') || line.Contains('0')) &&
(!modiText.Contains('2') || line.Contains('2')) &&
(!modiText.Contains('3') || line.Contains('4')) &&
(!modiText.Contains('5') || line.Contains('5')) &&
(!modiText.Contains('9') || line.Contains('9')) &&
(!modiText.Contains('•') || line.Contains('•')) &&
(!modiText.Contains(')') || line.Contains(')')) &&
Utilities.CountTagInText(modiText, '(') < 2 && Utilities.CountTagInText(modiText, ')') < 2 &&
Utilities.GetNumberOfLines(modiText) < 4)
{
int modiWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiText, out correctWords);
//if (modiWordsNotFound > 0)
{
string modiTextOcrFixed = modiText;
if (checkBoxAutoFixCommonErrors.Checked)
modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(modiText, index, _lastLine, false, GetAutoGuessLevel());
int modiOcrCorrectedWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiTextOcrFixed, out correctWords);
if (modiOcrCorrectedWordsNotFound <= modiWordsNotFound)
modiText = modiTextOcrFixed;
}
if (modiWordsNotFound < wordsNotFound || (textWithOutFixes.Length == 1 && modiWordsNotFound == 0))
line = modiText; // use the modi OCR'ed text
else if (wordsNotFound == modiWordsNotFound && modiText.EndsWith('!') && (line.EndsWith('l') || line.EndsWith('fl')))
line = modiText;
}
// take the best option - before OCR fixing, which we do again to save suggestions and prompt for user input
line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
}
else
{ // fix some error manually (modi not available)
line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
}
} }
if (_ocrFixEngine.Abort) if (_ocrFixEngine.Abort)
@ -6875,39 +6833,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
return string.Empty; return string.Empty;
} }
//check Tesseract... find an other way to do this...
//string tmp = HtmlUtil.RemoveHtmlTags(line).Trim();
//if (!tmp.TrimEnd().EndsWith("..."))
//{
// tmp = tmp.TrimEnd('.').TrimEnd();
// if (tmp.Length > 2 && Utilities.LowercaseLetters.Contains(tmp[tmp.Length - 1]))
// {
// if (_nocrChars == null)
// _nocrChars = LoadNOcrForTesseract("Nikse.SubtitleEdit.Resources.nOCR_TesseractHelper.xml.zip");
// string text = HtmlUtil.RemoveHtmlTags(NocrFastCheck(bitmap).TrimEnd());
// string post = string.Empty;
// if (line.EndsWith("</i>"))
// {
// post = "</i>";
// line = line.Remove(line.Length - 4, 4).Trim();
// }
// if (text.EndsWith('.'))
// {
// line = line.TrimEnd('.').Trim();
// while (text.EndsWith('.') || text.EndsWith(' '))
// {
// line += text.Substring(text.Length - 1).Trim();
// text = text.Remove(text.Length - 1, 1);
// }
// }
// else if (text.EndsWith('l') && text.EndsWith('!') && !text.EndsWith("l!"))
// {
// line = line.Remove(line.Length - 1, 1) + "!";
// }
// line += post;
// }
//}
// Log used word guesses (via word replace list) // Log used word guesses (via word replace list)
foreach (string guess in _ocrFixEngine.AutoGuessesUsed) foreach (string guess in _ocrFixEngine.AutoGuessesUsed)
listBoxLogSuggestions.Items.Add(guess); listBoxLogSuggestions.Items.Add(guess);
@ -6938,7 +6863,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
if (textWithOutFixes.Trim() != line.Trim()) if (textWithOutFixes.Trim() != line.Trim())
{ {
_tesseractOcrAutoFixes++; _tesseractOcrAutoFixes++;
labelFixesMade.Text = string.Format(" - {0}", _tesseractOcrAutoFixes); labelFixesMade.Text = $" - {_tesseractOcrAutoFixes}";
LogOcrFix(index, textWithOutFixes, line); LogOcrFix(index, textWithOutFixes, line);
} }
@ -6990,9 +6915,20 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
/// <summary>
/// Re-runs Tesseract OCR on enlarged copies of <paramref name="bitmap"/>.
/// </summary>
/// <remarks>
/// The first attempt scales the image to 3x width / 2x height and passes no
/// page-segmentation option. If that yields a null, empty or white-space-only
/// result, a second attempt scales to 4x width / 2x height and passes
/// <c>--psm 7</c>. Each resized bitmap is disposed as soon as its OCR call returns.
/// </remarks>
/// <param name="bitmap">Source image; it is only read here, never disposed.</param>
/// <returns>The OCR text with trailing white space removed (empty if both attempts produced nothing).</returns>
private string TesseractResizeAndRetry(Bitmap bitmap)
{
    string result;
    using (var b = ResizeBitmap(bitmap, bitmap.Width * 3, bitmap.Height * 2))
    {
        result = Tesseract3DoOcrViaExe(b, _languageId, null);
    }

    if (string.IsNullOrWhiteSpace(result))
    {
        using (var b = ResizeBitmap(bitmap, bitmap.Width * 4, bitmap.Height * 2))
        {
            // Tesseract 4 documents the page-segmentation switch as "--psm";
            // the single-dash "-psm" is the deprecated 3.x spelling.
            // Per the Tesseract manual, mode 7 treats the image as a single text line.
            result = Tesseract3DoOcrViaExe(b, _languageId, "--psm 7");
        }
    }

    // NOTE(review): not visible from here whether Tesseract3DoOcrViaExe can return null;
    // the null-coalesce keeps TrimEnd() from throwing if it does.
    return (result ?? string.Empty).TrimEnd();
}
@ -7108,7 +7044,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void InitializeModi() private void InitializeModi()
{ {
_modiEnabled = false; _modiEnabled = false;
checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
comboBoxModiLanguage.Enabled = false; comboBoxModiLanguage.Enabled = false;
try try
{ {
@ -7119,7 +7054,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_modiEnabled = _modiDoc != null; _modiEnabled = _modiDoc != null;
comboBoxModiLanguage.Enabled = _modiEnabled; comboBoxModiLanguage.Enabled = _modiEnabled;
checkBoxUseModiInTesseractForUnknownWords.Enabled = _modiEnabled;
} }
catch catch
{ {
@ -7523,17 +7457,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
comboBoxDictionaries_SelectedIndexChanged(null, null); comboBoxDictionaries_SelectedIndexChanged(null, null);
} }
if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked)
{
string tesseractLanguageText = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Text;
int i = 0;
foreach (var modiLanguage in comboBoxModiLanguage.Items)
{
if ((modiLanguage as ModiLanguage).Text == tesseractLanguageText)
comboBoxModiLanguage.SelectedIndex = i;
i++;
}
}
comboBoxModiLanguage.SelectedIndex = -1; comboBoxModiLanguage.SelectedIndex = -1;
} }
@ -8430,8 +8353,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
DisposeImageCompareBitmaps(); DisposeImageCompareBitmaps();
Configuration.Settings.VobSubOcr.UseItalicsInTesseract = checkBoxTesseractItalicsOn.Checked; Configuration.Settings.VobSubOcr.UseItalicsInTesseract = checkBoxTesseractItalicsOn.Checked;
if (comboBoxTesseractEngineMode.SelectedIndex != -1)
Configuration.Settings.VobSubOcr.TesseractEngineMode = comboBoxTesseractEngineMode.SelectedIndex;
Configuration.Settings.VobSubOcr.ItalicFactor = _unItalicFactor; Configuration.Settings.VobSubOcr.ItalicFactor = _unItalicFactor;
Configuration.Settings.VobSubOcr.UseModiInTesseractForUnknownWords = checkBoxUseModiInTesseractForUnknownWords.Checked;
Configuration.Settings.VobSubOcr.PromptForUnknownWords = checkBoxPromptForUnknownWords.Checked; Configuration.Settings.VobSubOcr.PromptForUnknownWords = checkBoxPromptForUnknownWords.Checked;
Configuration.Settings.VobSubOcr.GuessUnknownWords = checkBoxGuessUnknownWords.Checked; Configuration.Settings.VobSubOcr.GuessUnknownWords = checkBoxGuessUnknownWords.Checked;
Configuration.Settings.VobSubOcr.AutoBreakSubtitleIfMoreThanTwoLines = checkBoxAutoBreakLines.Checked; Configuration.Settings.VobSubOcr.AutoBreakSubtitleIfMoreThanTwoLines = checkBoxAutoBreakLines.Checked;