mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-22 03:02:35 +01:00
Update Tesseract OCR from 3.02 to 4.0 (alpha)
This commit is contained in:
parent
8361d7664e
commit
696b529c37
@ -21,6 +21,7 @@
|
||||
* Update Romanian translation - thx Mircea
|
||||
* Update Basque translation - thx Xabier
|
||||
* Update Portuguese translation - thx moob
|
||||
* Update Tesseract OCR from 3.02 to 4.0 (alpha)
|
||||
* Ctrl+a/ctrl+d/ctrl+shift+i works in more lists - thx tormento
|
||||
* Remember OCR spell check dictionary for tesseract - thx raymondjpg
|
||||
* FIXED:
|
||||
@ -37,6 +38,8 @@
|
||||
* Do not allow navigating before zero in video - thx darnn
|
||||
* Fix issue with nested tags in "Remove text for HI" - thx darnn
|
||||
* Fix image render issue regarding italic/font - thx Cemal
|
||||
* Fix bottom margin in batch image export - thx Cemal
|
||||
* Fix possible crash in list view - thx lambdacore12
|
||||
|
||||
3.5.6 (27th February 2018)
|
||||
* NEW:
|
||||
|
@ -2318,7 +2318,6 @@ Keep changes?</KeepChangesMessage>
|
||||
<SaveAllSubtitleImagesAsBdnXml>Save all images (png/bdn xml)...</SaveAllSubtitleImagesAsBdnXml>
|
||||
<SaveAllSubtitleImagesWithHtml>Save all images with HTML index...</SaveAllSubtitleImagesWithHtml>
|
||||
<XImagesSavedInY>{0} images saved in {1}</XImagesSavedInY>
|
||||
<TryModiForUnknownWords>Try Microsoft MODI OCR for unknown words</TryModiForUnknownWords>
|
||||
<DictionaryX>Dictionary: {0}</DictionaryX>
|
||||
<RightToLeft>Right to left</RightToLeft>
|
||||
<ShowOnlyForcedSubtitles>Show only forced subtitles</ShowOnlyForcedSubtitles>
|
||||
|
3
Tesseract4/tessdata/configs/hocr
Normal file
3
Tesseract4/tessdata/configs/hocr
Normal file
@ -0,0 +1,3 @@
|
||||
tessedit_create_hocr 1
|
||||
tessedit_pageseg_mode 1
|
||||
hocr_font_info 0
|
BIN
Tesseract4/tessdata/eng.traineddata
Normal file
BIN
Tesseract4/tessdata/eng.traineddata
Normal file
Binary file not shown.
BIN
Tesseract4/tesseract.exe
Normal file
BIN
Tesseract4/tesseract.exe
Normal file
Binary file not shown.
14
build.bat
14
build.bat
@ -91,9 +91,9 @@ PUSHD "src\bin\Release"
|
||||
IF EXIST "temp_zip" RD /S /Q "temp_zip"
|
||||
IF NOT EXIST "temp_zip" MD "temp_zip"
|
||||
IF NOT EXIST "temp_zip\Languages" MD "temp_zip\Languages"
|
||||
IF NOT EXIST "temp_zip\Tesseract" MD "temp_zip\Tesseract"
|
||||
IF NOT EXIST "temp_zip\Tesseract\tessdata" MD "temp_zip\Tesseract\tessdata"
|
||||
IF NOT EXIST "temp_zip\Tesseract\tessdata\configs" MD "temp_zip\Tesseract\tessdata\configs"
|
||||
IF NOT EXIST "temp_zip\Tesseract4" MD "temp_zip\Tesseract4"
|
||||
IF NOT EXIST "temp_zip\Tesseract4\tessdata" MD "temp_zip\Tesseract4\tessdata"
|
||||
IF NOT EXIST "temp_zip\Tesseract4\tessdata\configs" MD "temp_zip\Tesseract4\tessdata\configs"
|
||||
|
||||
COPY /Y /V "..\..\..\LICENSE.txt" "temp_zip\"
|
||||
COPY /Y /V "..\..\..\Changelog.txt" "temp_zip\"
|
||||
@ -101,11 +101,9 @@ COPY /Y /V "Hunspellx86.dll" "temp_zip\"
|
||||
COPY /Y /V "Hunspellx64.dll" "temp_zip\"
|
||||
COPY /Y /V "SubtitleEdit.exe" "temp_zip\"
|
||||
COPY /Y /V "Languages\*.xml" "temp_zip\Languages\"
|
||||
COPY /Y /V "..\..\..\Tesseract\msvcp90.dll" "temp_zip\Tesseract\"
|
||||
COPY /Y /V "..\..\..\Tesseract\msvcr90.dll" "temp_zip\Tesseract\"
|
||||
COPY /Y /V "..\..\..\Tesseract\tesseract.exe" "temp_zip\Tesseract\"
|
||||
COPY /Y /V "..\..\..\Tesseract\tessdata\configs\hocr" "temp_zip\Tesseract\tessdata\configs\"
|
||||
COPY /Y /V "..\..\..\Tesseract\tessdata\*.traineddata" "temp_zip\Tesseract\tessdata\"
|
||||
COPY /Y /V "..\..\..\Tesseract4\tesseract.exe" "temp_zip\Tesseract4\"
|
||||
COPY /Y /V "..\..\..\Tesseract4\tessdata\configs\hocr" "temp_zip\Tesseract4\tessdata\configs\"
|
||||
COPY /Y /V "..\..\..\Tesseract4\tessdata\*.traineddata" "temp_zip\Tesseract4\tessdata\"
|
||||
|
||||
PUSHD "temp_zip"
|
||||
START "" /B /WAIT "%SEVENZIP%" a -tzip -mx=9 "SE%VERSION%.zip" * >NUL
|
||||
|
@ -266,12 +266,9 @@ Source: {#bindir}\SubtitleEdit.exe; DestDir: {app};
|
||||
Source: {#bindir}\SubtitleEdit.resources.dll; DestDir: {app}; Flags: ignoreversion; Components: main
|
||||
Source: ..\Changelog.txt; DestDir: {app}; Flags: ignoreversion; Components: main
|
||||
Source: ..\LICENSE.txt; DestDir: {app}; Flags: ignoreversion; Components: main
|
||||
Source: ..\Tesseract\msvcp90.dll; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main
|
||||
Source: ..\Tesseract\msvcr90.dll; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main
|
||||
Source: ..\Tesseract\tessdata\configs\hocr; DestDir: {app}\Tesseract\tessdata\configs; Flags: ignoreversion; Components: main
|
||||
Source: ..\Tesseract\tessdata\eng.traineddata; DestDir: {app}\Tesseract\tessdata; Flags: ignoreversion; Components: main
|
||||
Source: ..\Tesseract\tessdata\music.traineddata; DestDir: {app}\Tesseract\tessdata; Flags: ignoreversion; Components: main
|
||||
Source: ..\Tesseract\tesseract.exe; DestDir: {app}\Tesseract; Flags: ignoreversion; Components: main
|
||||
Source: ..\Tesseract4\tessdata\configs\hocr; DestDir: {app}\Tesseract4\tessdata\configs; Flags: ignoreversion; Components: main
|
||||
Source: ..\Tesseract4\tessdata\eng.traineddata; DestDir: {app}\Tesseract4\tessdata; Flags: ignoreversion; Components: main
|
||||
Source: ..\Tesseract4\tesseract.exe; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
|
||||
Source: Icons\uninstall.ico; DestDir: {app}\Icons; Flags: ignoreversion; Components: main
|
||||
|
||||
|
||||
|
@ -23,7 +23,7 @@ namespace Nikse.SubtitleEdit.Core
|
||||
public static readonly string SceneChangesDirectory = DataDirectory + "SceneChanges" + Path.DirectorySeparatorChar;
|
||||
public static readonly string AutoBackupDirectory = DataDirectory + "AutoBackup" + Path.DirectorySeparatorChar;
|
||||
public static readonly string VobSubCompareDirectory = DataDirectory + "VobSub" + Path.DirectorySeparatorChar;
|
||||
public static readonly string TesseractDirectory = DataDirectory + "Tesseract" + Path.DirectorySeparatorChar;
|
||||
public static readonly string TesseractDirectory = DataDirectory + "Tesseract4" + Path.DirectorySeparatorChar;
|
||||
public static readonly string WaveformsDirectory = DataDirectory + "Waveforms" + Path.DirectorySeparatorChar;
|
||||
public static readonly string PluginsDirectory = DataDirectory + "Plugins" + Path.DirectorySeparatorChar;
|
||||
public static readonly string IconsDirectory = BaseDirectory + "Icons" + Path.DirectorySeparatorChar;
|
||||
@ -54,21 +54,9 @@ namespace Nikse.SubtitleEdit.Core
|
||||
}
|
||||
|
||||
|
||||
public static Settings Settings
|
||||
{
|
||||
get
|
||||
{
|
||||
return Instance.Value._settings.Value;
|
||||
}
|
||||
}
|
||||
public static Settings Settings => Instance.Value._settings.Value;
|
||||
|
||||
public static IEnumerable<Encoding> AvailableEncodings
|
||||
{
|
||||
get
|
||||
{
|
||||
return Instance.Value._encodings;
|
||||
}
|
||||
}
|
||||
public static IEnumerable<Encoding> AvailableEncodings => Instance.Value._encodings;
|
||||
|
||||
private static string GetInstallerPath()
|
||||
{
|
||||
@ -117,6 +105,7 @@ namespace Nikse.SubtitleEdit.Core
|
||||
}
|
||||
catch
|
||||
{
|
||||
// ignored
|
||||
}
|
||||
}
|
||||
Directory.CreateDirectory(Path.Combine(appDataRoamingPath, "Dictionaries"));
|
||||
|
@ -2649,7 +2649,6 @@ Keep changes?",
|
||||
SaveAllSubtitleImagesAsBdnXml = "Save all images (png/bdn xml)...",
|
||||
SaveAllSubtitleImagesWithHtml = "Save all images with HTML index...",
|
||||
XImagesSavedInY = "{0} images saved in {1}",
|
||||
TryModiForUnknownWords = "Try Microsoft MODI OCR for unknown words",
|
||||
DictionaryX = "Dictionary: {0}",
|
||||
RightToLeft = "Right to left",
|
||||
ShowOnlyForcedSubtitles = "Show only forced subtitles",
|
||||
|
@ -6259,9 +6259,6 @@ namespace Nikse.SubtitleEdit.Core
|
||||
case "VobSubOcr/XImagesSavedInY":
|
||||
language.VobSubOcr.XImagesSavedInY = reader.Value;
|
||||
break;
|
||||
case "VobSubOcr/TryModiForUnknownWords":
|
||||
language.VobSubOcr.TryModiForUnknownWords = reader.Value;
|
||||
break;
|
||||
case "VobSubOcr/DictionaryX":
|
||||
language.VobSubOcr.DictionaryX = reader.Value;
|
||||
break;
|
||||
|
@ -2524,7 +2524,6 @@
|
||||
public string SaveAllSubtitleImagesAsBdnXml { get; set; }
|
||||
public string SaveAllSubtitleImagesWithHtml { get; set; }
|
||||
public string XImagesSavedInY { get; set; }
|
||||
public string TryModiForUnknownWords { get; set; }
|
||||
public string DictionaryX { get; set; }
|
||||
public string RightToLeft { get; set; }
|
||||
public string ShowOnlyForcedSubtitles { get; set; }
|
||||
|
@ -531,6 +531,7 @@
|
||||
<Compile Include="TarHeader.cs" />
|
||||
<Compile Include="TarReader.cs" />
|
||||
<Compile Include="TaskbarList.cs" />
|
||||
<Compile Include="TesseractDictionary.cs" />
|
||||
<Compile Include="TextDraw.cs" />
|
||||
<Compile Include="TextEncodingExtensions.cs" />
|
||||
<Compile Include="TimeCode.cs" />
|
||||
|
@ -865,8 +865,8 @@ namespace Nikse.SubtitleEdit.Core
|
||||
public int LastModiLanguageId { get; set; }
|
||||
public string LastOcrMethod { get; set; }
|
||||
public string TesseractLastLanguage { get; set; }
|
||||
public bool UseModiInTesseractForUnknownWords { get; set; }
|
||||
public bool UseItalicsInTesseract { get; set; }
|
||||
public int TesseractEngineMode { get; set; }
|
||||
public bool UseMusicSymbolsInTesseract { get; set; }
|
||||
public bool RightToLeft { get; set; }
|
||||
public bool TopToBottom { get; set; }
|
||||
@ -2680,12 +2680,12 @@ namespace Nikse.SubtitleEdit.Core
|
||||
subNode = node.SelectSingleNode("TesseractLastLanguage");
|
||||
if (subNode != null)
|
||||
settings.VobSubOcr.TesseractLastLanguage = subNode.InnerText;
|
||||
subNode = node.SelectSingleNode("UseModiInTesseractForUnknownWords");
|
||||
if (subNode != null)
|
||||
settings.VobSubOcr.UseModiInTesseractForUnknownWords = Convert.ToBoolean(subNode.InnerText);
|
||||
subNode = node.SelectSingleNode("UseItalicsInTesseract");
|
||||
if (subNode != null)
|
||||
settings.VobSubOcr.UseItalicsInTesseract = Convert.ToBoolean(subNode.InnerText);
|
||||
subNode = node.SelectSingleNode("TesseractEngineMode");
|
||||
if (subNode != null)
|
||||
settings.VobSubOcr.TesseractEngineMode = Convert.ToInt32(subNode.InnerText);
|
||||
subNode = node.SelectSingleNode("UseMusicSymbolsInTesseract");
|
||||
if (subNode != null)
|
||||
settings.VobSubOcr.UseMusicSymbolsInTesseract = Convert.ToBoolean(subNode.InnerText);
|
||||
@ -3808,8 +3808,8 @@ namespace Nikse.SubtitleEdit.Core
|
||||
textWriter.WriteElementString("LastModiLanguageId", settings.VobSubOcr.LastModiLanguageId.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("LastOcrMethod", settings.VobSubOcr.LastOcrMethod);
|
||||
textWriter.WriteElementString("TesseractLastLanguage", settings.VobSubOcr.TesseractLastLanguage);
|
||||
textWriter.WriteElementString("UseModiInTesseractForUnknownWords", settings.VobSubOcr.UseModiInTesseractForUnknownWords.ToString());
|
||||
textWriter.WriteElementString("UseItalicsInTesseract", settings.VobSubOcr.UseItalicsInTesseract.ToString());
|
||||
textWriter.WriteElementString("TesseractEngineMode", settings.VobSubOcr.TesseractEngineMode.ToString());
|
||||
textWriter.WriteElementString("UseMusicSymbolsInTesseract", settings.VobSubOcr.UseMusicSymbolsInTesseract.ToString());
|
||||
textWriter.WriteElementString("RightToLeft", settings.VobSubOcr.RightToLeft.ToString());
|
||||
textWriter.WriteElementString("TopToBottom", settings.VobSubOcr.TopToBottom.ToString());
|
||||
|
195
libse/TesseractDictionary.cs
Normal file
195
libse/TesseractDictionary.cs
Normal file
@ -0,0 +1,195 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core
|
||||
{
|
||||
/// <summary>
/// Describes one downloadable Tesseract "traineddata" dictionary: its language code,
/// a display name and the URL the file is fetched from.
/// </summary>
public class TesseractDictionary
{
    // "{0}" is replaced with the dictionary code (e.g. "eng") to form the download URL.
    private const string DownloadUrlTemplate = "https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/{0}.traineddata";

    /// <summary>
    /// Codes of dictionaries containing both 3.5 + 4.0 data - see https://github.com/tesseract-ocr/tessdata
    /// </summary>
    private static readonly string[] Dictionaries =
    {
        "afr", "amh", "ara", "asm", "aze", "aze_cyrl", "bel", "ben", "bod", "bos",
        "bre", "bul", "cat", "ceb", "ces", "chi_sim", "chi_sim_vert", "chi_tra", "chi_tra_vert", "chr",
        "cos", "cym", "dan", "deu", "div", "dzo", "ell", "eng", "enm", "epo",
        "equ", "est", "eus", "fao", "fas", "fil", "fin", "fra", "frk", "frm",
        "fry", "gla", "gle", "glg", "grc", "guj", "hat", "heb", "hin", "hrv",
        "hun", "hye", "iku", "ind", "isl", "ita", "jav", "jpn", "jpn_vert", "kan",
        "kat", "kaz", "khm", "kir", "kor", "kor_vert", "kur", "kur_ara", "lao", "lat",
        "lav", "lit", "ltz", "mal", "mar", "mkd", "mlt", "mon", "mri", "msa",
        "mya", "nep", "nld", "nor", "oci", "ori", "osd", "pan", "pol", "por",
        "pus", "que", "ron", "rus", "san", "sin", "slk", "slv", "snd", "spa",
        "sqi", "srp", "srp_latn", "sun", "swa", "swe", "syr", "tam", "tat", "tel",
        "tgk", "tgl", "tha", "tir", "ton", "tur", "uig", "ukr", "urd", "uzb",
        "uzb_cyrl", "vie", "yid", "yor"
    };

    /// <summary>Dictionary code exactly as listed in the code table (e.g. "aze_cyrl").</summary>
    public string Code { get; set; }

    /// <summary>Display name; this is also what <see cref="ToString"/> returns.</summary>
    public string Name { get; set; }

    /// <summary>Download address built from the URL template and <see cref="Code"/>.</summary>
    public string Url { get; set; }

    /// <summary>
    /// Builds a fresh list with one entry per known dictionary code, in table order.
    /// </summary>
    /// <returns>A new list; each entry has its name, code and URL filled in.</returns>
    public static List<TesseractDictionary> List()
    {
        // Fetched once and shared by every name lookup below.
        var neutralCultures = CultureInfo.GetCultures(CultureTypes.NeutralCultures);

        return Dictionaries
            .Select(languageCode => new TesseractDictionary
            {
                Name = MakeName(languageCode, neutralCultures),
                Code = languageCode,
                Url = string.Format(DownloadUrlTemplate, languageCode)
            })
            .ToList();
    }

    /// <summary>
    /// Derives a display name from a dictionary code. Everything after the first
    /// underscore becomes a parenthesised suffix; the part before it is replaced by the
    /// English name of the first culture whose three-letter ISO language name matches
    /// (case-insensitively). When no culture matches, the code part is kept as-is.
    /// </summary>
    /// <param name="dictionary">Dictionary code, e.g. "eng" or "chi_sim_vert".</param>
    /// <param name="cultures">Cultures to search for a matching three-letter ISO name.</param>
    /// <returns>The resolved (or raw) language part followed by the optional suffix.</returns>
    private static string MakeName(string dictionary, CultureInfo[] cultures)
    {
        var separatorIndex = dictionary.IndexOf('_');
        var hasVariant = separatorIndex > 0;

        var languagePart = hasVariant
            ? dictionary.Substring(0, separatorIndex).Trim('_')
            : dictionary;
        var variantSuffix = hasVariant
            ? " (" + dictionary.Substring(separatorIndex).Trim('_') + ")"
            : string.Empty;

        try
        {
            foreach (var culture in cultures)
            {
                if (string.Equals(culture.ThreeLetterISOLanguageName, languagePart, StringComparison.OrdinalIgnoreCase))
                {
                    // First match wins.
                    languagePart = culture.EnglishName;
                    break;
                }
            }
        }
        catch
        {
            // ignore - best effort only; fall back to whatever languagePart holds
        }

        return languagePart + variantSuffix;
    }

    /// <summary>Returns <see cref="Name"/>.</summary>
    public override string ToString() => Name;
}
|
||||
}
|
2
src/Forms/GetTesseractDictionaries.Designer.cs
generated
2
src/Forms/GetTesseractDictionaries.Designer.cs
generated
@ -1,6 +1,6 @@
|
||||
namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
partial class GetTesseractDictionaries
|
||||
sealed partial class GetTesseractDictionaries
|
||||
{
|
||||
/// <summary>
|
||||
/// Required designer variable.
|
||||
|
@ -6,17 +6,14 @@ using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Net;
|
||||
using System.Windows.Forms;
|
||||
using System.Xml;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
public partial class GetTesseractDictionaries : Form
|
||||
public sealed partial class GetTesseractDictionaries : Form
|
||||
{
|
||||
private List<string> _dictionaryDownloadLinks = new List<string>();
|
||||
private List<string> _descriptions = new List<string>();
|
||||
private string _xmlName = null;
|
||||
private string _dictionaryFileName = null;
|
||||
private string _dictionaryFileName;
|
||||
internal string ChosenLanguage { get; private set; }
|
||||
private readonly List<TesseractDictionary> _dictionaries;
|
||||
|
||||
public GetTesseractDictionaries()
|
||||
{
|
||||
@ -31,57 +28,24 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
buttonDownload.Text = Configuration.Settings.Language.GetTesseractDictionaries.Download;
|
||||
labelPleaseWait.Text = string.Empty;
|
||||
buttonOK.Text = Configuration.Settings.Language.General.Ok;
|
||||
LoadDictionaryList("Nikse.SubtitleEdit.Resources.TesseractDictionaries.xml.gz");
|
||||
FixLargeFonts();
|
||||
_dictionaries = TesseractDictionary.List();
|
||||
LoadDictionaryList();
|
||||
}
|
||||
|
||||
private void LoadDictionaryList(string xmlRessourceName)
|
||||
private void LoadDictionaryList()
|
||||
{
|
||||
_dictionaryDownloadLinks = new List<string>();
|
||||
_descriptions = new List<string>();
|
||||
_xmlName = xmlRessourceName;
|
||||
System.Reflection.Assembly asm = System.Reflection.Assembly.GetExecutingAssembly();
|
||||
Stream strm = asm.GetManifestResourceStream(_xmlName);
|
||||
if (strm != null)
|
||||
comboBoxDictionaries.BeginUpdate();
|
||||
comboBoxDictionaries.Items.Clear();
|
||||
foreach (var d in _dictionaries)
|
||||
{
|
||||
comboBoxDictionaries.Items.Clear();
|
||||
XmlDocument doc = new XmlDocument();
|
||||
using (var rdr = new StreamReader(strm))
|
||||
using (var zip = new GZipStream(rdr.BaseStream, CompressionMode.Decompress))
|
||||
if (!string.IsNullOrEmpty(d.Url))
|
||||
{
|
||||
byte[] data = new byte[195000];
|
||||
int bytesRead = zip.Read(data, 0, data.Length);
|
||||
var s = System.Text.Encoding.UTF8.GetString(data, 0, bytesRead).Trim();
|
||||
try
|
||||
{
|
||||
doc.LoadXml(s);
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
MessageBox.Show(exception.Message);
|
||||
}
|
||||
}
|
||||
|
||||
foreach (XmlNode node in doc.DocumentElement.SelectNodes("Dictionary"))
|
||||
{
|
||||
string englishName = node.SelectSingleNode("EnglishName").InnerText;
|
||||
string downloadLink = node.SelectSingleNode("DownloadLink").InnerText;
|
||||
|
||||
string description = string.Empty;
|
||||
if (node.SelectSingleNode("Description") != null)
|
||||
description = node.SelectSingleNode("Description").InnerText;
|
||||
|
||||
if (!string.IsNullOrEmpty(downloadLink))
|
||||
{
|
||||
string name = englishName;
|
||||
|
||||
comboBoxDictionaries.Items.Add(name);
|
||||
_dictionaryDownloadLinks.Add(downloadLink);
|
||||
_descriptions.Add(description);
|
||||
}
|
||||
comboBoxDictionaries.SelectedIndex = 0;
|
||||
comboBoxDictionaries.Items.Add(d);
|
||||
}
|
||||
}
|
||||
comboBoxDictionaries.SelectedIndex = 0;
|
||||
comboBoxDictionaries.EndUpdate();
|
||||
comboBoxDictionaries.AutoCompleteSource = AutoCompleteSource.ListItems;
|
||||
comboBoxDictionaries.AutoCompleteMode = AutoCompleteMode.Append;
|
||||
}
|
||||
@ -105,7 +69,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
Cursor = Cursors.WaitCursor;
|
||||
|
||||
int index = comboBoxDictionaries.SelectedIndex;
|
||||
string url = _dictionaryDownloadLinks[index];
|
||||
string url = _dictionaries[index].Url;
|
||||
ChosenLanguage = comboBoxDictionaries.Items[index].ToString();
|
||||
|
||||
var wc = new WebClient { Proxy = Utilities.GetProxy() };
|
||||
|
529
src/Forms/Ocr/VobSubOcr.Designer.cs
generated
529
src/Forms/Ocr/VobSubOcr.Designer.cs
generated
@ -39,6 +39,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.nOcrTrainingToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
|
||||
this.toolStripSeparator4 = new System.Windows.Forms.ToolStripSeparator();
|
||||
this.toolStripMenuItemSetUnItalicFactor = new System.Windows.Forms.ToolStripMenuItem();
|
||||
this.setForecolorThresholdToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
|
||||
this.toolStripSeparator3 = new System.Windows.Forms.ToolStripSeparator();
|
||||
this.deleteToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
|
||||
this.labelSubtitleText = new System.Windows.Forms.Label();
|
||||
@ -48,23 +49,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.buttonCancel = new System.Windows.Forms.Button();
|
||||
this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox();
|
||||
this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox();
|
||||
this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox();
|
||||
this.labelMinLineSplitHeight = new System.Windows.Forms.Label();
|
||||
this.comboBoxLineSplitMinLineHeight = new System.Windows.Forms.ComboBox();
|
||||
this.labelMaxErrorPercent = new System.Windows.Forms.Label();
|
||||
this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown();
|
||||
this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox();
|
||||
this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown();
|
||||
this.buttonEditCharacterDatabase = new System.Windows.Forms.Button();
|
||||
this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label();
|
||||
this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox();
|
||||
this.labelImageDatabase = new System.Windows.Forms.Label();
|
||||
this.buttonNewCharacterDatabase = new System.Windows.Forms.Button();
|
||||
this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox();
|
||||
this.buttonGetTesseractDictionaries = new System.Windows.Forms.Button();
|
||||
this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox();
|
||||
this.checkBoxTesseractItalicsOn = new System.Windows.Forms.CheckBox();
|
||||
this.checkBoxUseModiInTesseractForUnknownWords = new System.Windows.Forms.CheckBox();
|
||||
this.labelTesseractLanguage = new System.Windows.Forms.Label();
|
||||
this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox();
|
||||
this.groupBoxModiMethod = new System.Windows.Forms.GroupBox();
|
||||
@ -80,6 +68,18 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.checkBoxRightToLeftNOCR = new System.Windows.Forms.CheckBox();
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.NumericUpDown();
|
||||
this.labelNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.Label();
|
||||
this.groupBoxImageCompareMethod = new System.Windows.Forms.GroupBox();
|
||||
this.labelMinLineSplitHeight = new System.Windows.Forms.Label();
|
||||
this.comboBoxLineSplitMinLineHeight = new System.Windows.Forms.ComboBox();
|
||||
this.labelMaxErrorPercent = new System.Windows.Forms.Label();
|
||||
this.numericUpDownMaxErrorPct = new System.Windows.Forms.NumericUpDown();
|
||||
this.checkBoxRightToLeft = new System.Windows.Forms.CheckBox();
|
||||
this.numericUpDownPixelsIsSpace = new System.Windows.Forms.NumericUpDown();
|
||||
this.buttonEditCharacterDatabase = new System.Windows.Forms.Button();
|
||||
this.labelNoOfPixelsIsSpace = new System.Windows.Forms.Label();
|
||||
this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox();
|
||||
this.labelImageDatabase = new System.Windows.Forms.Label();
|
||||
this.buttonNewCharacterDatabase = new System.Windows.Forms.Button();
|
||||
this.groupBoxOCRControls = new System.Windows.Forms.GroupBox();
|
||||
this.labelStartFrom = new System.Windows.Forms.Label();
|
||||
this.numericUpDownStartNumber = new System.Windows.Forms.NumericUpDown();
|
||||
@ -141,16 +141,17 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.textBoxCurrentText = new Nikse.SubtitleEdit.Controls.SETextBox();
|
||||
this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView();
|
||||
this.timerHideStatus = new System.Windows.Forms.Timer(this.components);
|
||||
this.setForecolorThresholdToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
|
||||
this.comboBoxTesseractEngineMode = new System.Windows.Forms.ComboBox();
|
||||
this.labelTesseractEngineMode = new System.Windows.Forms.Label();
|
||||
this.contextMenuStripListview.SuspendLayout();
|
||||
this.groupBoxOcrMethod.SuspendLayout();
|
||||
this.groupBoxImageCompareMethod.SuspendLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit();
|
||||
this.GroupBoxTesseractMethod.SuspendLayout();
|
||||
this.groupBoxModiMethod.SuspendLayout();
|
||||
this.groupBoxNOCR.SuspendLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit();
|
||||
this.groupBoxImageCompareMethod.SuspendLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).BeginInit();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit();
|
||||
this.groupBoxOCRControls.SuspendLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).BeginInit();
|
||||
this.groupBoxOcrAutoFix.SuspendLayout();
|
||||
@ -200,7 +201,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.toolStripSeparator3,
|
||||
this.deleteToolStripMenuItem});
|
||||
this.contextMenuStripListview.Name = "contextMenuStripListview";
|
||||
this.contextMenuStripListview.Size = new System.Drawing.Size(306, 364);
|
||||
this.contextMenuStripListview.Size = new System.Drawing.Size(306, 342);
|
||||
this.contextMenuStripListview.Opening += new System.ComponentModel.CancelEventHandler(this.ContextMenuStripListviewOpening);
|
||||
//
|
||||
// normalToolStripMenuItem
|
||||
@ -339,6 +340,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.toolStripMenuItemSetUnItalicFactor.Text = "Set un-italic factor...";
|
||||
this.toolStripMenuItemSetUnItalicFactor.Click += new System.EventHandler(this.toolStripMenuItemSetUnItalicFactor_Click);
|
||||
//
|
||||
// setForecolorThresholdToolStripMenuItem
|
||||
//
|
||||
this.setForecolorThresholdToolStripMenuItem.Name = "setForecolorThresholdToolStripMenuItem";
|
||||
this.setForecolorThresholdToolStripMenuItem.Size = new System.Drawing.Size(305, 22);
|
||||
this.setForecolorThresholdToolStripMenuItem.Text = "Set forecolor threshold...";
|
||||
this.setForecolorThresholdToolStripMenuItem.Click += new System.EventHandler(this.setForecolorThresholdToolStripMenuItem_Click);
|
||||
//
|
||||
// toolStripSeparator3
|
||||
//
|
||||
this.toolStripSeparator3.Name = "toolStripSeparator3";
|
||||
@ -406,10 +414,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
// groupBoxOcrMethod
|
||||
//
|
||||
this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod);
|
||||
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
|
||||
this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod);
|
||||
this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod);
|
||||
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
|
||||
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
|
||||
this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5);
|
||||
this.groupBoxOcrMethod.Name = "groupBoxOcrMethod";
|
||||
this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192);
|
||||
@ -432,6 +440,223 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.comboBoxOcrMethod.TabIndex = 0;
|
||||
this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged);
|
||||
//
|
||||
// GroupBoxTesseractMethod
|
||||
//
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractEngineMode);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractEngineMode);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages);
|
||||
this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31);
|
||||
this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod";
|
||||
this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131);
|
||||
this.GroupBoxTesseractMethod.TabIndex = 1;
|
||||
this.GroupBoxTesseractMethod.TabStop = false;
|
||||
this.GroupBoxTesseractMethod.Text = "Tesseract";
|
||||
//
|
||||
// buttonGetTesseractDictionaries
|
||||
//
|
||||
this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 28);
|
||||
this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries";
|
||||
this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23);
|
||||
this.buttonGetTesseractDictionaries.TabIndex = 2;
|
||||
this.buttonGetTesseractDictionaries.Text = "...";
|
||||
this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true;
|
||||
this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click);
|
||||
//
|
||||
// checkBoxTesseractMusicOn
|
||||
//
|
||||
this.checkBoxTesseractMusicOn.AutoSize = true;
|
||||
this.checkBoxTesseractMusicOn.Checked = true;
|
||||
this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked;
|
||||
this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 69);
|
||||
this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn";
|
||||
this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(93, 17);
|
||||
this.checkBoxTesseractMusicOn.TabIndex = 4;
|
||||
this.checkBoxTesseractMusicOn.Text = "Music symbols";
|
||||
this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// checkBoxTesseractItalicsOn
|
||||
//
|
||||
this.checkBoxTesseractItalicsOn.AutoSize = true;
|
||||
this.checkBoxTesseractItalicsOn.Checked = true;
|
||||
this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked;
|
||||
this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 69);
|
||||
this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn";
|
||||
this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17);
|
||||
this.checkBoxTesseractItalicsOn.TabIndex = 3;
|
||||
this.checkBoxTesseractItalicsOn.Text = "Italics";
|
||||
this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// labelTesseractLanguage
|
||||
//
|
||||
this.labelTesseractLanguage.AutoSize = true;
|
||||
this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 32);
|
||||
this.labelTesseractLanguage.Name = "labelTesseractLanguage";
|
||||
this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13);
|
||||
this.labelTesseractLanguage.TabIndex = 0;
|
||||
this.labelTesseractLanguage.Text = "Language";
|
||||
//
|
||||
// comboBoxTesseractLanguages
|
||||
//
|
||||
this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||
this.comboBoxTesseractLanguages.FormattingEnabled = true;
|
||||
this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 29);
|
||||
this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages";
|
||||
this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21);
|
||||
this.comboBoxTesseractLanguages.TabIndex = 1;
|
||||
this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged);
|
||||
//
|
||||
// groupBoxModiMethod
|
||||
//
|
||||
this.groupBoxModiMethod.Controls.Add(this.label1);
|
||||
this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage);
|
||||
this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50);
|
||||
this.groupBoxModiMethod.Name = "groupBoxModiMethod";
|
||||
this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131);
|
||||
this.groupBoxModiMethod.TabIndex = 3;
|
||||
this.groupBoxModiMethod.TabStop = false;
|
||||
this.groupBoxModiMethod.Text = "MODI";
|
||||
//
|
||||
// label1
|
||||
//
|
||||
this.label1.AutoSize = true;
|
||||
this.label1.Location = new System.Drawing.Point(11, 58);
|
||||
this.label1.Name = "label1";
|
||||
this.label1.Size = new System.Drawing.Size(54, 13);
|
||||
this.label1.TabIndex = 33;
|
||||
this.label1.Text = "Language";
|
||||
//
|
||||
// comboBoxModiLanguage
|
||||
//
|
||||
this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||
this.comboBoxModiLanguage.FormattingEnabled = true;
|
||||
this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55);
|
||||
this.comboBoxModiLanguage.Name = "comboBoxModiLanguage";
|
||||
this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21);
|
||||
this.comboBoxModiLanguage.TabIndex = 0;
|
||||
this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged);
|
||||
//
|
||||
// groupBoxNOCR
|
||||
//
|
||||
this.groupBoxNOCR.Controls.Add(this.buttonLineOcrEditLanguage);
|
||||
this.groupBoxNOCR.Controls.Add(this.buttonLineOcrNewLanguage);
|
||||
this.groupBoxNOCR.Controls.Add(this.label2);
|
||||
this.groupBoxNOCR.Controls.Add(this.comboBoxNOcrLanguage);
|
||||
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic);
|
||||
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrCorrect);
|
||||
this.groupBoxNOCR.Controls.Add(this.checkBoxRightToLeftNOCR);
|
||||
this.groupBoxNOCR.Controls.Add(this.numericUpDownNumberOfPixelsIsSpaceNOCR);
|
||||
this.groupBoxNOCR.Controls.Add(this.labelNumberOfPixelsIsSpaceNOCR);
|
||||
this.groupBoxNOCR.Location = new System.Drawing.Point(7, 38);
|
||||
this.groupBoxNOCR.Name = "groupBoxNOCR";
|
||||
this.groupBoxNOCR.Size = new System.Drawing.Size(366, 131);
|
||||
this.groupBoxNOCR.TabIndex = 7;
|
||||
this.groupBoxNOCR.TabStop = false;
|
||||
this.groupBoxNOCR.Text = "nOCR";
|
||||
//
|
||||
// buttonLineOcrEditLanguage
|
||||
//
|
||||
this.buttonLineOcrEditLanguage.Location = new System.Drawing.Point(210, 97);
|
||||
this.buttonLineOcrEditLanguage.Name = "buttonLineOcrEditLanguage";
|
||||
this.buttonLineOcrEditLanguage.Size = new System.Drawing.Size(68, 21);
|
||||
this.buttonLineOcrEditLanguage.TabIndex = 41;
|
||||
this.buttonLineOcrEditLanguage.Text = "Edit";
|
||||
this.buttonLineOcrEditLanguage.UseVisualStyleBackColor = true;
|
||||
this.buttonLineOcrEditLanguage.Click += new System.EventHandler(this.buttonLineOcrEditLanguage_Click);
|
||||
//
|
||||
// buttonLineOcrNewLanguage
|
||||
//
|
||||
this.buttonLineOcrNewLanguage.Location = new System.Drawing.Point(283, 97);
|
||||
this.buttonLineOcrNewLanguage.Name = "buttonLineOcrNewLanguage";
|
||||
this.buttonLineOcrNewLanguage.Size = new System.Drawing.Size(68, 21);
|
||||
this.buttonLineOcrNewLanguage.TabIndex = 40;
|
||||
this.buttonLineOcrNewLanguage.Text = "New";
|
||||
this.buttonLineOcrNewLanguage.UseVisualStyleBackColor = true;
|
||||
this.buttonLineOcrNewLanguage.Click += new System.EventHandler(this.buttonLineOcrNewLanguage_Click);
|
||||
//
|
||||
// label2
|
||||
//
|
||||
this.label2.AutoSize = true;
|
||||
this.label2.Location = new System.Drawing.Point(11, 101);
|
||||
this.label2.Name = "label2";
|
||||
this.label2.Size = new System.Drawing.Size(54, 13);
|
||||
this.label2.TabIndex = 35;
|
||||
this.label2.Text = "Language";
|
||||
//
|
||||
// comboBoxNOcrLanguage
|
||||
//
|
||||
this.comboBoxNOcrLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||
this.comboBoxNOcrLanguage.FormattingEnabled = true;
|
||||
this.comboBoxNOcrLanguage.Location = new System.Drawing.Point(74, 97);
|
||||
this.comboBoxNOcrLanguage.Name = "comboBoxNOcrLanguage";
|
||||
this.comboBoxNOcrLanguage.Size = new System.Drawing.Size(130, 21);
|
||||
this.comboBoxNOcrLanguage.TabIndex = 34;
|
||||
this.comboBoxNOcrLanguage.SelectedIndexChanged += new System.EventHandler(this.comboBoxNOcrLanguage_SelectedIndexChanged);
|
||||
//
|
||||
// checkBoxNOcrItalic
|
||||
//
|
||||
this.checkBoxNOcrItalic.AutoSize = true;
|
||||
this.checkBoxNOcrItalic.Location = new System.Drawing.Point(15, 42);
|
||||
this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic";
|
||||
this.checkBoxNOcrItalic.Size = new System.Drawing.Size(92, 17);
|
||||
this.checkBoxNOcrItalic.TabIndex = 8;
|
||||
this.checkBoxNOcrItalic.Text = "Contains italic";
|
||||
this.checkBoxNOcrItalic.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// checkBoxNOcrCorrect
|
||||
//
|
||||
this.checkBoxNOcrCorrect.AutoSize = true;
|
||||
this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(235, 17);
|
||||
this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect";
|
||||
this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17);
|
||||
this.checkBoxNOcrCorrect.TabIndex = 7;
|
||||
this.checkBoxNOcrCorrect.Text = "Draw missing texts";
|
||||
this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// checkBoxRightToLeftNOCR
|
||||
//
|
||||
this.checkBoxRightToLeftNOCR.AutoSize = true;
|
||||
this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(235, 40);
|
||||
this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR";
|
||||
this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17);
|
||||
this.checkBoxRightToLeftNOCR.TabIndex = 6;
|
||||
this.checkBoxRightToLeftNOCR.Text = "Right to left";
|
||||
this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// numericUpDownNumberOfPixelsIsSpaceNOCR
|
||||
//
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(122, 17);
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Maximum = new decimal(new int[] {
|
||||
50,
|
||||
0,
|
||||
0,
|
||||
0});
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Minimum = new decimal(new int[] {
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0});
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Name = "numericUpDownNumberOfPixelsIsSpaceNOCR";
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(50, 21);
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.TabIndex = 5;
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Value = new decimal(new int[] {
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
0});
|
||||
//
|
||||
// labelNumberOfPixelsIsSpaceNOCR
|
||||
//
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.AutoSize = true;
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(12, 20);
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.Name = "labelNumberOfPixelsIsSpaceNOCR";
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(104, 13);
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.TabIndex = 4;
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.Text = "No of pixels is space";
|
||||
//
|
||||
// groupBoxImageCompareMethod
|
||||
//
|
||||
this.groupBoxImageCompareMethod.Controls.Add(this.labelMinLineSplitHeight);
|
||||
@ -660,235 +885,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.buttonNewCharacterDatabase.UseVisualStyleBackColor = true;
|
||||
this.buttonNewCharacterDatabase.Click += new System.EventHandler(this.ButtonNewCharacterDatabaseClick);
|
||||
//
|
||||
// GroupBoxTesseractMethod
|
||||
//
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxUseModiInTesseractForUnknownWords);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage);
|
||||
this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages);
|
||||
this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31);
|
||||
this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod";
|
||||
this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131);
|
||||
this.GroupBoxTesseractMethod.TabIndex = 1;
|
||||
this.GroupBoxTesseractMethod.TabStop = false;
|
||||
this.GroupBoxTesseractMethod.Text = "Tesseract";
|
||||
//
|
||||
// buttonGetTesseractDictionaries
|
||||
//
|
||||
this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 30);
|
||||
this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries";
|
||||
this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23);
|
||||
this.buttonGetTesseractDictionaries.TabIndex = 2;
|
||||
this.buttonGetTesseractDictionaries.Text = "...";
|
||||
this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true;
|
||||
this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click);
|
||||
//
|
||||
// checkBoxTesseractMusicOn
|
||||
//
|
||||
this.checkBoxTesseractMusicOn.AutoSize = true;
|
||||
this.checkBoxTesseractMusicOn.Checked = true;
|
||||
this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked;
|
||||
this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 101);
|
||||
this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn";
|
||||
this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(93, 17);
|
||||
this.checkBoxTesseractMusicOn.TabIndex = 4;
|
||||
this.checkBoxTesseractMusicOn.Text = "Music symbols";
|
||||
this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// checkBoxTesseractItalicsOn
|
||||
//
|
||||
this.checkBoxTesseractItalicsOn.AutoSize = true;
|
||||
this.checkBoxTesseractItalicsOn.Checked = true;
|
||||
this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked;
|
||||
this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 101);
|
||||
this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn";
|
||||
this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17);
|
||||
this.checkBoxTesseractItalicsOn.TabIndex = 3;
|
||||
this.checkBoxTesseractItalicsOn.Text = "Italics";
|
||||
this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// checkBoxUseModiInTesseractForUnknownWords
|
||||
//
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.AutoSize = true;
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Checked = true;
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked;
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74);
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords";
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(167, 17);
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 2;
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words";
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// labelTesseractLanguage
|
||||
//
|
||||
this.labelTesseractLanguage.AutoSize = true;
|
||||
this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 34);
|
||||
this.labelTesseractLanguage.Name = "labelTesseractLanguage";
|
||||
this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13);
|
||||
this.labelTesseractLanguage.TabIndex = 0;
|
||||
this.labelTesseractLanguage.Text = "Language";
|
||||
//
|
||||
// comboBoxTesseractLanguages
|
||||
//
|
||||
this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||
this.comboBoxTesseractLanguages.FormattingEnabled = true;
|
||||
this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 31);
|
||||
this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages";
|
||||
this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21);
|
||||
this.comboBoxTesseractLanguages.TabIndex = 1;
|
||||
this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged);
|
||||
//
|
||||
// groupBoxModiMethod
|
||||
//
|
||||
this.groupBoxModiMethod.Controls.Add(this.label1);
|
||||
this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage);
|
||||
this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50);
|
||||
this.groupBoxModiMethod.Name = "groupBoxModiMethod";
|
||||
this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131);
|
||||
this.groupBoxModiMethod.TabIndex = 3;
|
||||
this.groupBoxModiMethod.TabStop = false;
|
||||
this.groupBoxModiMethod.Text = "MODI";
|
||||
//
|
||||
// label1
|
||||
//
|
||||
this.label1.AutoSize = true;
|
||||
this.label1.Location = new System.Drawing.Point(11, 58);
|
||||
this.label1.Name = "label1";
|
||||
this.label1.Size = new System.Drawing.Size(54, 13);
|
||||
this.label1.TabIndex = 33;
|
||||
this.label1.Text = "Language";
|
||||
//
|
||||
// comboBoxModiLanguage
|
||||
//
|
||||
this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||
this.comboBoxModiLanguage.FormattingEnabled = true;
|
||||
this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55);
|
||||
this.comboBoxModiLanguage.Name = "comboBoxModiLanguage";
|
||||
this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21);
|
||||
this.comboBoxModiLanguage.TabIndex = 0;
|
||||
this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged);
|
||||
//
|
||||
// groupBoxNOCR
|
||||
//
|
||||
this.groupBoxNOCR.Controls.Add(this.buttonLineOcrEditLanguage);
|
||||
this.groupBoxNOCR.Controls.Add(this.buttonLineOcrNewLanguage);
|
||||
this.groupBoxNOCR.Controls.Add(this.label2);
|
||||
this.groupBoxNOCR.Controls.Add(this.comboBoxNOcrLanguage);
|
||||
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic);
|
||||
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrCorrect);
|
||||
this.groupBoxNOCR.Controls.Add(this.checkBoxRightToLeftNOCR);
|
||||
this.groupBoxNOCR.Controls.Add(this.numericUpDownNumberOfPixelsIsSpaceNOCR);
|
||||
this.groupBoxNOCR.Controls.Add(this.labelNumberOfPixelsIsSpaceNOCR);
|
||||
this.groupBoxNOCR.Location = new System.Drawing.Point(7, 38);
|
||||
this.groupBoxNOCR.Name = "groupBoxNOCR";
|
||||
this.groupBoxNOCR.Size = new System.Drawing.Size(366, 131);
|
||||
this.groupBoxNOCR.TabIndex = 7;
|
||||
this.groupBoxNOCR.TabStop = false;
|
||||
this.groupBoxNOCR.Text = "nOCR";
|
||||
//
|
||||
// buttonLineOcrEditLanguage
|
||||
//
|
||||
this.buttonLineOcrEditLanguage.Location = new System.Drawing.Point(210, 97);
|
||||
this.buttonLineOcrEditLanguage.Name = "buttonLineOcrEditLanguage";
|
||||
this.buttonLineOcrEditLanguage.Size = new System.Drawing.Size(68, 21);
|
||||
this.buttonLineOcrEditLanguage.TabIndex = 41;
|
||||
this.buttonLineOcrEditLanguage.Text = "Edit";
|
||||
this.buttonLineOcrEditLanguage.UseVisualStyleBackColor = true;
|
||||
this.buttonLineOcrEditLanguage.Click += new System.EventHandler(this.buttonLineOcrEditLanguage_Click);
|
||||
//
|
||||
// buttonLineOcrNewLanguage
|
||||
//
|
||||
this.buttonLineOcrNewLanguage.Location = new System.Drawing.Point(283, 97);
|
||||
this.buttonLineOcrNewLanguage.Name = "buttonLineOcrNewLanguage";
|
||||
this.buttonLineOcrNewLanguage.Size = new System.Drawing.Size(68, 21);
|
||||
this.buttonLineOcrNewLanguage.TabIndex = 40;
|
||||
this.buttonLineOcrNewLanguage.Text = "New";
|
||||
this.buttonLineOcrNewLanguage.UseVisualStyleBackColor = true;
|
||||
this.buttonLineOcrNewLanguage.Click += new System.EventHandler(this.buttonLineOcrNewLanguage_Click);
|
||||
//
|
||||
// label2
|
||||
//
|
||||
this.label2.AutoSize = true;
|
||||
this.label2.Location = new System.Drawing.Point(11, 101);
|
||||
this.label2.Name = "label2";
|
||||
this.label2.Size = new System.Drawing.Size(54, 13);
|
||||
this.label2.TabIndex = 35;
|
||||
this.label2.Text = "Language";
|
||||
//
|
||||
// comboBoxNOcrLanguage
|
||||
//
|
||||
this.comboBoxNOcrLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||
this.comboBoxNOcrLanguage.FormattingEnabled = true;
|
||||
this.comboBoxNOcrLanguage.Location = new System.Drawing.Point(74, 97);
|
||||
this.comboBoxNOcrLanguage.Name = "comboBoxNOcrLanguage";
|
||||
this.comboBoxNOcrLanguage.Size = new System.Drawing.Size(130, 21);
|
||||
this.comboBoxNOcrLanguage.TabIndex = 34;
|
||||
this.comboBoxNOcrLanguage.SelectedIndexChanged += new System.EventHandler(this.comboBoxNOcrLanguage_SelectedIndexChanged);
|
||||
//
|
||||
// checkBoxNOcrItalic
|
||||
//
|
||||
this.checkBoxNOcrItalic.AutoSize = true;
|
||||
this.checkBoxNOcrItalic.Location = new System.Drawing.Point(15, 42);
|
||||
this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic";
|
||||
this.checkBoxNOcrItalic.Size = new System.Drawing.Size(92, 17);
|
||||
this.checkBoxNOcrItalic.TabIndex = 8;
|
||||
this.checkBoxNOcrItalic.Text = "Contains italic";
|
||||
this.checkBoxNOcrItalic.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// checkBoxNOcrCorrect
|
||||
//
|
||||
this.checkBoxNOcrCorrect.AutoSize = true;
|
||||
this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(235, 17);
|
||||
this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect";
|
||||
this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17);
|
||||
this.checkBoxNOcrCorrect.TabIndex = 7;
|
||||
this.checkBoxNOcrCorrect.Text = "Draw missing texts";
|
||||
this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// checkBoxRightToLeftNOCR
|
||||
//
|
||||
this.checkBoxRightToLeftNOCR.AutoSize = true;
|
||||
this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(235, 40);
|
||||
this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR";
|
||||
this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17);
|
||||
this.checkBoxRightToLeftNOCR.TabIndex = 6;
|
||||
this.checkBoxRightToLeftNOCR.Text = "Right to left";
|
||||
this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true;
|
||||
//
|
||||
// numericUpDownNumberOfPixelsIsSpaceNOCR
|
||||
//
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(122, 17);
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Maximum = new decimal(new int[] {
|
||||
50,
|
||||
0,
|
||||
0,
|
||||
0});
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Minimum = new decimal(new int[] {
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0});
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Name = "numericUpDownNumberOfPixelsIsSpaceNOCR";
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(50, 21);
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.TabIndex = 5;
|
||||
this.numericUpDownNumberOfPixelsIsSpaceNOCR.Value = new decimal(new int[] {
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
0});
|
||||
//
|
||||
// labelNumberOfPixelsIsSpaceNOCR
|
||||
//
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.AutoSize = true;
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.Location = new System.Drawing.Point(12, 20);
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.Name = "labelNumberOfPixelsIsSpaceNOCR";
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.Size = new System.Drawing.Size(104, 13);
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.TabIndex = 4;
|
||||
this.labelNumberOfPixelsIsSpaceNOCR.Text = "No of pixels is space";
|
||||
//
|
||||
// groupBoxOCRControls
|
||||
//
|
||||
this.groupBoxOCRControls.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
|
||||
@ -1595,12 +1591,28 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.timerHideStatus.Interval = 2000;
|
||||
this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick);
|
||||
//
|
||||
// setForecolorThresholdToolStripMenuItem
|
||||
// comboBoxTesseractEngineMode
|
||||
//
|
||||
this.setForecolorThresholdToolStripMenuItem.Name = "setForecolorThresholdToolStripMenuItem";
|
||||
this.setForecolorThresholdToolStripMenuItem.Size = new System.Drawing.Size(305, 22);
|
||||
this.setForecolorThresholdToolStripMenuItem.Text = "Set forecolor threshold...";
|
||||
this.setForecolorThresholdToolStripMenuItem.Click += new System.EventHandler(this.setForecolorThresholdToolStripMenuItem_Click);
|
||||
this.comboBoxTesseractEngineMode.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||
this.comboBoxTesseractEngineMode.FormattingEnabled = true;
|
||||
this.comboBoxTesseractEngineMode.Items.AddRange(new object[] {
|
||||
"Original Tesseract only (can detect italic)",
|
||||
"Neural nets LSTM only ",
|
||||
"Tesseract + LSTM",
|
||||
"Default, based on what is available"});
|
||||
this.comboBoxTesseractEngineMode.Location = new System.Drawing.Point(98, 100);
|
||||
this.comboBoxTesseractEngineMode.Name = "comboBoxTesseractEngineMode";
|
||||
this.comboBoxTesseractEngineMode.Size = new System.Drawing.Size(195, 21);
|
||||
this.comboBoxTesseractEngineMode.TabIndex = 5;
|
||||
//
|
||||
// labelTesseractEngineMode
|
||||
//
|
||||
this.labelTesseractEngineMode.AutoSize = true;
|
||||
this.labelTesseractEngineMode.Location = new System.Drawing.Point(18, 103);
|
||||
this.labelTesseractEngineMode.Name = "labelTesseractEngineMode";
|
||||
this.labelTesseractEngineMode.Size = new System.Drawing.Size(68, 13);
|
||||
this.labelTesseractEngineMode.TabIndex = 6;
|
||||
this.labelTesseractEngineMode.Text = "Engine mode";
|
||||
//
|
||||
// VobSubOcr
|
||||
//
|
||||
@ -1629,10 +1641,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.Resize += new System.EventHandler(this.VobSubOcr_Resize);
|
||||
this.contextMenuStripListview.ResumeLayout(false);
|
||||
this.groupBoxOcrMethod.ResumeLayout(false);
|
||||
this.groupBoxImageCompareMethod.ResumeLayout(false);
|
||||
this.groupBoxImageCompareMethod.PerformLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit();
|
||||
this.GroupBoxTesseractMethod.ResumeLayout(false);
|
||||
this.GroupBoxTesseractMethod.PerformLayout();
|
||||
this.groupBoxModiMethod.ResumeLayout(false);
|
||||
@ -1640,6 +1648,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.groupBoxNOCR.ResumeLayout(false);
|
||||
this.groupBoxNOCR.PerformLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).EndInit();
|
||||
this.groupBoxImageCompareMethod.ResumeLayout(false);
|
||||
this.groupBoxImageCompareMethod.PerformLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownMaxErrorPct)).EndInit();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit();
|
||||
this.groupBoxOCRControls.ResumeLayout(false);
|
||||
this.groupBoxOCRControls.PerformLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).EndInit();
|
||||
@ -1710,7 +1722,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
private System.Windows.Forms.Label label1;
|
||||
private System.Windows.Forms.GroupBox groupBoxModiMethod;
|
||||
private System.Windows.Forms.GroupBox GroupBoxTesseractMethod;
|
||||
private System.Windows.Forms.CheckBox checkBoxUseModiInTesseractForUnknownWords;
|
||||
private System.Windows.Forms.Label labelTesseractLanguage;
|
||||
private System.Windows.Forms.ComboBox comboBoxTesseractLanguages;
|
||||
private System.Windows.Forms.ContextMenuStrip contextMenuStripListview;
|
||||
@ -1801,5 +1812,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
private System.Windows.Forms.Label labelMinLineSplitHeight;
|
||||
private System.Windows.Forms.ComboBox comboBoxLineSplitMinLineHeight;
|
||||
private System.Windows.Forms.ToolStripMenuItem setForecolorThresholdToolStripMenuItem;
|
||||
private System.Windows.Forms.Label labelTesseractEngineMode;
|
||||
private System.Windows.Forms.ComboBox comboBoxTesseractEngineMode;
|
||||
}
|
||||
}
|
@ -302,6 +302,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
private string[] _tesseractAsyncStrings;
|
||||
private int _tesseractAsyncIndex;
|
||||
private BackgroundWorker _tesseractThread;
|
||||
private int _tesseractEngineMode;
|
||||
|
||||
private readonly DateTime _windowStartTime = DateTime.Now;
|
||||
private int _linesOcred;
|
||||
@ -426,9 +427,15 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
_ocrMethodImageCompare = 4;
|
||||
}
|
||||
|
||||
checkBoxUseModiInTesseractForUnknownWords.Text = language.TryModiForUnknownWords;
|
||||
checkBoxTesseractItalicsOn.Checked = Configuration.Settings.VobSubOcr.UseItalicsInTesseract;
|
||||
checkBoxTesseractItalicsOn.Text = Configuration.Settings.Language.General.Italic;
|
||||
if (Configuration.Settings.VobSubOcr.TesseractEngineMode >= 0 &&
|
||||
Configuration.Settings.VobSubOcr.TesseractEngineMode < comboBoxTesseractEngineMode.Items.Count)
|
||||
{
|
||||
comboBoxTesseractEngineMode.SelectedIndex = Configuration.Settings.VobSubOcr.TesseractEngineMode;
|
||||
}
|
||||
comboBoxTesseractEngineMode.Left = labelTesseractEngineMode.Left + labelTesseractEngineMode.Width + 5;
|
||||
comboBoxTesseractEngineMode.Width = GroupBoxTesseractMethod.Width - comboBoxTesseractEngineMode.Left - 10;
|
||||
|
||||
checkBoxTesseractMusicOn.Checked = Configuration.Settings.VobSubOcr.UseMusicSymbolsInTesseract;
|
||||
checkBoxTesseractMusicOn.Text = Configuration.Settings.Language.Settings.MusicSymbol;
|
||||
@ -5314,7 +5321,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
|
||||
private void FormVobSubOcr_Shown(object sender, EventArgs e)
|
||||
{
|
||||
checkBoxUseModiInTesseractForUnknownWords.Checked = Configuration.Settings.VobSubOcr.UseModiInTesseractForUnknownWords;
|
||||
if (_mp4List != null)
|
||||
{
|
||||
checkBoxShowOnlyForced.Visible = false;
|
||||
@ -5841,6 +5847,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
|
||||
private void ButtonStartOcrClick(object sender, EventArgs e)
|
||||
{
|
||||
_tesseractEngineMode = comboBoxTesseractEngineMode.SelectedIndex;
|
||||
_isLatinDb = comboBoxCharacterDatabase.SelectedItem != null && comboBoxCharacterDatabase.SelectedItem.ToString().Equals("Latin", StringComparison.Ordinal);
|
||||
Configuration.Settings.VobSubOcr.RightToLeft = checkBoxRightToLeft.Checked;
|
||||
_lastLine = null;
|
||||
@ -6144,11 +6151,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
var nbmp = new NikseBitmap(bmp);
|
||||
nbmp.ReplaceYellowWithWhite(); // optimized replace
|
||||
|
||||
string tempTiffFileName = Path.GetTempPath() + Guid.NewGuid() + ".png";
|
||||
string pngFileName = Path.GetTempPath() + Guid.NewGuid() + ".png";
|
||||
string tempTextFileName;
|
||||
using (var b = nbmp.GetBitmap())
|
||||
{
|
||||
b.Save(tempTiffFileName, System.Drawing.Imaging.ImageFormat.Png);
|
||||
b.Save(pngFileName, System.Drawing.Imaging.ImageFormat.Png);
|
||||
tempTextFileName = Path.GetTempPath() + Guid.NewGuid();
|
||||
}
|
||||
|
||||
@ -6156,10 +6163,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
process.StartInfo = new ProcessStartInfo(Configuration.TesseractDirectory + "tesseract.exe");
|
||||
process.StartInfo.UseShellExecute = true;
|
||||
process.StartInfo.Arguments = "\"" + tempTiffFileName + "\" \"" + tempTextFileName + "\" -l " + language;
|
||||
|
||||
if (checkBoxTesseractMusicOn.Checked)
|
||||
process.StartInfo.Arguments += "+music";
|
||||
process.StartInfo.Arguments = "\"" + pngFileName + "\" \"" + tempTextFileName + "\" --oem " + _tesseractEngineMode + " -l " + language;
|
||||
|
||||
if (!string.IsNullOrEmpty(psmMode))
|
||||
process.StartInfo.Arguments += " " + psmMode.Trim();
|
||||
@ -6186,11 +6190,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
if (Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac())
|
||||
{
|
||||
MessageBox.Show("Unable to start 'Tesseract' - make sure tesseract-ocr 3.x is installed!");
|
||||
MessageBox.Show("Unable to start 'Tesseract' - make sure tesseract-ocr 4.x is installed!");
|
||||
}
|
||||
else
|
||||
{
|
||||
MessageBox.Show("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure tesseract-ocr 3.x is installed!");
|
||||
MessageBox.Show("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure tesseract-ocr 4.x is installed!");
|
||||
}
|
||||
throw;
|
||||
}
|
||||
@ -6209,7 +6213,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
result = ParseHocr(result);
|
||||
File.Delete(outputFileName);
|
||||
}
|
||||
File.Delete(tempTiffFileName);
|
||||
File.Delete(pngFileName);
|
||||
}
|
||||
catch
|
||||
{
|
||||
@ -6292,7 +6296,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
|
||||
const int badWords = 0;
|
||||
string textWithOutFixes;
|
||||
if (_tesseractAsyncStrings != null && !string.IsNullOrEmpty(_tesseractAsyncStrings[index]))
|
||||
if (!string.IsNullOrEmpty(_tesseractAsyncStrings?[index]))
|
||||
{
|
||||
textWithOutFixes = _tesseractAsyncStrings[index];
|
||||
}
|
||||
@ -6819,53 +6823,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
_ocrFixEngine.AutoGuessesUsed.Clear();
|
||||
_ocrFixEngine.UnknownWordsFound.Clear();
|
||||
|
||||
if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked)
|
||||
{
|
||||
// which is best - modi or Tesseract - we find out here
|
||||
string modiText = CallModi(index);
|
||||
|
||||
if (modiText.Length == 0)
|
||||
modiText = CallModi(index); // retry... strange MODI
|
||||
if (modiText.Length == 0)
|
||||
modiText = CallModi(index); // retry... strange MODI
|
||||
|
||||
if (modiText.Length > 1 &&
|
||||
!modiText.Contains("CD") &&
|
||||
(!modiText.Contains('0') || line.Contains('0')) &&
|
||||
(!modiText.Contains('2') || line.Contains('2')) &&
|
||||
(!modiText.Contains('3') || line.Contains('4')) &&
|
||||
(!modiText.Contains('5') || line.Contains('5')) &&
|
||||
(!modiText.Contains('9') || line.Contains('9')) &&
|
||||
(!modiText.Contains('•') || line.Contains('•')) &&
|
||||
(!modiText.Contains(')') || line.Contains(')')) &&
|
||||
Utilities.CountTagInText(modiText, '(') < 2 && Utilities.CountTagInText(modiText, ')') < 2 &&
|
||||
Utilities.GetNumberOfLines(modiText) < 4)
|
||||
{
|
||||
int modiWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiText, out correctWords);
|
||||
//if (modiWordsNotFound > 0)
|
||||
{
|
||||
string modiTextOcrFixed = modiText;
|
||||
if (checkBoxAutoFixCommonErrors.Checked)
|
||||
modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(modiText, index, _lastLine, false, GetAutoGuessLevel());
|
||||
int modiOcrCorrectedWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiTextOcrFixed, out correctWords);
|
||||
if (modiOcrCorrectedWordsNotFound <= modiWordsNotFound)
|
||||
modiText = modiTextOcrFixed;
|
||||
}
|
||||
|
||||
if (modiWordsNotFound < wordsNotFound || (textWithOutFixes.Length == 1 && modiWordsNotFound == 0))
|
||||
line = modiText; // use the modi OCR'ed text
|
||||
else if (wordsNotFound == modiWordsNotFound && modiText.EndsWith('!') && (line.EndsWith('l') || line.EndsWith('fl')))
|
||||
line = modiText;
|
||||
}
|
||||
|
||||
// take the best option - before OCR fixing, which we do again to save suggestions and prompt for user input
|
||||
line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
|
||||
}
|
||||
else
|
||||
{ // fix some error manually (modi not available)
|
||||
line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
|
||||
}
|
||||
line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
|
||||
}
|
||||
|
||||
if (_ocrFixEngine.Abort)
|
||||
@ -6875,39 +6833,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
//check Tesseract... find an other way to do this...
|
||||
//string tmp = HtmlUtil.RemoveHtmlTags(line).Trim();
|
||||
//if (!tmp.TrimEnd().EndsWith("..."))
|
||||
//{
|
||||
// tmp = tmp.TrimEnd('.').TrimEnd();
|
||||
// if (tmp.Length > 2 && Utilities.LowercaseLetters.Contains(tmp[tmp.Length - 1]))
|
||||
// {
|
||||
// if (_nocrChars == null)
|
||||
// _nocrChars = LoadNOcrForTesseract("Nikse.SubtitleEdit.Resources.nOCR_TesseractHelper.xml.zip");
|
||||
// string text = HtmlUtil.RemoveHtmlTags(NocrFastCheck(bitmap).TrimEnd());
|
||||
// string post = string.Empty;
|
||||
// if (line.EndsWith("</i>"))
|
||||
// {
|
||||
// post = "</i>";
|
||||
// line = line.Remove(line.Length - 4, 4).Trim();
|
||||
// }
|
||||
// if (text.EndsWith('.'))
|
||||
// {
|
||||
// line = line.TrimEnd('.').Trim();
|
||||
// while (text.EndsWith('.') || text.EndsWith(' '))
|
||||
// {
|
||||
// line += text.Substring(text.Length - 1).Trim();
|
||||
// text = text.Remove(text.Length - 1, 1);
|
||||
// }
|
||||
// }
|
||||
// else if (text.EndsWith('l') && text.EndsWith('!') && !text.EndsWith("l!"))
|
||||
// {
|
||||
// line = line.Remove(line.Length - 1, 1) + "!";
|
||||
// }
|
||||
// line += post;
|
||||
// }
|
||||
//}
|
||||
|
||||
// Log used word guesses (via word replace list)
|
||||
foreach (string guess in _ocrFixEngine.AutoGuessesUsed)
|
||||
listBoxLogSuggestions.Items.Add(guess);
|
||||
@ -6938,7 +6863,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
if (textWithOutFixes.Trim() != line.Trim())
|
||||
{
|
||||
_tesseractOcrAutoFixes++;
|
||||
labelFixesMade.Text = string.Format(" - {0}", _tesseractOcrAutoFixes);
|
||||
labelFixesMade.Text = $" - {_tesseractOcrAutoFixes}";
|
||||
LogOcrFix(index, textWithOutFixes, line);
|
||||
}
|
||||
|
||||
@ -6990,9 +6915,20 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
|
||||
/// <summary>
/// Runs Tesseract OCR again on an enlarged copy of <paramref name="bitmap"/>.
/// </summary>
/// <remarks>
/// First pass: the image is scaled to 3x width and 2x height and OCR'ed with no extra arguments.
/// Second pass (only when the first pass yields null or blank text): the image is scaled to
/// 4x width and 2x height and OCR'ed with the extra argument "-psm 7".
/// Each scaled bitmap is disposed as soon as its OCR call has returned, so no temporary
/// image outlives this method.
/// </remarks>
/// <param name="bitmap">Source image; this method reads its size and does not dispose it itself.</param>
/// <returns>The OCR text of the last pass that ran, with trailing white space removed.</returns>
private string TesseractResizeAndRetry(Bitmap bitmap)
{
    string result;

    // Pass 1: moderately enlarged image, default Tesseract arguments.
    using (var scaled = ResizeBitmap(bitmap, bitmap.Width * 3, bitmap.Height * 2))
    {
        result = Tesseract3DoOcrViaExe(scaled, _languageId, null);
    }

    if (string.IsNullOrWhiteSpace(result))
    {
        // Pass 2: wider image plus "-psm 7"
        // (presumably Tesseract's single-text-line page segmentation mode - confirm against the bundled exe).
        using (var scaled = ResizeBitmap(bitmap, bitmap.Width * 4, bitmap.Height * 2))
        {
            result = Tesseract3DoOcrViaExe(scaled, _languageId, "-psm 7");
        }
    }

    return result.TrimEnd();
}
|
||||
|
||||
@ -7108,7 +7044,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
private void InitializeModi()
|
||||
{
|
||||
_modiEnabled = false;
|
||||
checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
|
||||
comboBoxModiLanguage.Enabled = false;
|
||||
try
|
||||
{
|
||||
@ -7119,7 +7054,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
|
||||
_modiEnabled = _modiDoc != null;
|
||||
comboBoxModiLanguage.Enabled = _modiEnabled;
|
||||
checkBoxUseModiInTesseractForUnknownWords.Enabled = _modiEnabled;
|
||||
}
|
||||
catch
|
||||
{
|
||||
@ -7523,17 +7457,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
comboBoxDictionaries_SelectedIndexChanged(null, null);
|
||||
}
|
||||
|
||||
if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked)
|
||||
{
|
||||
string tesseractLanguageText = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Text;
|
||||
int i = 0;
|
||||
foreach (var modiLanguage in comboBoxModiLanguage.Items)
|
||||
{
|
||||
if ((modiLanguage as ModiLanguage).Text == tesseractLanguageText)
|
||||
comboBoxModiLanguage.SelectedIndex = i;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
comboBoxModiLanguage.SelectedIndex = -1;
|
||||
}
|
||||
|
||||
@ -8430,8 +8353,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
DisposeImageCompareBitmaps();
|
||||
|
||||
Configuration.Settings.VobSubOcr.UseItalicsInTesseract = checkBoxTesseractItalicsOn.Checked;
|
||||
if (comboBoxTesseractEngineMode.SelectedIndex != -1)
|
||||
Configuration.Settings.VobSubOcr.TesseractEngineMode = comboBoxTesseractEngineMode.SelectedIndex;
|
||||
Configuration.Settings.VobSubOcr.ItalicFactor = _unItalicFactor;
|
||||
Configuration.Settings.VobSubOcr.UseModiInTesseractForUnknownWords = checkBoxUseModiInTesseractForUnknownWords.Checked;
|
||||
Configuration.Settings.VobSubOcr.PromptForUnknownWords = checkBoxPromptForUnknownWords.Checked;
|
||||
Configuration.Settings.VobSubOcr.GuessUnknownWords = checkBoxGuessUnknownWords.Checked;
|
||||
Configuration.Settings.VobSubOcr.AutoBreakSubtitleIfMoreThanTwoLines = checkBoxAutoBreakLines.Checked;
|
||||
|
Loading…
Reference in New Issue
Block a user