From 4f69b8d30d406973df7331b5f88827b1af8ae907 Mon Sep 17 00:00:00 2001 From: niksedk Date: Thu, 21 Mar 2013 15:45:19 +0000 Subject: [PATCH] Testing download of Tesseract dictionaries... git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@1754 99eadd0c-20b8-1223-b5c4-2a2b2df33de2 --- src/Forms/GetDictionaries.cs | 2 +- .../GetTesseractDictionaries.Designer.cs | 143 ++++++++ src/Forms/GetTesseractDictionaries.cs | 191 +++++++++++ src/Forms/GetTesseractDictionaries.resx | 120 +++++++ src/Forms/VobSubOcr.Designer.cs | 305 +++++++++--------- src/Forms/VobSubOcr.cs | 31 +- src/Logic/Configuration.cs | 10 +- src/Logic/TarHeader.cs | 40 +++ src/Logic/TarReader.cs | 49 +++ src/Resources/TesseractDictionaries.xml | 151 +++++++++ src/Resources/TesseractDictionaries.xml.zip | Bin 0 -> 845 bytes src/SubtitleEdit.csproj | 15 + 12 files changed, 908 insertions(+), 149 deletions(-) create mode 100644 src/Forms/GetTesseractDictionaries.Designer.cs create mode 100644 src/Forms/GetTesseractDictionaries.cs create mode 100644 src/Forms/GetTesseractDictionaries.resx create mode 100644 src/Logic/TarHeader.cs create mode 100644 src/Logic/TarReader.cs create mode 100644 src/Resources/TesseractDictionaries.xml create mode 100644 src/Resources/TesseractDictionaries.xml.zip diff --git a/src/Forms/GetDictionaries.cs b/src/Forms/GetDictionaries.cs index 3076df47a..a8822ea61 100644 --- a/src/Forms/GetDictionaries.cs +++ b/src/Forms/GetDictionaries.cs @@ -27,7 +27,7 @@ namespace Nikse.SubtitleEdit.Forms labelChooseLanguageAndClickDownload.Text = Configuration.Settings.Language.GetDictionaries.ChooseLanguageAndClickDownload; buttonDownload.Text = Configuration.Settings.Language.GetDictionaries.Download; buttonOK.Text = Configuration.Settings.Language.General.OK; - labelPleaseWait.Text = Configuration.Settings.Language.General.PleaseWait; + labelPleaseWait.Text = string.Empty; LoadDictionaryList("Nikse.SubtitleEdit.Resources.OpenOfficeDictionaries.xml.zip"); FixLargeFonts(); diff --git 
a/src/Forms/GetTesseractDictionaries.Designer.cs b/src/Forms/GetTesseractDictionaries.Designer.cs new file mode 100644 index 000000000..a560ab89c --- /dev/null +++ b/src/Forms/GetTesseractDictionaries.Designer.cs @@ -0,0 +1,143 @@ +namespace Nikse.SubtitleEdit.Forms +{ + partial class GetTesseractDictionaries + { + /// + /// Required designer variable. + /// + private System.ComponentModel.IContainer components = null; + + /// + /// Clean up any resources being used. + /// + /// true if managed resources should be disposed; otherwise, false. + protected override void Dispose(bool disposing) + { + if (disposing && (components != null)) + { + components.Dispose(); + } + base.Dispose(disposing); + } + + #region Windows Form Designer generated code + + /// + /// Required method for Designer support - do not modify + /// the contents of this method with the code editor. + /// + private void InitializeComponent() + { + this.comboBoxDictionaries = new System.Windows.Forms.ComboBox(); + this.labelPleaseWait = new System.Windows.Forms.Label(); + this.labelChooseLanguageAndClickDownload = new System.Windows.Forms.Label(); + this.buttonDownload = new System.Windows.Forms.Button(); + this.linkLabelOpenDictionaryFolder = new System.Windows.Forms.LinkLabel(); + this.buttonOK = new System.Windows.Forms.Button(); + this.labelDescription1 = new System.Windows.Forms.Label(); + this.SuspendLayout(); + // + // comboBoxDictionaries + // + this.comboBoxDictionaries.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxDictionaries.FormattingEnabled = true; + this.comboBoxDictionaries.Location = new System.Drawing.Point(22, 71); + this.comboBoxDictionaries.Name = "comboBoxDictionaries"; + this.comboBoxDictionaries.Size = new System.Drawing.Size(256, 21); + this.comboBoxDictionaries.TabIndex = 21; + // + // labelPleaseWait + // + this.labelPleaseWait.AutoSize = true; + this.labelPleaseWait.Location = new System.Drawing.Point(19, 99); + this.labelPleaseWait.Name = 
"labelPleaseWait"; + this.labelPleaseWait.Size = new System.Drawing.Size(70, 13); + this.labelPleaseWait.TabIndex = 24; + this.labelPleaseWait.Text = "Please wait..."; + // + // labelChooseLanguageAndClickDownload + // + this.labelChooseLanguageAndClickDownload.AutoSize = true; + this.labelChooseLanguageAndClickDownload.Location = new System.Drawing.Point(19, 52); + this.labelChooseLanguageAndClickDownload.Name = "labelChooseLanguageAndClickDownload"; + this.labelChooseLanguageAndClickDownload.Size = new System.Drawing.Size(202, 13); + this.labelChooseLanguageAndClickDownload.TabIndex = 23; + this.labelChooseLanguageAndClickDownload.Text = "Choose your languge and click download"; + // + // buttonDownload + // + this.buttonDownload.Location = new System.Drawing.Point(284, 70); + this.buttonDownload.Name = "buttonDownload"; + this.buttonDownload.Size = new System.Drawing.Size(104, 25); + this.buttonDownload.TabIndex = 22; + this.buttonDownload.Text = "&Download"; + this.buttonDownload.UseVisualStyleBackColor = true; + this.buttonDownload.Click += new System.EventHandler(this.buttonDownload_Click); + // + // linkLabelOpenDictionaryFolder + // + this.linkLabelOpenDictionaryFolder.AutoSize = true; + this.linkLabelOpenDictionaryFolder.Location = new System.Drawing.Point(19, 137); + this.linkLabelOpenDictionaryFolder.Name = "linkLabelOpenDictionaryFolder"; + this.linkLabelOpenDictionaryFolder.Size = new System.Drawing.Size(124, 13); + this.linkLabelOpenDictionaryFolder.TabIndex = 25; + this.linkLabelOpenDictionaryFolder.TabStop = true; + this.linkLabelOpenDictionaryFolder.Text = "Open \'Dictionaries\' folder"; + this.linkLabelOpenDictionaryFolder.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelOpenDictionaryFolder_LinkClicked); + // + // buttonOK + // + this.buttonOK.DialogResult = System.Windows.Forms.DialogResult.OK; + this.buttonOK.Location = new System.Drawing.Point(284, 132); + this.buttonOK.Name = "buttonOK"; + 
this.buttonOK.Size = new System.Drawing.Size(104, 23); + this.buttonOK.TabIndex = 26; + this.buttonOK.Text = "&OK"; + this.buttonOK.UseVisualStyleBackColor = true; + // + // labelDescription1 + // + this.labelDescription1.AutoSize = true; + this.labelDescription1.Location = new System.Drawing.Point(19, 18); + this.labelDescription1.Name = "labelDescription1"; + this.labelDescription1.Size = new System.Drawing.Size(220, 13); + this.labelDescription1.TabIndex = 27; + this.labelDescription1.Text = "Get Tesseract OCR dictionaries from the web"; + // + // GetTesseractDictionaries + // + this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); + this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; + this.ClientSize = new System.Drawing.Size(426, 182); + this.Controls.Add(this.labelDescription1); + this.Controls.Add(this.comboBoxDictionaries); + this.Controls.Add(this.labelPleaseWait); + this.Controls.Add(this.labelChooseLanguageAndClickDownload); + this.Controls.Add(this.buttonDownload); + this.Controls.Add(this.linkLabelOpenDictionaryFolder); + this.Controls.Add(this.buttonOK); + this.KeyPreview = true; + this.MaximizeBox = false; + this.MinimizeBox = false; + this.Name = "GetTesseractDictionaries"; + this.ShowIcon = false; + this.ShowInTaskbar = false; + this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent; + this.Text = "GetTesseractDictionaries"; + this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.GetTesseractDictionaries_KeyDown); + this.ResumeLayout(false); + this.PerformLayout(); + + } + + #endregion + + private System.Windows.Forms.ComboBox comboBoxDictionaries; + private System.Windows.Forms.Label labelPleaseWait; + private System.Windows.Forms.Label labelChooseLanguageAndClickDownload; + private System.Windows.Forms.Button buttonDownload; + private System.Windows.Forms.LinkLabel linkLabelOpenDictionaryFolder; + private System.Windows.Forms.Button buttonOK; + private System.Windows.Forms.Label labelDescription1; + } 
+} \ No newline at end of file diff --git a/src/Forms/GetTesseractDictionaries.cs b/src/Forms/GetTesseractDictionaries.cs new file mode 100644 index 000000000..bbb50f8b8 --- /dev/null +++ b/src/Forms/GetTesseractDictionaries.cs @@ -0,0 +1,191 @@ +using System; +using System.Collections.Generic; +using System.Drawing; +using System.IO; +using System.IO.Compression; +using System.Net; +using System.Windows.Forms; +using System.Xml; +using Nikse.SubtitleEdit.Logic; + +namespace Nikse.SubtitleEdit.Forms +{ + public partial class GetTesseractDictionaries : Form + { + List _dictionaryDownloadLinks = new List(); + List _descriptions = new List(); + string _xmlName = null; + + public GetTesseractDictionaries() + { + InitializeComponent(); + + Text = Configuration.Settings.Language.GetDictionaries.Title; +// labelDescription1.Text = Configuration.Settings.Language.GetDictionaries.DescriptionLine1; + linkLabelOpenDictionaryFolder.Text = Configuration.Settings.Language.GetDictionaries.OpenDictionariesFolder; + labelChooseLanguageAndClickDownload.Text = Configuration.Settings.Language.GetDictionaries.ChooseLanguageAndClickDownload; + buttonDownload.Text = Configuration.Settings.Language.GetDictionaries.Download; + buttonOK.Text = Configuration.Settings.Language.General.OK; + labelPleaseWait.Text = Configuration.Settings.Language.General.PleaseWait; + + LoadDictionaryList("Nikse.SubtitleEdit.Resources.TesseractDictionaries.xml.zip"); + FixLargeFonts(); + } + + private void LoadDictionaryList(string xmlRessourceName) + { + _dictionaryDownloadLinks = new List(); + _descriptions = new List(); + _xmlName = xmlRessourceName; + System.Reflection.Assembly asm = System.Reflection.Assembly.GetExecutingAssembly(); + Stream strm = asm.GetManifestResourceStream(_xmlName); + if (strm != null) + { + comboBoxDictionaries.Items.Clear(); + XmlDocument doc = new XmlDocument(); + var rdr = new StreamReader(strm); + using (var zip = new GZipStream(rdr.BaseStream, CompressionMode.Decompress)) + { + 
byte[] data = new byte[175000]; + zip.Read(data, 0, 175000); + doc.LoadXml(System.Text.Encoding.UTF8.GetString(data)); + } + rdr.Close(); + + foreach (XmlNode node in doc.DocumentElement.SelectNodes("Dictionary")) + { + string englishName = node.SelectSingleNode("EnglishName").InnerText; + string downloadLink = node.SelectSingleNode("DownloadLink").InnerText; + + string description = string.Empty; + if (node.SelectSingleNode("Description") != null) + description = node.SelectSingleNode("Description").InnerText; + + if (!string.IsNullOrEmpty(downloadLink)) + { + string name = englishName; + + comboBoxDictionaries.Items.Add(name); + _dictionaryDownloadLinks.Add(downloadLink); + _descriptions.Add(description); + } + comboBoxDictionaries.SelectedIndex = 0; + } + } + } + + private void FixLargeFonts() + { + if (labelDescription1.Left + labelDescription1.Width + 5 > Width) + Width = labelDescription1.Left + labelDescription1.Width + 5; + + Graphics graphics = this.CreateGraphics(); + SizeF textSize = graphics.MeasureString(buttonOK.Text, this.Font); + if (textSize.Height > buttonOK.Height - 4) + { + int newButtonHeight = (int)(textSize.Height + 7 + 0.5); + Utilities.SetButtonHeight(this, newButtonHeight, 1); + } + } + + private void buttonDownload_Click(object sender, EventArgs e) + { + try + { + labelPleaseWait.Text = Configuration.Settings.Language.General.PleaseWait; + buttonOK.Enabled = false; + buttonDownload.Enabled = false; + comboBoxDictionaries.Enabled = false; + this.Refresh(); + Cursor = Cursors.WaitCursor; + + int index = comboBoxDictionaries.SelectedIndex; + string url = _dictionaryDownloadLinks[index]; + + var wc = new WebClient { Proxy = Utilities.GetProxy() }; + wc.DownloadDataCompleted += new DownloadDataCompletedEventHandler(wc_DownloadDataCompleted); + wc.DownloadDataAsync(new Uri(url)); + Cursor = Cursors.Default; + } + catch (Exception exception) + { + labelPleaseWait.Text = string.Empty; + buttonOK.Enabled = true; + buttonDownload.Enabled = true; + 
comboBoxDictionaries.Enabled = true; + Cursor = Cursors.Default; + MessageBox.Show(exception.Message + Environment.NewLine + Environment.NewLine + exception.StackTrace); + } + } + + void wc_DownloadDataCompleted(object sender, DownloadDataCompletedEventArgs e) + { + if (e.Error != null) + { + MessageBox.Show("Download failed!"); + DialogResult = DialogResult.Cancel; + return; + } + + string dictionaryFolder = Configuration.TesseractDataFolder; + if (!Directory.Exists(dictionaryFolder)) + Directory.CreateDirectory(dictionaryFolder); + + int index = comboBoxDictionaries.SelectedIndex; + + var ms = new MemoryStream(e.Result); + var tempFileName = Path.GetTempFileName() + ".tar"; + var fs = new FileStream(tempFileName, FileMode.Create); + using (var zip = new GZipStream(ms, CompressionMode.Decompress)) + { + byte[] buffer = new byte[1024]; + int nRead; + while ((nRead = zip.Read(buffer, 0, buffer.Length)) > 0) + { + fs.Write(buffer, 0, nRead); + } + } + fs.Close(); + + var tr = new TarReader(tempFileName); + foreach (TarHeader th in tr.Files) + { + string fn = Path.Combine(dictionaryFolder, Path.GetFileName(th.FileName.Trim())); + th.WriteData(fn); + } + ms.Close(); + tr.Close(); + File.Delete(tempFileName); + + Cursor = Cursors.Default; + labelPleaseWait.Text = string.Empty; + buttonOK.Enabled = true; + buttonDownload.Enabled = true; + comboBoxDictionaries.Enabled = true; + MessageBox.Show(string.Format(Configuration.Settings.Language.GetDictionaries.XDownloaded, comboBoxDictionaries.Items[index])); + } + + private void linkLabelOpenDictionaryFolder_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e) + { + string dictionaryFolder = Configuration.TesseractDataFolder; + if (!Directory.Exists(dictionaryFolder)) + Directory.CreateDirectory(dictionaryFolder); + + System.Diagnostics.Process.Start(dictionaryFolder); + } + + private void GetTesseractDictionaries_KeyDown(object sender, KeyEventArgs e) + { + if (e.KeyCode == Keys.Escape) + { + DialogResult = 
DialogResult.Cancel; + } + else if (e.KeyCode == Keys.F1) + { + Utilities.ShowHelp("#importvobsub"); + e.SuppressKeyPress = true; + } + } + + } +} diff --git a/src/Forms/GetTesseractDictionaries.resx b/src/Forms/GetTesseractDictionaries.resx new file mode 100644 index 000000000..5ea0895e3 --- /dev/null +++ b/src/Forms/GetTesseractDictionaries.resx @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/src/Forms/VobSubOcr.Designer.cs b/src/Forms/VobSubOcr.Designer.cs index 3cdd68e53..48e21a6a5 100644 --- a/src/Forms/VobSubOcr.Designer.cs +++ b/src/Forms/VobSubOcr.Designer.cs @@ -56,7 +56,19 @@ namespace Nikse.SubtitleEdit.Forms this.buttonOK = new System.Windows.Forms.Button(); this.buttonCancel = new System.Windows.Forms.Button(); this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox(); + this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox(); + this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox(); + this.buttonGetTesseractDictionaries = new System.Windows.Forms.Button(); + this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox(); + this.checkBoxTesseractItalicsOn = new System.Windows.Forms.CheckBox(); + this.checkBoxUseModiInTesseractForUnknownWords = new System.Windows.Forms.CheckBox(); + this.labelTesseractLanguage = new System.Windows.Forms.Label(); + this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox(); + this.groupBoxModiMethod = new System.Windows.Forms.GroupBox(); + this.label1 = new System.Windows.Forms.Label(); + this.comboBoxModiLanguage = new System.Windows.Forms.ComboBox(); this.groupBoxNOCR = new 
System.Windows.Forms.GroupBox(); + this.checkBoxNOcrItalic = new System.Windows.Forms.CheckBox(); this.checkBoxNOcrCorrect = new System.Windows.Forms.CheckBox(); this.checkBoxRightToLeftNOCR = new System.Windows.Forms.CheckBox(); this.numericUpDownNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.NumericUpDown(); @@ -69,16 +81,6 @@ namespace Nikse.SubtitleEdit.Forms this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox(); this.labelImageDatabase = new System.Windows.Forms.Label(); this.buttonNewCharacterDatabase = new System.Windows.Forms.Button(); - this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox(); - this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox(); - this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox(); - this.checkBoxTesseractItalicsOn = new System.Windows.Forms.CheckBox(); - this.checkBoxUseModiInTesseractForUnknownWords = new System.Windows.Forms.CheckBox(); - this.labelTesseractLanguage = new System.Windows.Forms.Label(); - this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox(); - this.groupBoxModiMethod = new System.Windows.Forms.GroupBox(); - this.label1 = new System.Windows.Forms.Label(); - this.comboBoxModiLanguage = new System.Windows.Forms.ComboBox(); this.groupBoxOCRControls = new System.Windows.Forms.GroupBox(); this.labelStartFrom = new System.Windows.Forms.Label(); this.numericUpDownStartNumber = new System.Windows.Forms.NumericUpDown(); @@ -131,15 +133,14 @@ namespace Nikse.SubtitleEdit.Forms this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); this.contextMenuStripGuessesUsed = new System.Windows.Forms.ContextMenuStrip(this.components); this.toolStripMenuItemClearGuesses = new System.Windows.Forms.ToolStripMenuItem(); - this.checkBoxNOcrItalic = new System.Windows.Forms.CheckBox(); this.contextMenuStripListview.SuspendLayout(); this.groupBoxOcrMethod.SuspendLayout(); + this.GroupBoxTesseractMethod.SuspendLayout(); + 
this.groupBoxModiMethod.SuspendLayout(); this.groupBoxNOCR.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit(); this.groupBoxImageCompareMethod.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit(); - this.GroupBoxTesseractMethod.SuspendLayout(); - this.groupBoxModiMethod.SuspendLayout(); this.groupBoxOCRControls.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).BeginInit(); this.groupBoxOcrAutoFix.SuspendLayout(); @@ -357,11 +358,11 @@ namespace Nikse.SubtitleEdit.Forms // // groupBoxOcrMethod // - this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR); - this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod); this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod); this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod); this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod); + this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR); + this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod); this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5); this.groupBoxOcrMethod.Name = "groupBoxOcrMethod"; this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192); @@ -369,6 +370,132 @@ namespace Nikse.SubtitleEdit.Forms this.groupBoxOcrMethod.TabStop = false; this.groupBoxOcrMethod.Text = "OCR method"; // + // comboBoxOcrMethod + // + this.comboBoxOcrMethod.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxOcrMethod.FormattingEnabled = true; + this.comboBoxOcrMethod.Items.AddRange(new object[] { + "OCR via tesseract", + "OCR via image compare", + "OCR via Microsoftr MODI", + "OCR via nOCR"}); + this.comboBoxOcrMethod.Location = new System.Drawing.Point(13, 20); + this.comboBoxOcrMethod.Name = "comboBoxOcrMethod"; + this.comboBoxOcrMethod.Size = new System.Drawing.Size(366, 21); + this.comboBoxOcrMethod.TabIndex = 0; + 
this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged); + // + // GroupBoxTesseractMethod + // + this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries); + this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn); + this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn); + this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxUseModiInTesseractForUnknownWords); + this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage); + this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages); + this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31); + this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod"; + this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131); + this.GroupBoxTesseractMethod.TabIndex = 1; + this.GroupBoxTesseractMethod.TabStop = false; + this.GroupBoxTesseractMethod.Text = "Tesseract"; + // + // buttonGetTesseractDictionaries + // + this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 30); + this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries"; + this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23); + this.buttonGetTesseractDictionaries.TabIndex = 2; + this.buttonGetTesseractDictionaries.Text = "..."; + this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true; + this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click); + // + // checkBoxTesseractMusicOn + // + this.checkBoxTesseractMusicOn.AutoSize = true; + this.checkBoxTesseractMusicOn.Checked = true; + this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked; + this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 101); + this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn"; + this.checkBoxTesseractMusicOn.Size = new 
System.Drawing.Size(93, 17); + this.checkBoxTesseractMusicOn.TabIndex = 4; + this.checkBoxTesseractMusicOn.Text = "Music symbols"; + this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true; + // + // checkBoxTesseractItalicsOn + // + this.checkBoxTesseractItalicsOn.AutoSize = true; + this.checkBoxTesseractItalicsOn.Checked = true; + this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked; + this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 101); + this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn"; + this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17); + this.checkBoxTesseractItalicsOn.TabIndex = 3; + this.checkBoxTesseractItalicsOn.Text = "Italics"; + this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true; + // + // checkBoxUseModiInTesseractForUnknownWords + // + this.checkBoxUseModiInTesseractForUnknownWords.AutoSize = true; + this.checkBoxUseModiInTesseractForUnknownWords.Checked = true; + this.checkBoxUseModiInTesseractForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked; + this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false; + this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74); + this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords"; + this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(167, 17); + this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 2; + this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words"; + this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true; + // + // labelTesseractLanguage + // + this.labelTesseractLanguage.AutoSize = true; + this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 34); + this.labelTesseractLanguage.Name = "labelTesseractLanguage"; + this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13); + 
this.labelTesseractLanguage.TabIndex = 0; + this.labelTesseractLanguage.Text = "Language"; + // + // comboBoxTesseractLanguages + // + this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxTesseractLanguages.FormattingEnabled = true; + this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 31); + this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages"; + this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21); + this.comboBoxTesseractLanguages.TabIndex = 1; + this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged); + // + // groupBoxModiMethod + // + this.groupBoxModiMethod.Controls.Add(this.label1); + this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage); + this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50); + this.groupBoxModiMethod.Name = "groupBoxModiMethod"; + this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131); + this.groupBoxModiMethod.TabIndex = 3; + this.groupBoxModiMethod.TabStop = false; + this.groupBoxModiMethod.Text = "MODI"; + // + // label1 + // + this.label1.AutoSize = true; + this.label1.Location = new System.Drawing.Point(11, 58); + this.label1.Name = "label1"; + this.label1.Size = new System.Drawing.Size(54, 13); + this.label1.TabIndex = 33; + this.label1.Text = "Language"; + // + // comboBoxModiLanguage + // + this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboBoxModiLanguage.FormattingEnabled = true; + this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55); + this.comboBoxModiLanguage.Name = "comboBoxModiLanguage"; + this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21); + this.comboBoxModiLanguage.TabIndex = 0; + this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged); + // // 
groupBoxNOCR // this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic); @@ -383,12 +510,22 @@ namespace Nikse.SubtitleEdit.Forms this.groupBoxNOCR.TabStop = false; this.groupBoxNOCR.Text = "nOCR"; // + // checkBoxNOcrItalic + // + this.checkBoxNOcrItalic.AutoSize = true; + this.checkBoxNOcrItalic.Location = new System.Drawing.Point(187, 28); + this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic"; + this.checkBoxNOcrItalic.Size = new System.Drawing.Size(48, 17); + this.checkBoxNOcrItalic.TabIndex = 8; + this.checkBoxNOcrItalic.Text = "Italic"; + this.checkBoxNOcrItalic.UseVisualStyleBackColor = true; + // // checkBoxNOcrCorrect // this.checkBoxNOcrCorrect.AutoSize = true; this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(27, 38); this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect"; - this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17); + this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(113, 17); this.checkBoxNOcrCorrect.TabIndex = 7; this.checkBoxNOcrCorrect.Text = "Draw missing texts"; this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true; @@ -398,7 +535,7 @@ namespace Nikse.SubtitleEdit.Forms this.checkBoxRightToLeftNOCR.AutoSize = true; this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(173, 100); this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR"; - this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17); + this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(80, 17); this.checkBoxRightToLeftNOCR.TabIndex = 6; this.checkBoxRightToLeftNOCR.Text = "Right to left"; this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true; @@ -530,121 +667,6 @@ namespace Nikse.SubtitleEdit.Forms this.buttonNewCharacterDatabase.UseVisualStyleBackColor = true; this.buttonNewCharacterDatabase.Click += new System.EventHandler(this.ButtonNewCharacterDatabaseClick); // - // comboBoxOcrMethod - // - this.comboBoxOcrMethod.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; - 
this.comboBoxOcrMethod.FormattingEnabled = true; - this.comboBoxOcrMethod.Items.AddRange(new object[] { - "OCR via tesseract", - "OCR via image compare", - "OCR via Microsoftr MODI", - "OCR via nOCR"}); - this.comboBoxOcrMethod.Location = new System.Drawing.Point(13, 20); - this.comboBoxOcrMethod.Name = "comboBoxOcrMethod"; - this.comboBoxOcrMethod.Size = new System.Drawing.Size(366, 21); - this.comboBoxOcrMethod.TabIndex = 0; - this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged); - // - // GroupBoxTesseractMethod - // - this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn); - this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn); - this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxUseModiInTesseractForUnknownWords); - this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage); - this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages); - this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31); - this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod"; - this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131); - this.GroupBoxTesseractMethod.TabIndex = 1; - this.GroupBoxTesseractMethod.TabStop = false; - this.GroupBoxTesseractMethod.Text = "Tesseract"; - // - // checkBoxTesseractMusicOn - // - this.checkBoxTesseractMusicOn.AutoSize = true; - this.checkBoxTesseractMusicOn.Checked = true; - this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked; - this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 101); - this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn"; - this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(94, 17); - this.checkBoxTesseractMusicOn.TabIndex = 4; - this.checkBoxTesseractMusicOn.Text = "Music symbols"; - this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true; - // - // checkBoxTesseractItalicsOn - // - 
this.checkBoxTesseractItalicsOn.AutoSize = true; - this.checkBoxTesseractItalicsOn.Checked = true; - this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked; - this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 101); - this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn"; - this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(53, 17); - this.checkBoxTesseractItalicsOn.TabIndex = 3; - this.checkBoxTesseractItalicsOn.Text = "Italics"; - this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true; - // - // checkBoxUseModiInTesseractForUnknownWords - // - this.checkBoxUseModiInTesseractForUnknownWords.AutoSize = true; - this.checkBoxUseModiInTesseractForUnknownWords.Checked = true; - this.checkBoxUseModiInTesseractForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked; - this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false; - this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74); - this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords"; - this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(165, 17); - this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 2; - this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words"; - this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true; - // - // labelTesseractLanguage - // - this.labelTesseractLanguage.AutoSize = true; - this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 34); - this.labelTesseractLanguage.Name = "labelTesseractLanguage"; - this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13); - this.labelTesseractLanguage.TabIndex = 0; - this.labelTesseractLanguage.Text = "Language"; - // - // comboBoxTesseractLanguages - // - this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; - 
this.comboBoxTesseractLanguages.FormattingEnabled = true; - this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 31); - this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages"; - this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21); - this.comboBoxTesseractLanguages.TabIndex = 1; - this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged); - // - // groupBoxModiMethod - // - this.groupBoxModiMethod.Controls.Add(this.label1); - this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage); - this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50); - this.groupBoxModiMethod.Name = "groupBoxModiMethod"; - this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131); - this.groupBoxModiMethod.TabIndex = 3; - this.groupBoxModiMethod.TabStop = false; - this.groupBoxModiMethod.Text = "MODI"; - // - // label1 - // - this.label1.AutoSize = true; - this.label1.Location = new System.Drawing.Point(11, 58); - this.label1.Name = "label1"; - this.label1.Size = new System.Drawing.Size(54, 13); - this.label1.TabIndex = 33; - this.label1.Text = "Language"; - // - // comboBoxModiLanguage - // - this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; - this.comboBoxModiLanguage.FormattingEnabled = true; - this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55); - this.comboBoxModiLanguage.Name = "comboBoxModiLanguage"; - this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21); - this.comboBoxModiLanguage.TabIndex = 0; - this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged); - // // groupBoxOCRControls // this.groupBoxOCRControls.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); @@ -1239,16 +1261,6 @@ namespace Nikse.SubtitleEdit.Forms 
this.toolStripMenuItemClearGuesses.Text = "Clear"; this.toolStripMenuItemClearGuesses.Click += new System.EventHandler(this.toolStripMenuItemClearGuesses_Click); // - // checkBoxNOcrItalic - // - this.checkBoxNOcrItalic.AutoSize = true; - this.checkBoxNOcrItalic.Location = new System.Drawing.Point(187, 28); - this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic"; - this.checkBoxNOcrItalic.Size = new System.Drawing.Size(49, 17); - this.checkBoxNOcrItalic.TabIndex = 8; - this.checkBoxNOcrItalic.Text = "Italic"; - this.checkBoxNOcrItalic.UseVisualStyleBackColor = true; - // // VobSubOcr // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); @@ -1276,16 +1288,16 @@ namespace Nikse.SubtitleEdit.Forms this.Resize += new System.EventHandler(this.VobSubOcr_Resize); this.contextMenuStripListview.ResumeLayout(false); this.groupBoxOcrMethod.ResumeLayout(false); + this.GroupBoxTesseractMethod.ResumeLayout(false); + this.GroupBoxTesseractMethod.PerformLayout(); + this.groupBoxModiMethod.ResumeLayout(false); + this.groupBoxModiMethod.PerformLayout(); this.groupBoxNOCR.ResumeLayout(false); this.groupBoxNOCR.PerformLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).EndInit(); this.groupBoxImageCompareMethod.ResumeLayout(false); this.groupBoxImageCompareMethod.PerformLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit(); - this.GroupBoxTesseractMethod.ResumeLayout(false); - this.GroupBoxTesseractMethod.PerformLayout(); - this.groupBoxModiMethod.ResumeLayout(false); - this.groupBoxModiMethod.PerformLayout(); this.groupBoxOCRControls.ResumeLayout(false); this.groupBoxOCRControls.PerformLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).EndInit(); @@ -1419,5 +1431,6 @@ namespace Nikse.SubtitleEdit.Forms private System.Windows.Forms.Label labelNumberOfPixelsIsSpaceNOCR; private System.Windows.Forms.CheckBox checkBoxNOcrCorrect; private 
System.Windows.Forms.CheckBox checkBoxNOcrItalic; + private System.Windows.Forms.Button buttonGetTesseractDictionaries; } } \ No newline at end of file diff --git a/src/Forms/VobSubOcr.cs b/src/Forms/VobSubOcr.cs index 76351d3cd..07a3161a5 100644 --- a/src/Forms/VobSubOcr.cs +++ b/src/Forms/VobSubOcr.cs @@ -278,6 +278,7 @@ namespace Nikse.SubtitleEdit.Forms clearToolStripMenuItem.Text = Configuration.Settings.Language.DvdSubrip.Clear; comboBoxTesseractLanguages.Left = labelTesseractLanguage.Left + labelTesseractLanguage.Width; + buttonGetTesseractDictionaries.Left = comboBoxTesseractLanguages.Left + comboBoxTesseractLanguages.Width + 5; Utilities.InitializeSubtitleFont(subtitleListView1); subtitleListView1.AutoSizeAllColumns(this); @@ -3816,9 +3817,30 @@ namespace Nikse.SubtitleEdit.Forms comboBoxOcrMethod.Items.RemoveAt(2); } + + + + private void InitializeTesseract() { - string dir = Configuration.TesseractDataFolder; + if (!Directory.Exists(Configuration.TesseractFolder)) + { + Directory.CreateDirectory(Configuration.TesseractFolder); + if (!Utilities.IsRunningOnLinux() && !Utilities.IsRunningOnMac()) + { + System.Diagnostics.Process process = new System.Diagnostics.Process(); + System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo(); + startInfo.WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden; + startInfo.FileName = "xcopy"; + startInfo.Arguments = "\"" + Path.Combine(Configuration.TesseractOriginalFolder, "*.*") + "\" \"" + Configuration.TesseractFolder + "\" /s"; + MessageBox.Show(startInfo.Arguments); + process.StartInfo = startInfo; + process.Start(); + process.WaitForExit(); + } + } + + string dir = Path.Combine(Configuration.TesseractFolder, "tessdata"); if (Directory.Exists(dir)) { var list = new List(); @@ -5037,5 +5059,12 @@ namespace Nikse.SubtitleEdit.Forms listBoxLogSuggestions.Items.Clear(); } + private void buttonGetTesseractDictionaries_Click(object sender, EventArgs e) + { + var form = new 
using System;
using System.IO;
using System.Text;

namespace Nikse.SubtitleEdit.Logic
{
    /// <summary>
    /// Represents one 512-byte tar entry header read from a stream, together with
    /// the position of the entry's payload inside that stream.
    /// </summary>
    public class TarHeader
    {
        /// <summary>Size in bytes of a tar header block.</summary>
        public const int HeaderSize = 512;

        // Field layout used by this reader: name at bytes 0..99, size (octal text) at bytes 124..134.
        private const int NameFieldLength = 100;
        private const int SizeFieldOffset = 124;
        private const int SizeFieldLength = 11;

        // Chunk size used when copying an entry's payload to disk.
        private const int CopyBufferSize = 64 * 1024;

        /// <summary>Entry name taken from the first 100 header bytes (ASCII, cut at the first NUL).</summary>
        public string FileName { get; set; }

        /// <summary>Payload size in bytes; stays 0 when the name is empty or the size field is not valid octal.</summary>
        public long FileSizeInBytes { get; set; }

        /// <summary>Stream position directly after the header, i.e. where the payload starts.</summary>
        public long FilePosition { get; set; }

        private readonly Stream _stream;

        /// <summary>
        /// Reads a header block from the current position of <paramref name="stream"/>.
        /// </summary>
        /// <param name="stream">Stream positioned at the start of a header block; it is kept for <see cref="WriteData"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public TarHeader(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            _stream = stream;
            byte[] buffer = new byte[HeaderSize];

            // Stream.Read may return fewer bytes than requested, so keep reading until the
            // block is complete or the stream ends (missing bytes then stay zero).
            ReadFully(stream, buffer, HeaderSize);
            FilePosition = stream.Position;

            // The name field is NUL-terminated; anything after the terminator is not part of the name.
            int nameLength = Array.IndexOf(buffer, (byte)0, 0, NameFieldLength);
            if (nameLength < 0)
            {
                nameLength = NameFieldLength;
            }
            FileName = Encoding.ASCII.GetString(buffer, 0, nameLength);

            string sizeInBytes = Encoding.ASCII.GetString(buffer, SizeFieldOffset, SizeFieldLength);
            long size;
            if (!string.IsNullOrEmpty(FileName) && TryParseOctal(sizeInBytes, out size))
            {
                FileSizeInBytes = size;
            }
        }

        /// <summary>
        /// Copies this entry's payload from the underlying stream to <paramref name="fileName"/>,
        /// overwriting an existing file.
        /// </summary>
        /// <param name="fileName">Path of the file to create.</param>
        /// <remarks>
        /// The payload is copied in chunks so large entries are not held in memory at once.
        /// If the stream ends before <see cref="FileSizeInBytes"/> bytes were read, only the
        /// bytes actually available are written.
        /// </remarks>
        public void WriteData(string fileName)
        {
            _stream.Position = FilePosition;
            using (FileStream output = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.None))
            {
                byte[] buffer = new byte[CopyBufferSize];
                long remaining = FileSizeInBytes;
                while (remaining > 0)
                {
                    int wanted = (int)Math.Min(buffer.Length, remaining);
                    int read = _stream.Read(buffer, 0, wanted);
                    if (read <= 0)
                    {
                        break; // truncated archive: stop instead of writing stale buffer content
                    }
                    output.Write(buffer, 0, read);
                    remaining -= read;
                }
            }
        }

        /// <summary>Reads up to <paramref name="count"/> bytes, looping over short reads; returns the number of bytes read.</summary>
        private static int ReadFully(Stream stream, byte[] buffer, int count)
        {
            int total = 0;
            while (total < count)
            {
                int read = stream.Read(buffer, total, count - total);
                if (read <= 0)
                {
                    break;
                }
                total += read;
            }
            return total;
        }

        /// <summary>
        /// Parses an octal size field, ignoring surrounding space/NUL padding.
        /// Returns false (and 0) when the field is empty or contains a non-octal character.
        /// </summary>
        private static bool TryParseOctal(string text, out long value)
        {
            value = 0;
            string digits = text.Trim(' ', '\0');
            if (digits.Length == 0)
            {
                return false;
            }

            long result = 0;
            foreach (char c in digits)
            {
                if (c < '0' || c > '7')
                {
                    return false;
                }
                result = result * 8 + (c - '0'); // at most 11 digits (33 bits), cannot overflow a long
            }

            value = result;
            return true;
        }
    }
}
using System.Collections.Generic;
using System.IO;

namespace Nikse.SubtitleEdit.Logic
{
    /// <summary>
    /// Indexes the entries of an uncompressed tar archive held in a seekable stream.
    /// Only entries whose header reports a size greater than zero are listed in <see cref="Files"/>.
    /// </summary>
    public class TarReader : System.IDisposable
    {
        /// <summary>Headers of all entries with a non-zero size, in archive order.</summary>
        public List<TarHeader> Files { get; private set; }

        private Stream _stream;

        /// <summary>Opens <paramref name="fileName"/> for reading and indexes its entries.</summary>
        /// <param name="fileName">Path of the tar file.</param>
        public TarReader(string fileName)
        {
            FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
            try
            {
                OpenTarFile(fs);
            }
            catch
            {
                // Do not leak the file handle when the archive cannot be indexed.
                fs.Dispose();
                throw;
            }
        }

        /// <summary>Indexes the entries of an archive that is already available as a stream.</summary>
        /// <param name="stream">Stream containing the archive; it is read via Length, Seek and Position.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public TarReader(Stream stream)
        {
            OpenTarFile(stream);
        }

        /// <summary>
        /// Walks the archive header by header, starting at position 0, and collects the
        /// headers of all entries that have content.
        /// </summary>
        private void OpenTarFile(Stream stream)
        {
            if (stream == null)
            {
                throw new System.ArgumentNullException("stream");
            }

            _stream = stream;
            Files = new List<TarHeader>();
            long length = stream.Length;
            long pos = 0;
            stream.Position = 0;
            while (pos + TarHeader.HeaderSize < length)
            {
                stream.Seek(pos, SeekOrigin.Begin);
                var header = new TarHeader(stream);
                if (header.FileSizeInBytes > 0)
                {
                    Files.Add(header);
                }

                // Next header follows the payload, rounded up to the next block boundary.
                pos += TarHeader.HeaderSize + header.FileSizeInBytes;
                long remainder = pos % TarHeader.HeaderSize;
                if (remainder > 0)
                {
                    pos += TarHeader.HeaderSize - remainder;
                }
            }
        }

        /// <summary>Closes the underlying stream; equivalent to <see cref="Dispose"/>.</summary>
        public void Close()
        {
            Dispose();
        }

        /// <summary>Releases the underlying stream so the reader can be used in a using block.</summary>
        public void Dispose()
        {
            if (_stream != null)
            {
                _stream.Dispose();
            }
        }
    }
}
Danish + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.dan.tar.gz + Danish language data for Tesseract 3.02 + + Dutch + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.nld.tar.gz + Dutch language data for Tesseract 3.02 + + + English + https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eng.tar.gz + English language data for Tesseract 3.02 + + + Finnish + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.fin.tar.gz + Finnish language data for Tesseract 3.02 + + + French + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.fra.tar.gz + French language data for Tesseract 3.02 + + + German + https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.deu.tar.gz + German language data for Tesseract 3.02 + + + Greek + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ell.tar.gz + Greek language data for Tesseract 3.02 + + + Hungarian + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.hun.tar.gz + Hungarian language data for Tesseract 3.02 + + + Icelandic + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.isl.tar.gz + Icelandic language data for Tesseract 3.02 + + + Italian + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ita.tar.gz + Italian language data for Tesseract 3.02 + + + Japanese + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.jpn.tar.gz + Japanese language data for Tesseract 3.02 + + + Korean + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.kor.tar.gz + Korean language data for Tesseract 3.02 + + + Norwegian + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.nor.tar.gz + Norwegian language data for Tesseract 3.02 + + + Polish + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.pol.tar.gz + Polish language data for Tesseract 3.02 + + + Portuguese + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.por.tar.gz + Portuguese language data for Tesseract 3.02 + + + Romanian + 
http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ron.tar.gz + Romanian language data for Tesseract 3.02 + + + Russian + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.rus.tar.gz + Russian Language Data for Tesseract 3.02 + + + Serbian (Latin) + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.srp.tar.gz + Serbian (Latin) language data for Tesseract 3.02 + + + Spanish + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.spa.tar.gz + Spanish language data for Tesseract 3.02 + + + Swedish + https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.swe.tar.gz + Swedish language data for Tesseract 3.02 + + + Thai + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.tha.tar.gz + Thai language data for Tesseract 3.02 + + + Turkish + https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.tur.tar.gz + Turkish language data for Tesseract 3.02 + + + Ukrainian + https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ukr.tar.gz + Ukrainian language data for Tesseract 3.02 + + + Vietnamese + http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.vie.tar.gz + Vietnamese Language Data for Tesseract 3.02 + + \ No newline at end of file diff --git a/src/Resources/TesseractDictionaries.xml.zip b/src/Resources/TesseractDictionaries.xml.zip new file mode 100644 index 0000000000000000000000000000000000000000..004fcb6e74797cd845897bb806940adf20df80ee GIT binary patch literal 845 zcmV-T1G4-diwFP!000040PVd8U>r%7B_=Oz%6orG^-9;^bfE}=m|9>2+(Ca<$;`?Q$0MYYW8B5#g<<0MTV+fT0fZ~EkPc~KV4{6)6N zkB;kXQcNG;^*_^QPphkCS!J{5#q#aZylq#H?%r)pKik!`9$!?|MVU{lSw5ati@WDV znKxmR?R(>$y>V7&<2IxAKX`mMfNFZmn`vFFfTkjpmf7-Ron7SVENip$ysA?>#1t?I zkoOV~^loRQH;nvA*8HM%{!B+qKZL&+UrQ4`b|gzWiC|`1AL0_5`6H z!pJ|*>&0y?{@3UDS-!sY&;_F(#?b3Le|x);f9-+i4!0Xa;bmzXTNA~uk940AHvwbldUrN&_BzA|LN83 zS>K@ap`*W*C;xj@b^fzH^>3^C)}t4MK6LE282O8;zRE9t_W6GK9)q_y{Se0gvbw!% z`?UwZs&23H1fd_w$m@1}vHsbY`qg_(y+i6o3ZyrX;8ueFsv@WAXP^7)>h_AypmZYv 
zKAZ(!t()eDF!=Y#X(~qPb2Ij*KU-seo!1lewDmk|i{;ODPOGU`x28b{vmXK?fmw$4s?DPqh8l< ze+X0Fu5WMm6r~@+;JT5a1_^yj{{SKkI(aWpR6vcSwD> X>3e+FzFhAvok&qqe^&J2JDCf_N literal 0 HcmV?d00001 diff --git a/src/SubtitleEdit.csproj b/src/SubtitleEdit.csproj index 43fa85225..b1efa10be 100644 --- a/src/SubtitleEdit.csproj +++ b/src/SubtitleEdit.csproj @@ -320,6 +320,12 @@ GetDictionaries.cs + + Form + + + GetTesseractDictionaries.cs + Form @@ -926,6 +932,8 @@ + + @@ -1104,6 +1112,9 @@ GetDictionaries.cs Designer + + GetTesseractDictionaries.cs + Interjections.cs @@ -1301,6 +1312,7 @@ + Reference.map @@ -1524,6 +1536,9 @@ + + Designer + Designer