Testing download of Tesseract dictionaries...

git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@1754 99eadd0c-20b8-1223-b5c4-2a2b2df33de2
This commit is contained in:
niksedk 2013-03-21 15:45:19 +00:00
parent 0b63b7c2d8
commit 4f69b8d30d
12 changed files with 908 additions and 149 deletions

View File

@ -27,7 +27,7 @@ namespace Nikse.SubtitleEdit.Forms
labelChooseLanguageAndClickDownload.Text = Configuration.Settings.Language.GetDictionaries.ChooseLanguageAndClickDownload;
buttonDownload.Text = Configuration.Settings.Language.GetDictionaries.Download;
buttonOK.Text = Configuration.Settings.Language.General.OK;
labelPleaseWait.Text = Configuration.Settings.Language.General.PleaseWait;
labelPleaseWait.Text = string.Empty;
LoadDictionaryList("Nikse.SubtitleEdit.Resources.OpenOfficeDictionaries.xml.zip");
FixLargeFonts();

View File

@ -0,0 +1,143 @@
namespace Nikse.SubtitleEdit.Forms
{
/// <summary>
/// Designer-generated half of the GetTesseractDictionaries form: declares the
/// controls, builds their layout in <see cref="InitializeComponent"/> and disposes
/// the component container.
/// </summary>
partial class GetTesseractDictionaries
{
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
/// <remarks>
/// Creates the seven controls of the dialog, sets their position, size, tab order
/// and default (English) text, wires the Click/LinkClicked/KeyDown handlers and
/// adds the controls to the form.
/// </remarks>
private void InitializeComponent()
{
this.comboBoxDictionaries = new System.Windows.Forms.ComboBox();
this.labelPleaseWait = new System.Windows.Forms.Label();
this.labelChooseLanguageAndClickDownload = new System.Windows.Forms.Label();
this.buttonDownload = new System.Windows.Forms.Button();
this.linkLabelOpenDictionaryFolder = new System.Windows.Forms.LinkLabel();
this.buttonOK = new System.Windows.Forms.Button();
this.labelDescription1 = new System.Windows.Forms.Label();
this.SuspendLayout();
//
// comboBoxDictionaries
//
this.comboBoxDictionaries.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxDictionaries.FormattingEnabled = true;
this.comboBoxDictionaries.Location = new System.Drawing.Point(22, 71);
this.comboBoxDictionaries.Name = "comboBoxDictionaries";
this.comboBoxDictionaries.Size = new System.Drawing.Size(256, 21);
this.comboBoxDictionaries.TabIndex = 21;
//
// labelPleaseWait
//
this.labelPleaseWait.AutoSize = true;
this.labelPleaseWait.Location = new System.Drawing.Point(19, 99);
this.labelPleaseWait.Name = "labelPleaseWait";
this.labelPleaseWait.Size = new System.Drawing.Size(70, 13);
this.labelPleaseWait.TabIndex = 24;
this.labelPleaseWait.Text = "Please wait...";
//
// labelChooseLanguageAndClickDownload
//
this.labelChooseLanguageAndClickDownload.AutoSize = true;
this.labelChooseLanguageAndClickDownload.Location = new System.Drawing.Point(19, 52);
this.labelChooseLanguageAndClickDownload.Name = "labelChooseLanguageAndClickDownload";
this.labelChooseLanguageAndClickDownload.Size = new System.Drawing.Size(202, 13);
this.labelChooseLanguageAndClickDownload.TabIndex = 23;
this.labelChooseLanguageAndClickDownload.Text = "Choose your languge and click download";
//
// buttonDownload
//
this.buttonDownload.Location = new System.Drawing.Point(284, 70);
this.buttonDownload.Name = "buttonDownload";
this.buttonDownload.Size = new System.Drawing.Size(104, 25);
this.buttonDownload.TabIndex = 22;
this.buttonDownload.Text = "&Download";
this.buttonDownload.UseVisualStyleBackColor = true;
this.buttonDownload.Click += new System.EventHandler(this.buttonDownload_Click);
//
// linkLabelOpenDictionaryFolder
//
this.linkLabelOpenDictionaryFolder.AutoSize = true;
this.linkLabelOpenDictionaryFolder.Location = new System.Drawing.Point(19, 137);
this.linkLabelOpenDictionaryFolder.Name = "linkLabelOpenDictionaryFolder";
this.linkLabelOpenDictionaryFolder.Size = new System.Drawing.Size(124, 13);
this.linkLabelOpenDictionaryFolder.TabIndex = 25;
this.linkLabelOpenDictionaryFolder.TabStop = true;
this.linkLabelOpenDictionaryFolder.Text = "Open \'Dictionaries\' folder";
this.linkLabelOpenDictionaryFolder.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelOpenDictionaryFolder_LinkClicked);
//
// buttonOK
//
this.buttonOK.DialogResult = System.Windows.Forms.DialogResult.OK;
this.buttonOK.Location = new System.Drawing.Point(284, 132);
this.buttonOK.Name = "buttonOK";
this.buttonOK.Size = new System.Drawing.Size(104, 23);
this.buttonOK.TabIndex = 26;
this.buttonOK.Text = "&OK";
this.buttonOK.UseVisualStyleBackColor = true;
//
// labelDescription1
//
this.labelDescription1.AutoSize = true;
this.labelDescription1.Location = new System.Drawing.Point(19, 18);
this.labelDescription1.Name = "labelDescription1";
this.labelDescription1.Size = new System.Drawing.Size(220, 13);
this.labelDescription1.TabIndex = 27;
this.labelDescription1.Text = "Get Tesseract OCR dictionaries from the web";
//
// GetTesseractDictionaries
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(426, 182);
this.Controls.Add(this.labelDescription1);
this.Controls.Add(this.comboBoxDictionaries);
this.Controls.Add(this.labelPleaseWait);
this.Controls.Add(this.labelChooseLanguageAndClickDownload);
this.Controls.Add(this.buttonDownload);
this.Controls.Add(this.linkLabelOpenDictionaryFolder);
this.Controls.Add(this.buttonOK);
this.KeyPreview = true;
this.MaximizeBox = false;
this.MinimizeBox = false;
this.Name = "GetTesseractDictionaries";
this.ShowIcon = false;
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "GetTesseractDictionaries";
this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.GetTesseractDictionaries_KeyDown);
this.ResumeLayout(false);
this.PerformLayout();
}
#endregion
// Drop-down list of the dictionaries that can be downloaded.
private System.Windows.Forms.ComboBox comboBoxDictionaries;
// Status label; its designer text is "Please wait...".
private System.Windows.Forms.Label labelPleaseWait;
// Prompt shown above the dictionary drop-down.
private System.Windows.Forms.Label labelChooseLanguageAndClickDownload;
// Raises buttonDownload_Click.
private System.Windows.Forms.Button buttonDownload;
// Raises linkLabelOpenDictionaryFolder_LinkClicked.
private System.Windows.Forms.LinkLabel linkLabelOpenDictionaryFolder;
// Has DialogResult.OK assigned as its dialog result.
private System.Windows.Forms.Button buttonOK;
// Heading label at the top of the dialog.
private System.Windows.Forms.Label labelDescription1;
}
}

View File

@ -0,0 +1,191 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.IO.Compression;
using System.Net;
using System.Windows.Forms;
using System.Xml;
using Nikse.SubtitleEdit.Logic;
namespace Nikse.SubtitleEdit.Forms
{
    /// <summary>
    /// Dialog that lists the Tesseract OCR dictionaries described in an embedded,
    /// gzip-compressed XML resource and downloads the selected one (a gzip-compressed
    /// tar archive) into <c>Configuration.TesseractDataFolder</c>.
    /// </summary>
    public partial class GetTesseractDictionaries : Form
    {
        // Download URL for each combo box entry; index-aligned with comboBoxDictionaries.Items.
        List<string> _dictionaryDownloadLinks = new List<string>();
        // Optional description for each combo box entry; index-aligned with the list above.
        List<string> _descriptions = new List<string>();
        // Name of the embedded resource the dictionary list was last loaded from.
        string _xmlName = null;

        /// <summary>
        /// Builds the form, applies the localized texts and fills the dictionary list.
        /// </summary>
        public GetTesseractDictionaries()
        {
            InitializeComponent();
            Text = Configuration.Settings.Language.GetDictionaries.Title;
            // labelDescription1 keeps its designer text; the localized assignment is currently disabled:
            // labelDescription1.Text = Configuration.Settings.Language.GetDictionaries.DescriptionLine1;
            linkLabelOpenDictionaryFolder.Text = Configuration.Settings.Language.GetDictionaries.OpenDictionariesFolder;
            labelChooseLanguageAndClickDownload.Text = Configuration.Settings.Language.GetDictionaries.ChooseLanguageAndClickDownload;
            buttonDownload.Text = Configuration.Settings.Language.GetDictionaries.Download;
            buttonOK.Text = Configuration.Settings.Language.General.OK;
            // Nothing is in progress yet, so the status label starts empty;
            // buttonDownload_Click shows the "please wait" text while a download runs.
            labelPleaseWait.Text = string.Empty;
            LoadDictionaryList("Nikse.SubtitleEdit.Resources.TesseractDictionaries.xml.zip");
            FixLargeFonts();
        }

        /// <summary>
        /// Reads the gzip-compressed XML dictionary list from an embedded resource and
        /// fills the combo box plus the parallel link/description lists.
        /// </summary>
        /// <param name="xmlRessourceName">Manifest resource name of the gzip-compressed XML file.</param>
        /// <remarks>
        /// Does nothing beyond resetting the lists when the resource is not found.
        /// Dictionary entries without an EnglishName or DownloadLink element, or with an
        /// empty download link, are skipped.
        /// </remarks>
        private void LoadDictionaryList(string xmlRessourceName)
        {
            _dictionaryDownloadLinks = new List<string>();
            _descriptions = new List<string>();
            _xmlName = xmlRessourceName;
            System.Reflection.Assembly asm = System.Reflection.Assembly.GetExecutingAssembly();
            using (Stream strm = asm.GetManifestResourceStream(_xmlName))
            {
                if (strm == null)
                    return;

                comboBoxDictionaries.Items.Clear();
                var doc = new XmlDocument();
                // Stream the whole decompressed document into the parser. A single
                // Read() into a fixed-size buffer may return fewer bytes than requested
                // and silently truncates resources larger than the buffer.
                using (var zip = new GZipStream(strm, CompressionMode.Decompress))
                using (var reader = new StreamReader(zip, System.Text.Encoding.UTF8))
                {
                    doc.Load(reader);
                }

                foreach (XmlNode node in doc.DocumentElement.SelectNodes("Dictionary"))
                {
                    XmlNode nameNode = node.SelectSingleNode("EnglishName");
                    XmlNode linkNode = node.SelectSingleNode("DownloadLink");
                    if (nameNode == null || linkNode == null)
                        continue; // malformed entry - nothing usable to show or download

                    string downloadLink = linkNode.InnerText;
                    if (string.IsNullOrEmpty(downloadLink))
                        continue;

                    XmlNode descriptionNode = node.SelectSingleNode("Description");
                    string description = descriptionNode != null ? descriptionNode.InnerText : string.Empty;

                    comboBoxDictionaries.Items.Add(nameNode.InnerText);
                    _dictionaryDownloadLinks.Add(downloadLink);
                    _descriptions.Add(description);
                }

                // Select the first entry only when there is one; assigning index 0 on an
                // empty combo box throws ArgumentOutOfRangeException.
                if (comboBoxDictionaries.Items.Count > 0)
                    comboBoxDictionaries.SelectedIndex = 0;
            }
        }

        /// <summary>
        /// Widens the form when the description label does not fit and enlarges the
        /// buttons when the current font is taller than the OK button allows.
        /// </summary>
        private void FixLargeFonts()
        {
            if (labelDescription1.Left + labelDescription1.Width + 5 > Width)
                Width = labelDescription1.Left + labelDescription1.Width + 5;

            // The Graphics object returned by CreateGraphics must be disposed by the caller.
            using (Graphics graphics = this.CreateGraphics())
            {
                SizeF textSize = graphics.MeasureString(buttonOK.Text, this.Font);
                if (textSize.Height > buttonOK.Height - 4)
                {
                    int newButtonHeight = (int)(textSize.Height + 7 + 0.5);
                    Utilities.SetButtonHeight(this, newButtonHeight, 1);
                }
            }
        }

        /// <summary>
        /// Switches the dialog between its "download running" and idle states:
        /// status text, enabled state of the input controls and the mouse cursor.
        /// </summary>
        /// <param name="inProgress">true while a download is running; false when idle.</param>
        private void SetDownloadInProgress(bool inProgress)
        {
            labelPleaseWait.Text = inProgress ? Configuration.Settings.Language.General.PleaseWait : string.Empty;
            buttonOK.Enabled = !inProgress;
            buttonDownload.Enabled = !inProgress;
            comboBoxDictionaries.Enabled = !inProgress;
            Cursor = inProgress ? Cursors.WaitCursor : Cursors.Default;
        }

        /// <summary>
        /// Starts the asynchronous download of the selected dictionary.
        /// The result is handled in <see cref="wc_DownloadDataCompleted"/>.
        /// </summary>
        private void buttonDownload_Click(object sender, EventArgs e)
        {
            int index = comboBoxDictionaries.SelectedIndex;
            if (index < 0 || index >= _dictionaryDownloadLinks.Count)
                return; // nothing selected (e.g. the dictionary list is empty)

            try
            {
                SetDownloadInProgress(true);
                this.Refresh();
                string url = _dictionaryDownloadLinks[index];
                var wc = new WebClient { Proxy = Utilities.GetProxy() };
                wc.DownloadDataCompleted += new DownloadDataCompletedEventHandler(wc_DownloadDataCompleted);
                // The call returns immediately; the wait cursor and the disabled controls
                // are restored by the completion handler, not here.
                wc.DownloadDataAsync(new Uri(url));
            }
            catch (Exception exception)
            {
                SetDownloadInProgress(false);
                MessageBox.Show(exception.Message + Environment.NewLine + Environment.NewLine + exception.StackTrace);
            }
        }

        /// <summary>
        /// Completion handler for the dictionary download: decompresses the downloaded
        /// gzip data to a temporary tar file, writes every tar entry into
        /// <c>Configuration.TesseractDataFolder</c> and restores the dialog to its idle state.
        /// </summary>
        void wc_DownloadDataCompleted(object sender, DownloadDataCompletedEventArgs e)
        {
            // e.Result must not be read when the operation failed or was cancelled.
            if (e.Cancelled || e.Error != null)
            {
                SetDownloadInProgress(false);
                MessageBox.Show("Download failed!");
                DialogResult = DialogResult.Cancel;
                return;
            }

            int index = comboBoxDictionaries.SelectedIndex;
            // Path.GetTempFileName() would create an extra zero-byte file that is never
            // removed, so build a unique name without touching the disk instead.
            string tempFileName = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName() + ".tar");
            try
            {
                string dictionaryFolder = Configuration.TesseractDataFolder;
                if (!Directory.Exists(dictionaryFolder))
                    Directory.CreateDirectory(dictionaryFolder);

                // Step 1: gunzip the downloaded bytes into the temporary tar file.
                using (var ms = new MemoryStream(e.Result))
                using (var zip = new GZipStream(ms, CompressionMode.Decompress))
                using (var fs = new FileStream(tempFileName, FileMode.Create))
                {
                    byte[] buffer = new byte[1024];
                    int nRead;
                    while ((nRead = zip.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        fs.Write(buffer, 0, nRead);
                    }
                }

                // Step 2: write every tar entry into the dictionary folder.
                var tr = new TarReader(tempFileName);
                try
                {
                    foreach (TarHeader th in tr.Files)
                    {
                        // Only the file name part is used, so all files end up directly in
                        // the dictionary folder regardless of the paths stored in the archive.
                        string name = Path.GetFileName(th.FileName.Trim());
                        if (string.IsNullOrEmpty(name))
                            continue; // entry name ends with a separator (presumably a directory entry)

                        th.WriteData(Path.Combine(dictionaryFolder, name));
                    }
                }
                finally
                {
                    tr.Close();
                }
            }
            catch (Exception exception)
            {
                // Leave the dialog open and usable so the user can retry.
                SetDownloadInProgress(false);
                MessageBox.Show(exception.Message + Environment.NewLine + Environment.NewLine + exception.StackTrace);
                return;
            }
            finally
            {
                TryDeleteFile(tempFileName);
            }

            SetDownloadInProgress(false);
            MessageBox.Show(string.Format(Configuration.Settings.Language.GetDictionaries.XDownloaded, comboBoxDictionaries.Items[index]));
        }

        /// <summary>
        /// Best-effort removal of a temporary file; a file that cannot be deleted is left behind.
        /// </summary>
        private static void TryDeleteFile(string fileName)
        {
            try
            {
                File.Delete(fileName);
            }
            catch (IOException)
            {
                // Deliberately ignored: failing to clean up a temp file must not mask the real outcome.
            }
            catch (UnauthorizedAccessException)
            {
                // Deliberately ignored, see above.
            }
        }

        /// <summary>
        /// Opens the Tesseract data folder with the shell, creating it first when missing.
        /// </summary>
        private void linkLabelOpenDictionaryFolder_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
        {
            string dictionaryFolder = Configuration.TesseractDataFolder;
            if (!Directory.Exists(dictionaryFolder))
                Directory.CreateDirectory(dictionaryFolder);
            System.Diagnostics.Process.Start(dictionaryFolder);
        }

        /// <summary>
        /// Form-level key handling: Escape cancels the dialog, F1 opens the help page.
        /// </summary>
        private void GetTesseractDictionaries_KeyDown(object sender, KeyEventArgs e)
        {
            if (e.KeyCode == Keys.Escape)
            {
                DialogResult = DialogResult.Cancel;
            }
            else if (e.KeyCode == Keys.F1)
            {
                Utilities.ShowHelp("#importvobsub");
                e.SuppressKeyPress = true;
            }
        }
    }
}

View File

@ -0,0 +1,120 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goals of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@ -56,7 +56,19 @@ namespace Nikse.SubtitleEdit.Forms
this.buttonOK = new System.Windows.Forms.Button();
this.buttonCancel = new System.Windows.Forms.Button();
this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox();
this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox();
this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox();
this.buttonGetTesseractDictionaries = new System.Windows.Forms.Button();
this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox();
this.checkBoxTesseractItalicsOn = new System.Windows.Forms.CheckBox();
this.checkBoxUseModiInTesseractForUnknownWords = new System.Windows.Forms.CheckBox();
this.labelTesseractLanguage = new System.Windows.Forms.Label();
this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox();
this.groupBoxModiMethod = new System.Windows.Forms.GroupBox();
this.label1 = new System.Windows.Forms.Label();
this.comboBoxModiLanguage = new System.Windows.Forms.ComboBox();
this.groupBoxNOCR = new System.Windows.Forms.GroupBox();
this.checkBoxNOcrItalic = new System.Windows.Forms.CheckBox();
this.checkBoxNOcrCorrect = new System.Windows.Forms.CheckBox();
this.checkBoxRightToLeftNOCR = new System.Windows.Forms.CheckBox();
this.numericUpDownNumberOfPixelsIsSpaceNOCR = new System.Windows.Forms.NumericUpDown();
@ -69,16 +81,6 @@ namespace Nikse.SubtitleEdit.Forms
this.comboBoxCharacterDatabase = new System.Windows.Forms.ComboBox();
this.labelImageDatabase = new System.Windows.Forms.Label();
this.buttonNewCharacterDatabase = new System.Windows.Forms.Button();
this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox();
this.GroupBoxTesseractMethod = new System.Windows.Forms.GroupBox();
this.checkBoxTesseractMusicOn = new System.Windows.Forms.CheckBox();
this.checkBoxTesseractItalicsOn = new System.Windows.Forms.CheckBox();
this.checkBoxUseModiInTesseractForUnknownWords = new System.Windows.Forms.CheckBox();
this.labelTesseractLanguage = new System.Windows.Forms.Label();
this.comboBoxTesseractLanguages = new System.Windows.Forms.ComboBox();
this.groupBoxModiMethod = new System.Windows.Forms.GroupBox();
this.label1 = new System.Windows.Forms.Label();
this.comboBoxModiLanguage = new System.Windows.Forms.ComboBox();
this.groupBoxOCRControls = new System.Windows.Forms.GroupBox();
this.labelStartFrom = new System.Windows.Forms.Label();
this.numericUpDownStartNumber = new System.Windows.Forms.NumericUpDown();
@ -131,15 +133,14 @@ namespace Nikse.SubtitleEdit.Forms
this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView();
this.contextMenuStripGuessesUsed = new System.Windows.Forms.ContextMenuStrip(this.components);
this.toolStripMenuItemClearGuesses = new System.Windows.Forms.ToolStripMenuItem();
this.checkBoxNOcrItalic = new System.Windows.Forms.CheckBox();
this.contextMenuStripListview.SuspendLayout();
this.groupBoxOcrMethod.SuspendLayout();
this.GroupBoxTesseractMethod.SuspendLayout();
this.groupBoxModiMethod.SuspendLayout();
this.groupBoxNOCR.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit();
this.groupBoxImageCompareMethod.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).BeginInit();
this.GroupBoxTesseractMethod.SuspendLayout();
this.groupBoxModiMethod.SuspendLayout();
this.groupBoxOCRControls.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).BeginInit();
this.groupBoxOcrAutoFix.SuspendLayout();
@ -357,11 +358,11 @@ namespace Nikse.SubtitleEdit.Forms
//
// groupBoxOcrMethod
//
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod);
this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5);
this.groupBoxOcrMethod.Name = "groupBoxOcrMethod";
this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192);
@ -369,6 +370,132 @@ namespace Nikse.SubtitleEdit.Forms
this.groupBoxOcrMethod.TabStop = false;
this.groupBoxOcrMethod.Text = "OCR method";
//
// comboBoxOcrMethod
//
this.comboBoxOcrMethod.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxOcrMethod.FormattingEnabled = true;
this.comboBoxOcrMethod.Items.AddRange(new object[] {
"OCR via tesseract",
"OCR via image compare",
"OCR via Microsoftr MODI",
"OCR via nOCR"});
this.comboBoxOcrMethod.Location = new System.Drawing.Point(13, 20);
this.comboBoxOcrMethod.Name = "comboBoxOcrMethod";
this.comboBoxOcrMethod.Size = new System.Drawing.Size(366, 21);
this.comboBoxOcrMethod.TabIndex = 0;
this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged);
//
// GroupBoxTesseractMethod
//
this.GroupBoxTesseractMethod.Controls.Add(this.buttonGetTesseractDictionaries);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxUseModiInTesseractForUnknownWords);
this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage);
this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages);
this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31);
this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod";
this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131);
this.GroupBoxTesseractMethod.TabIndex = 1;
this.GroupBoxTesseractMethod.TabStop = false;
this.GroupBoxTesseractMethod.Text = "Tesseract";
//
// buttonGetTesseractDictionaries
//
this.buttonGetTesseractDictionaries.Location = new System.Drawing.Point(300, 30);
this.buttonGetTesseractDictionaries.Name = "buttonGetTesseractDictionaries";
this.buttonGetTesseractDictionaries.Size = new System.Drawing.Size(29, 23);
this.buttonGetTesseractDictionaries.TabIndex = 2;
this.buttonGetTesseractDictionaries.Text = "...";
this.buttonGetTesseractDictionaries.UseVisualStyleBackColor = true;
this.buttonGetTesseractDictionaries.Click += new System.EventHandler(this.buttonGetTesseractDictionaries_Click);
//
// checkBoxTesseractMusicOn
//
this.checkBoxTesseractMusicOn.AutoSize = true;
this.checkBoxTesseractMusicOn.Checked = true;
this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 101);
this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn";
this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(93, 17);
this.checkBoxTesseractMusicOn.TabIndex = 4;
this.checkBoxTesseractMusicOn.Text = "Music symbols";
this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true;
//
// checkBoxTesseractItalicsOn
//
this.checkBoxTesseractItalicsOn.AutoSize = true;
this.checkBoxTesseractItalicsOn.Checked = true;
this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 101);
this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn";
this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(54, 17);
this.checkBoxTesseractItalicsOn.TabIndex = 3;
this.checkBoxTesseractItalicsOn.Text = "Italics";
this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true;
//
// checkBoxUseModiInTesseractForUnknownWords
//
this.checkBoxUseModiInTesseractForUnknownWords.AutoSize = true;
this.checkBoxUseModiInTesseractForUnknownWords.Checked = true;
this.checkBoxUseModiInTesseractForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74);
this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords";
this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(167, 17);
this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 2;
this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words";
this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true;
//
// labelTesseractLanguage
//
this.labelTesseractLanguage.AutoSize = true;
this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 34);
this.labelTesseractLanguage.Name = "labelTesseractLanguage";
this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13);
this.labelTesseractLanguage.TabIndex = 0;
this.labelTesseractLanguage.Text = "Language";
//
// comboBoxTesseractLanguages
//
this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxTesseractLanguages.FormattingEnabled = true;
this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 31);
this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages";
this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21);
this.comboBoxTesseractLanguages.TabIndex = 1;
this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged);
//
// groupBoxModiMethod
//
this.groupBoxModiMethod.Controls.Add(this.label1);
this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage);
this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50);
this.groupBoxModiMethod.Name = "groupBoxModiMethod";
this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131);
this.groupBoxModiMethod.TabIndex = 3;
this.groupBoxModiMethod.TabStop = false;
this.groupBoxModiMethod.Text = "MODI";
//
// label1
//
this.label1.AutoSize = true;
this.label1.Location = new System.Drawing.Point(11, 58);
this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(54, 13);
this.label1.TabIndex = 33;
this.label1.Text = "Language";
//
// comboBoxModiLanguage
//
this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxModiLanguage.FormattingEnabled = true;
this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55);
this.comboBoxModiLanguage.Name = "comboBoxModiLanguage";
this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21);
this.comboBoxModiLanguage.TabIndex = 0;
this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged);
//
// groupBoxNOCR
//
this.groupBoxNOCR.Controls.Add(this.checkBoxNOcrItalic);
@ -383,12 +510,22 @@ namespace Nikse.SubtitleEdit.Forms
this.groupBoxNOCR.TabStop = false;
this.groupBoxNOCR.Text = "nOCR";
//
// checkBoxNOcrItalic
//
this.checkBoxNOcrItalic.AutoSize = true;
this.checkBoxNOcrItalic.Location = new System.Drawing.Point(187, 28);
this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic";
this.checkBoxNOcrItalic.Size = new System.Drawing.Size(48, 17);
this.checkBoxNOcrItalic.TabIndex = 8;
this.checkBoxNOcrItalic.Text = "Italic";
this.checkBoxNOcrItalic.UseVisualStyleBackColor = true;
//
// checkBoxNOcrCorrect
//
this.checkBoxNOcrCorrect.AutoSize = true;
this.checkBoxNOcrCorrect.Location = new System.Drawing.Point(27, 38);
this.checkBoxNOcrCorrect.Name = "checkBoxNOcrCorrect";
this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(116, 17);
this.checkBoxNOcrCorrect.Size = new System.Drawing.Size(113, 17);
this.checkBoxNOcrCorrect.TabIndex = 7;
this.checkBoxNOcrCorrect.Text = "Draw missing texts";
this.checkBoxNOcrCorrect.UseVisualStyleBackColor = true;
@ -398,7 +535,7 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxRightToLeftNOCR.AutoSize = true;
this.checkBoxRightToLeftNOCR.Location = new System.Drawing.Point(173, 100);
this.checkBoxRightToLeftNOCR.Name = "checkBoxRightToLeftNOCR";
this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(83, 17);
this.checkBoxRightToLeftNOCR.Size = new System.Drawing.Size(80, 17);
this.checkBoxRightToLeftNOCR.TabIndex = 6;
this.checkBoxRightToLeftNOCR.Text = "Right to left";
this.checkBoxRightToLeftNOCR.UseVisualStyleBackColor = true;
@ -530,121 +667,6 @@ namespace Nikse.SubtitleEdit.Forms
this.buttonNewCharacterDatabase.UseVisualStyleBackColor = true;
this.buttonNewCharacterDatabase.Click += new System.EventHandler(this.ButtonNewCharacterDatabaseClick);
//
// comboBoxOcrMethod
//
this.comboBoxOcrMethod.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxOcrMethod.FormattingEnabled = true;
this.comboBoxOcrMethod.Items.AddRange(new object[] {
"OCR via tesseract",
"OCR via image compare",
"OCR via Microsoftr MODI",
"OCR via nOCR"});
this.comboBoxOcrMethod.Location = new System.Drawing.Point(13, 20);
this.comboBoxOcrMethod.Name = "comboBoxOcrMethod";
this.comboBoxOcrMethod.Size = new System.Drawing.Size(366, 21);
this.comboBoxOcrMethod.TabIndex = 0;
this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged);
//
// GroupBoxTesseractMethod
//
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractMusicOn);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxTesseractItalicsOn);
this.GroupBoxTesseractMethod.Controls.Add(this.checkBoxUseModiInTesseractForUnknownWords);
this.GroupBoxTesseractMethod.Controls.Add(this.labelTesseractLanguage);
this.GroupBoxTesseractMethod.Controls.Add(this.comboBoxTesseractLanguages);
this.GroupBoxTesseractMethod.Location = new System.Drawing.Point(13, 31);
this.GroupBoxTesseractMethod.Name = "GroupBoxTesseractMethod";
this.GroupBoxTesseractMethod.Size = new System.Drawing.Size(366, 131);
this.GroupBoxTesseractMethod.TabIndex = 1;
this.GroupBoxTesseractMethod.TabStop = false;
this.GroupBoxTesseractMethod.Text = "Tesseract";
//
// checkBoxTesseractMusicOn
//
this.checkBoxTesseractMusicOn.AutoSize = true;
this.checkBoxTesseractMusicOn.Checked = true;
this.checkBoxTesseractMusicOn.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxTesseractMusicOn.Location = new System.Drawing.Point(99, 101);
this.checkBoxTesseractMusicOn.Name = "checkBoxTesseractMusicOn";
this.checkBoxTesseractMusicOn.Size = new System.Drawing.Size(94, 17);
this.checkBoxTesseractMusicOn.TabIndex = 4;
this.checkBoxTesseractMusicOn.Text = "Music symbols";
this.checkBoxTesseractMusicOn.UseVisualStyleBackColor = true;
//
// checkBoxTesseractItalicsOn
//
this.checkBoxTesseractItalicsOn.AutoSize = true;
this.checkBoxTesseractItalicsOn.Checked = true;
this.checkBoxTesseractItalicsOn.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxTesseractItalicsOn.Location = new System.Drawing.Point(22, 101);
this.checkBoxTesseractItalicsOn.Name = "checkBoxTesseractItalicsOn";
this.checkBoxTesseractItalicsOn.Size = new System.Drawing.Size(53, 17);
this.checkBoxTesseractItalicsOn.TabIndex = 3;
this.checkBoxTesseractItalicsOn.Text = "Italics";
this.checkBoxTesseractItalicsOn.UseVisualStyleBackColor = true;
//
// checkBoxUseModiInTesseractForUnknownWords
//
this.checkBoxUseModiInTesseractForUnknownWords.AutoSize = true;
this.checkBoxUseModiInTesseractForUnknownWords.Checked = true;
this.checkBoxUseModiInTesseractForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked;
this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74);
this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords";
this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(165, 17);
this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 2;
this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words";
this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true;
//
// labelTesseractLanguage
//
this.labelTesseractLanguage.AutoSize = true;
this.labelTesseractLanguage.Location = new System.Drawing.Point(18, 34);
this.labelTesseractLanguage.Name = "labelTesseractLanguage";
this.labelTesseractLanguage.Size = new System.Drawing.Size(54, 13);
this.labelTesseractLanguage.TabIndex = 0;
this.labelTesseractLanguage.Text = "Language";
//
// comboBoxTesseractLanguages
//
this.comboBoxTesseractLanguages.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxTesseractLanguages.FormattingEnabled = true;
this.comboBoxTesseractLanguages.Location = new System.Drawing.Point(99, 31);
this.comboBoxTesseractLanguages.Name = "comboBoxTesseractLanguages";
this.comboBoxTesseractLanguages.Size = new System.Drawing.Size(195, 21);
this.comboBoxTesseractLanguages.TabIndex = 1;
this.comboBoxTesseractLanguages.SelectedIndexChanged += new System.EventHandler(this.ComboBoxTesseractLanguagesSelectedIndexChanged);
//
// groupBoxModiMethod
//
this.groupBoxModiMethod.Controls.Add(this.label1);
this.groupBoxModiMethod.Controls.Add(this.comboBoxModiLanguage);
this.groupBoxModiMethod.Location = new System.Drawing.Point(7, 50);
this.groupBoxModiMethod.Name = "groupBoxModiMethod";
this.groupBoxModiMethod.Size = new System.Drawing.Size(366, 131);
this.groupBoxModiMethod.TabIndex = 3;
this.groupBoxModiMethod.TabStop = false;
this.groupBoxModiMethod.Text = "MODI";
//
// label1
//
this.label1.AutoSize = true;
this.label1.Location = new System.Drawing.Point(11, 58);
this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(54, 13);
this.label1.TabIndex = 33;
this.label1.Text = "Language";
//
// comboBoxModiLanguage
//
this.comboBoxModiLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxModiLanguage.FormattingEnabled = true;
this.comboBoxModiLanguage.Location = new System.Drawing.Point(108, 55);
this.comboBoxModiLanguage.Name = "comboBoxModiLanguage";
this.comboBoxModiLanguage.Size = new System.Drawing.Size(195, 21);
this.comboBoxModiLanguage.TabIndex = 0;
this.comboBoxModiLanguage.SelectedIndexChanged += new System.EventHandler(this.ComboBoxModiLanguageSelectedIndexChanged);
//
// groupBoxOCRControls
//
this.groupBoxOCRControls.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
@ -1239,16 +1261,6 @@ namespace Nikse.SubtitleEdit.Forms
this.toolStripMenuItemClearGuesses.Text = "Clear";
this.toolStripMenuItemClearGuesses.Click += new System.EventHandler(this.toolStripMenuItemClearGuesses_Click);
//
// checkBoxNOcrItalic
//
this.checkBoxNOcrItalic.AutoSize = true;
this.checkBoxNOcrItalic.Location = new System.Drawing.Point(187, 28);
this.checkBoxNOcrItalic.Name = "checkBoxNOcrItalic";
this.checkBoxNOcrItalic.Size = new System.Drawing.Size(49, 17);
this.checkBoxNOcrItalic.TabIndex = 8;
this.checkBoxNOcrItalic.Text = "Italic";
this.checkBoxNOcrItalic.UseVisualStyleBackColor = true;
//
// VobSubOcr
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
@ -1276,16 +1288,16 @@ namespace Nikse.SubtitleEdit.Forms
this.Resize += new System.EventHandler(this.VobSubOcr_Resize);
this.contextMenuStripListview.ResumeLayout(false);
this.groupBoxOcrMethod.ResumeLayout(false);
this.GroupBoxTesseractMethod.ResumeLayout(false);
this.GroupBoxTesseractMethod.PerformLayout();
this.groupBoxModiMethod.ResumeLayout(false);
this.groupBoxModiMethod.PerformLayout();
this.groupBoxNOCR.ResumeLayout(false);
this.groupBoxNOCR.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).EndInit();
this.groupBoxImageCompareMethod.ResumeLayout(false);
this.groupBoxImageCompareMethod.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownPixelsIsSpace)).EndInit();
this.GroupBoxTesseractMethod.ResumeLayout(false);
this.GroupBoxTesseractMethod.PerformLayout();
this.groupBoxModiMethod.ResumeLayout(false);
this.groupBoxModiMethod.PerformLayout();
this.groupBoxOCRControls.ResumeLayout(false);
this.groupBoxOCRControls.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownStartNumber)).EndInit();
@ -1419,5 +1431,6 @@ namespace Nikse.SubtitleEdit.Forms
private System.Windows.Forms.Label labelNumberOfPixelsIsSpaceNOCR;
private System.Windows.Forms.CheckBox checkBoxNOcrCorrect;
private System.Windows.Forms.CheckBox checkBoxNOcrItalic;
private System.Windows.Forms.Button buttonGetTesseractDictionaries;
}
}

View File

@ -278,6 +278,7 @@ namespace Nikse.SubtitleEdit.Forms
clearToolStripMenuItem.Text = Configuration.Settings.Language.DvdSubrip.Clear;
comboBoxTesseractLanguages.Left = labelTesseractLanguage.Left + labelTesseractLanguage.Width;
buttonGetTesseractDictionaries.Left = comboBoxTesseractLanguages.Left + comboBoxTesseractLanguages.Width + 5;
Utilities.InitializeSubtitleFont(subtitleListView1);
subtitleListView1.AutoSizeAllColumns(this);
@ -3816,9 +3817,30 @@ namespace Nikse.SubtitleEdit.Forms
comboBoxOcrMethod.Items.RemoveAt(2);
}
private void InitializeTesseract()
{
string dir = Configuration.TesseractDataFolder;
if (!Directory.Exists(Configuration.TesseractFolder))
{
Directory.CreateDirectory(Configuration.TesseractFolder);
if (!Utilities.IsRunningOnLinux() && !Utilities.IsRunningOnMac())
{
System.Diagnostics.Process process = new System.Diagnostics.Process();
System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo();
startInfo.WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden;
startInfo.FileName = "xcopy";
startInfo.Arguments = "\"" + Path.Combine(Configuration.TesseractOriginalFolder, "*.*") + "\" \"" + Configuration.TesseractFolder + "\" /s";
MessageBox.Show(startInfo.Arguments);
process.StartInfo = startInfo;
process.Start();
process.WaitForExit();
}
}
string dir = Path.Combine(Configuration.TesseractFolder, "tessdata");
if (Directory.Exists(dir))
{
var list = new List<string>();
@ -5037,5 +5059,12 @@ namespace Nikse.SubtitleEdit.Forms
listBoxLogSuggestions.Items.Clear();
}
/// <summary>
/// Opens the Tesseract dictionary download dialog modally and, once it is
/// closed, calls <c>InitializeTesseract</c> again.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void buttonGetTesseractDictionaries_Click(object sender, EventArgs e)
{
    // A form shown with ShowDialog is not disposed when it closes, so it
    // must be disposed explicitly to release its window handle and resources.
    using (var form = new GetTesseractDictionaries())
    {
        form.ShowDialog(this);
    }

    InitializeTesseract();
}
}
}

View File

@ -51,7 +51,7 @@ namespace Nikse.SubtitleEdit.Logic
}
}
public static string TesseractFolder
public static string TesseractOriginalFolder
{
get
{
@ -59,6 +59,14 @@ namespace Nikse.SubtitleEdit.Logic
}
}
/// <summary>
/// Gets the Tesseract folder path: the value of <c>DataDirectory</c> followed
/// by "Tesseract" and a trailing directory separator character.
/// </summary>
public static string TesseractFolder
{
    get
    {
        string trailingSeparator = Path.DirectorySeparatorChar.ToString();
        return string.Concat(DataDirectory, "Tesseract", trailingSeparator);
    }
}
public static string VobSubCompareFolder
{
get

40
src/Logic/TarHeader.cs Normal file
View File

@ -0,0 +1,40 @@
using System;
using System.IO;
using System.Text;
namespace Nikse.SubtitleEdit.Logic
{
/// <summary>
/// A single entry header read from a tar archive stream, together with the
/// position of the entry's data so it can be extracted later.
/// </summary>
public class TarHeader
{
    /// <summary>Size in bytes of one tar header block.</summary>
    public const int HeaderSize = 512;

    // Field layout inside the header block.
    private const int NameOffset = 0;
    private const int NameLength = 100;
    private const int SizeOffset = 124;
    private const int SizeLength = 12;

    // Chunk size used when copying entry data to disk.
    private const int CopyBufferSize = 81920;

    /// <summary>Entry name as stored in the header (NUL bytes removed).</summary>
    public string FileName { get; set; }

    /// <summary>Entry data size in bytes; 0 when the name is empty or the size field is not valid octal.</summary>
    public long FileSizeInBytes { get; set; }

    /// <summary>Stream position immediately after the header, i.e. where the entry data starts.</summary>
    public long FilePosition { get; set; }

    private Stream _stream;

    /// <summary>
    /// Reads one header block from the current position of <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">Archive stream positioned at the start of a header block.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public TarHeader(Stream stream)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        _stream = stream;
        byte[] buffer = new byte[HeaderSize];

        // Stream.Read may return fewer bytes than requested, so keep reading
        // until the block is complete or the stream ends (the rest stays zero).
        ReadFully(stream, buffer, HeaderSize);
        FilePosition = stream.Position;

        FileName = Encoding.ASCII.GetString(buffer, NameOffset, NameLength).Replace("\0", string.Empty);
        if (!string.IsNullOrEmpty(FileName))
        {
            FileSizeInBytes = ParseOctal(buffer, SizeOffset, SizeLength);
        }
    }

    /// <summary>
    /// Copies this entry's data from the archive stream to a file, creating or
    /// overwriting <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">Path of the file to write.</param>
    public void WriteData(string fileName)
    {
        _stream.Position = FilePosition;
        using (var output = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.Read))
        {
            // Copy in chunks so a large entry is never held in memory at once.
            byte[] buffer = new byte[CopyBufferSize];
            long remaining = FileSizeInBytes;
            while (remaining > 0)
            {
                int wanted = (int)Math.Min(buffer.Length, remaining);
                int read = _stream.Read(buffer, 0, wanted);
                if (read <= 0)
                {
                    // Archive is shorter than the header claims; keep what was read.
                    break;
                }

                output.Write(buffer, 0, read);
                remaining -= read;
            }
        }
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> bytes into the start of
    /// <paramref name="buffer"/>, looping over partial reads.
    /// </summary>
    /// <returns>The number of bytes actually read.</returns>
    private static int ReadFully(Stream stream, byte[] buffer, int count)
    {
        int total = 0;
        while (total < count)
        {
            int read = stream.Read(buffer, total, count - total);
            if (read <= 0)
            {
                break;
            }

            total += read;
        }

        return total;
    }

    /// <summary>
    /// Parses an ASCII octal number field: optional leading spaces, octal
    /// digits, then an optional NUL or space terminator.
    /// </summary>
    /// <returns>The parsed value, or 0 when the field is empty or contains a non-octal character.</returns>
    private static long ParseOctal(byte[] buffer, int offset, int length)
    {
        int index = offset;
        int end = offset + length;
        while (index < end && buffer[index] == (byte)' ')
        {
            index++;
        }

        long value = 0;
        bool anyDigit = false;
        for (; index < end; index++)
        {
            byte b = buffer[index];
            if (b == 0 || b == (byte)' ')
            {
                break; // field terminator
            }

            if (b < (byte)'0' || b > (byte)'7')
            {
                return 0; // not an octal number (e.g. binary size encoding)
            }

            value = (value << 3) + (b - (byte)'0');
            anyDigit = true;
        }

        return anyDigit ? value : 0;
    }
}
}

49
src/Logic/TarReader.cs Normal file
View File

@ -0,0 +1,49 @@
using System.Collections.Generic;
using System.IO;
namespace Nikse.SubtitleEdit.Logic
{
/// <summary>
/// Scans a tar archive and exposes the headers of all entries that carry data.
/// The reader keeps the underlying stream open until <see cref="Close"/> or
/// <see cref="Dispose"/> is called, so entries can be extracted afterwards.
/// </summary>
public class TarReader : System.IDisposable
{
    /// <summary>Headers of all entries with a size greater than zero, in archive order.</summary>
    public List<TarHeader> Files { get; private set; }

    private Stream _stream;

    /// <summary>
    /// Opens the file at <paramref name="fileName"/> and scans it as a tar archive.
    /// </summary>
    /// <param name="fileName">Path of the tar file to read.</param>
    public TarReader(string fileName)
    {
        FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        try
        {
            OpenTarFile(fs);
        }
        catch
        {
            // Do not leak the file handle when the archive cannot be scanned.
            fs.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Scans <paramref name="stream"/> as a tar archive. The stream's
    /// <c>Length</c> and <c>Position</c> are used, so it must be seekable.
    /// </summary>
    /// <param name="stream">Stream containing the archive; closed by <see cref="Close"/>.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public TarReader(Stream stream)
    {
        if (stream == null)
        {
            throw new System.ArgumentNullException("stream");
        }

        OpenTarFile(stream);
    }

    /// <summary>
    /// Walks the archive header by header, collecting every entry whose size is greater than zero.
    /// </summary>
    private void OpenTarFile(Stream stream)
    {
        _stream = stream;
        Files = new List<TarHeader>();
        long length = stream.Length;
        long pos = 0;
        stream.Position = 0;
        while (pos + TarHeader.HeaderSize < length)
        {
            stream.Seek(pos, SeekOrigin.Begin);
            var th = new TarHeader(stream);
            if (th.FileSizeInBytes > 0)
            {
                Files.Add(th);
            }

            // Skip the header and the entry data, then round up to the next
            // header-sized block boundary where the following header starts.
            pos += TarHeader.HeaderSize + th.FileSizeInBytes;
            long remainder = pos % TarHeader.HeaderSize;
            if (remainder > 0)
            {
                pos += TarHeader.HeaderSize - remainder;
            }
        }
    }

    /// <summary>Closes the underlying archive stream.</summary>
    public void Close()
    {
        Dispose();
    }

    /// <summary>Releases the underlying archive stream; safe to call more than once.</summary>
    public void Dispose()
    {
        if (_stream != null)
        {
            _stream.Dispose();
        }
    }
}
}

View File

@ -0,0 +1,151 @@
<TesseractDictionaries>
<Dictionary>
<EnglishName>Arabic</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ara.tar.gz</DownloadLink>
<Description>Arabic language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Basque</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eus.tar.gz</DownloadLink>
<Description>Basque language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Bulgarian</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.bul.tar.gz</DownloadLink>
<Description>Bulgarian language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Catalan</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.cat.tar.gz</DownloadLink>
<Description>Catalan language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Chinese (Simplified)</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.chi_sim.tar.gz</DownloadLink>
<Description>Chinese (Simplified) language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Czech</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ces.tar.gz</DownloadLink>
<Description>Czech language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Danish</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.dan.tar.gz</DownloadLink>
<Description>Danish language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Dutch</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.nld.tar.gz</DownloadLink>
<Description>Dutch language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>English</EnglishName>
<DownloadLink>https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eng.tar.gz</DownloadLink>
<Description>English language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Finnish</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.fin.tar.gz</DownloadLink>
<Description>Finnish language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>French</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.fra.tar.gz</DownloadLink>
<Description>French language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>German</EnglishName>
<DownloadLink>https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.deu.tar.gz</DownloadLink>
<Description>German language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Greek</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ell.tar.gz</DownloadLink>
<Description>Greek language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Hungarian</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.hun.tar.gz</DownloadLink>
<Description>Hungarian language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Icelandic</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.isl.tar.gz</DownloadLink>
<Description>Icelandic language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Italian</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ita.tar.gz</DownloadLink>
<Description>Italian language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Japanese</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.jpn.tar.gz</DownloadLink>
<Description>Japanese language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Korean</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.kor.tar.gz</DownloadLink>
<Description>Korean language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Norwegian</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.nor.tar.gz</DownloadLink>
<Description>Norwegian language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Polish</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.pol.tar.gz</DownloadLink>
<Description>Polish language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Portuguese</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.por.tar.gz</DownloadLink>
<Description>Portuguese language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Romanian</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ron.tar.gz</DownloadLink>
<Description>Romanian language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Russian</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.rus.tar.gz</DownloadLink>
<Description>Russian Language Data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Serbian (Latin)</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.srp.tar.gz</DownloadLink>
<Description>Serbian (Latin) language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Spanish</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.spa.tar.gz</DownloadLink>
<Description>Spanish language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Swedish</EnglishName>
<DownloadLink>https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.swe.tar.gz</DownloadLink>
<Description>Swedish language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Thai</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.tha.tar.gz</DownloadLink>
<Description>Thai language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Turkish</EnglishName>
<DownloadLink>https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.tur.tar.gz</DownloadLink>
<Description>Turkish language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Ukrainian</EnglishName>
<DownloadLink>https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.ukr.tar.gz</DownloadLink>
<Description>Ukrainian language data for Tesseract 3.02</Description>
</Dictionary>
<Dictionary>
<EnglishName>Vietnamese</EnglishName>
<DownloadLink>http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.vie.tar.gz</DownloadLink>
<Description>Vietnamese Language Data for Tesseract 3.02</Description>
</Dictionary>
</TesseractDictionaries>

Binary file not shown.

View File

@ -320,6 +320,12 @@
<Compile Include="Forms\GetDictionaries.Designer.cs">
<DependentUpon>GetDictionaries.cs</DependentUpon>
</Compile>
<Compile Include="Forms\GetTesseractDictionaries.cs">
<SubType>Form</SubType>
</Compile>
<Compile Include="Forms\GetTesseractDictionaries.Designer.cs">
<DependentUpon>GetTesseractDictionaries.cs</DependentUpon>
</Compile>
<Compile Include="Forms\Interjections.cs">
<SubType>Form</SubType>
</Compile>
@ -926,6 +932,8 @@
<Compile Include="Logic\SubtitleFormats\UnknownSubtitle4.cs" />
<Compile Include="Logic\SubtitleFormats\YouTubeTranscript.cs" />
<Compile Include="Logic\SubtitleFormats\ZeroG.cs" />
<Compile Include="Logic\TarHeader.cs" />
<Compile Include="Logic\TarReader.cs" />
<Compile Include="Logic\TextDraw.cs" />
<Compile Include="Logic\TimeCode.cs" />
<Compile Include="Logic\TransportStream\AdaptationField.cs" />
@ -1104,6 +1112,9 @@
<DependentUpon>GetDictionaries.cs</DependentUpon>
<SubType>Designer</SubType>
</EmbeddedResource>
<EmbeddedResource Include="Forms\GetTesseractDictionaries.resx">
<DependentUpon>GetTesseractDictionaries.cs</DependentUpon>
</EmbeddedResource>
<EmbeddedResource Include="Forms\Interjections.resx">
<DependentUpon>Interjections.cs</DependentUpon>
</EmbeddedResource>
@ -1301,6 +1312,7 @@
</Compile>
<EmbeddedResource Include="Resources\OpenOfficeDictionaries.xml.zip" />
<EmbeddedResource Include="Resources\HunspellDictionaries.xml.zip" />
<EmbeddedResource Include="Resources\TesseractDictionaries.xml.zip" />
<None Include="Web References\MicrosoftTranslationService\GetTranslationsResponse.datasource">
<DependentUpon>Reference.map</DependentUpon>
</None>
@ -1524,6 +1536,9 @@
<None Include="Resources\pictureBoxFS.Image.png" />
<None Include="Resources\pictureBoxFSDown.Image.png" />
<None Include="Resources\pictureBoxFSOver.Image.png" />
<Content Include="Resources\TesseractDictionaries.xml">
<SubType>Designer</SubType>
</Content>
<Content Include="Resources\HunspellDictionaries.xml">
<SubType>Designer</SubType>
</Content>