mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-24 20:22:41 +01:00
Go back to including Tesseract 3.02 by default
Tesseract 4 (T4) seems to have some problems with line breaks, has no italic detection, and gives some bad results
This commit is contained in:
parent
0cd466707e
commit
9650bf8b20
Binary file not shown.
@ -269,16 +269,12 @@ Source: ..\Changelog.txt; DestDir: {app};
|
|||||||
Source: ..\LICENSE.txt; DestDir: {app}; Flags: ignoreversion; Components: main
|
Source: ..\LICENSE.txt; DestDir: {app}; Flags: ignoreversion; Components: main
|
||||||
Source: Icons\uninstall.ico; DestDir: {app}\Icons; Flags: ignoreversion; Components: main
|
Source: Icons\uninstall.ico; DestDir: {app}\Icons; Flags: ignoreversion; Components: main
|
||||||
|
|
||||||
Source: ..\Tesseract4\tessdata\configs\hocr; DestDir: {app}\Tesseract4\tessdata\configs; Flags: ignoreversion; Components: main
|
Source: ..\Tesseract302\tessdata\configs\hocr; DestDir: {app}\Tesseract302\tessdata\configs; Flags: ignoreversion; Components: main
|
||||||
Source: ..\Tesseract4\tessdata\osd.traineddata; DestDir: {app}\Tesseract4\tessdata; Flags: ignoreversion; Components: main
|
Source: ..\Tesseract302\tessdata\eng.traineddata; DestDir: {app}\Tesseract302\tessdata; Flags: ignoreversion; Components: main
|
||||||
Source: ..\Tesseract4\tesseract.exe; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
|
Source: ..\Tesseract302\tessdata\music.traineddata; DestDir: {app}\Tesseract302\tessdata; Flags: ignoreversion; Components: main
|
||||||
Source: ..\Tesseract4\gif.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
|
Source: ..\Tesseract302\tesseract.exe; DestDir: {app}\Tesseract302; Flags: ignoreversion; Components: main
|
||||||
Source: ..\Tesseract4\jpeg62.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
|
Source: ..\Tesseract302\msvcp90.dll; DestDir: {app}\Tesseract302; Flags: ignoreversion; Components: main
|
||||||
Source: ..\Tesseract4\leptonica-1.74.4.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
|
Source: ..\Tesseract302\msvcr90.dll; DestDir: {app}\Tesseract302; Flags: ignoreversion; Components: main
|
||||||
Source: ..\Tesseract4\libpng16.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
|
|
||||||
Source: ..\Tesseract4\lzma.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
|
|
||||||
Source: ..\Tesseract4\tiff.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
|
|
||||||
Source: ..\Tesseract4\zlib1.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main
|
|
||||||
|
|
||||||
|
|
||||||
[Icons]
|
[Icons]
|
||||||
@ -325,6 +321,12 @@ Type: files; Name: {app}\TessData\eng.word-dawg; Check: IsU
|
|||||||
Type: dirifempty; Name: {app}\TessData; Check: IsUpgrade()
|
Type: dirifempty; Name: {app}\TessData; Check: IsUpgrade()
|
||||||
Type: files; Name: {app}\Tesseract\leptonlib.dll; Check: IsUpgrade()
|
Type: files; Name: {app}\Tesseract\leptonlib.dll; Check: IsUpgrade()
|
||||||
Type: files; Name: {app}\tessnet2_32.dll; Check: IsUpgrade()
|
Type: files; Name: {app}\tessnet2_32.dll; Check: IsUpgrade()
|
||||||
|
Type: files; Name: {app}\Tesseract302\tessdata\configs\hocr; Check: IsUpgrade()
|
||||||
|
Type: files; Name: {app}\Tesseract302\tessdata\eng.traineddata; Check: IsUpgrade()
|
||||||
|
Type: files; Name: {app}\Tesseract302\tessdata\music.traineddata; Check: IsUpgrade()
|
||||||
|
Type: files; Name: {app}\Tesseract302\tesseract.exe; Check: IsUpgrade()
|
||||||
|
Type: files; Name: {app}\Tesseract302\msvcp90.dll; Check: IsUpgrade()
|
||||||
|
Type: files; Name: {app}\Tesseract302\msvcr90.dll; Check: IsUpgrade()
|
||||||
Type: files; Name: {app}\Icons\SubtitleEdit.srt.ico; Check: IsUpgrade()
|
Type: files; Name: {app}\Icons\SubtitleEdit.srt.ico; Check: IsUpgrade()
|
||||||
Type: files; Name: {app}\DocumentIcons.dll; Check: IsUpgrade()
|
Type: files; Name: {app}\DocumentIcons.dll; Check: IsUpgrade()
|
||||||
Type: files; Name: {app}\Settings.xml; Check: IsUpgrade()
|
Type: files; Name: {app}\Settings.xml; Check: IsUpgrade()
|
||||||
@ -596,6 +598,12 @@ begin
|
|||||||
DelTree(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata\*.traineddata'), False, True, False);
|
DelTree(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata\*.traineddata'), False, True, False);
|
||||||
DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata\configs\hocr'));
|
DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata\configs\hocr'));
|
||||||
|
|
||||||
|
DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tessdata\configs\hocr'));
|
||||||
|
DelTree(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tessdata\*.traineddata'), False, True, False);
|
||||||
|
DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tesseract.exe'));
|
||||||
|
DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\msvcp90.dll'));
|
||||||
|
DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\msvcr90.dll'));
|
||||||
|
|
||||||
// Remove possible installed mpv
|
// Remove possible installed mpv
|
||||||
DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\mpv-1.dll'));
|
DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\mpv-1.dll'));
|
||||||
|
|
||||||
@ -614,6 +622,9 @@ begin
|
|||||||
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata\configs'));
|
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata\configs'));
|
||||||
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata'));
|
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata'));
|
||||||
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4'));
|
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4'));
|
||||||
|
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tessdata\configs'));
|
||||||
|
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tessdata'));
|
||||||
|
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302'));
|
||||||
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit'));
|
RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit'));
|
||||||
|
|
||||||
end;
|
end;
|
||||||
|
@ -17,7 +17,7 @@ namespace Nikse.SubtitleEdit.Core
|
|||||||
|
|
||||||
public static readonly string BaseDirectory = GetBaseDirectory();
|
public static readonly string BaseDirectory = GetBaseDirectory();
|
||||||
public static readonly string DataDirectory = GetDataDirectory();
|
public static readonly string DataDirectory = GetDataDirectory();
|
||||||
public static readonly string TesseractOriginalDirectory = BaseDirectory + "Tesseract4" + Path.DirectorySeparatorChar;
|
public static readonly string TesseractOriginalDirectory = BaseDirectory + "Tesseract302" + Path.DirectorySeparatorChar;
|
||||||
public static readonly string DictionariesDirectory = DataDirectory + "Dictionaries" + Path.DirectorySeparatorChar;
|
public static readonly string DictionariesDirectory = DataDirectory + "Dictionaries" + Path.DirectorySeparatorChar;
|
||||||
public static readonly string SpectrogramsDirectory = DataDirectory + "Spectrograms" + Path.DirectorySeparatorChar;
|
public static readonly string SpectrogramsDirectory = DataDirectory + "Spectrograms" + Path.DirectorySeparatorChar;
|
||||||
public static readonly string SceneChangesDirectory = DataDirectory + "SceneChanges" + Path.DirectorySeparatorChar;
|
public static readonly string SceneChangesDirectory = DataDirectory + "SceneChanges" + Path.DirectorySeparatorChar;
|
||||||
|
@ -3665,7 +3665,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
bool isUnicode = currentEncoding == Encoding.Unicode || currentEncoding == Encoding.UTF32 || currentEncoding == Encoding.GetEncoding(12001) || currentEncoding == Encoding.UTF7 || currentEncoding == Encoding.UTF8;
|
bool isUnicode = currentEncoding == Encoding.Unicode || currentEncoding == Encoding.UTF32 || currentEncoding == Encoding.GetEncoding(12001) || currentEncoding == Encoding.UTF7 || currentEncoding == Encoding.UTF8;
|
||||||
if (!isUnicode && (allText.Contains(new[] { '♪', '♫', '♥', '—', '―', '…' }))) // ANSI & music/unicode symbols
|
if (!isUnicode && (allText.Contains(new[] { '♪', '♫', '♥', '—', '―', '…' }))) // ANSI & music/unicode symbols
|
||||||
{
|
{
|
||||||
if (MessageBox.Show(string.Format(_language.UnicodeMusicSymbolsAnsiWarning), Title, MessageBoxButtons.YesNo) == DialogResult.No)
|
if (MessageBox.Show(string.Format(_language.UnicodeMusicSymbolsAnsiWarning), Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes)
|
||||||
return DialogResult.No;
|
return DialogResult.No;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3685,7 +3685,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
}
|
}
|
||||||
if (containsNegativeTime)
|
if (containsNegativeTime)
|
||||||
{
|
{
|
||||||
if (MessageBox.Show(_language.NegativeTimeWarning, Title, MessageBoxButtons.YesNo) == DialogResult.No)
|
if (MessageBox.Show(_language.NegativeTimeWarning, Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes)
|
||||||
return DialogResult.No;
|
return DialogResult.No;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3698,7 +3698,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
if (MessageBox.Show(string.Format(_language.OverwriteModifiedFile,
|
if (MessageBox.Show(string.Format(_language.OverwriteModifiedFile,
|
||||||
_fileName, fileOnDisk.ToShortDateString(), fileOnDisk.ToString("HH:mm:ss"),
|
_fileName, fileOnDisk.ToShortDateString(), fileOnDisk.ToString("HH:mm:ss"),
|
||||||
Environment.NewLine, _fileDateTime.ToShortDateString(), _fileDateTime.ToString("HH:mm:ss")),
|
Environment.NewLine, _fileDateTime.ToShortDateString(), _fileDateTime.ToString("HH:mm:ss")),
|
||||||
Title + " - " + _language.FileOnDiskModified, MessageBoxButtons.YesNo) == DialogResult.No)
|
Title + " - " + _language.FileOnDiskModified, MessageBoxButtons.YesNoCancel) != DialogResult.Yes)
|
||||||
return DialogResult.No;
|
return DialogResult.No;
|
||||||
}
|
}
|
||||||
if (fileInfo.IsReadOnly)
|
if (fileInfo.IsReadOnly)
|
||||||
@ -3797,7 +3797,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
}
|
}
|
||||||
if (containsNegativeTime)
|
if (containsNegativeTime)
|
||||||
{
|
{
|
||||||
if (MessageBox.Show(_language.NegativeTimeWarning, Title, MessageBoxButtons.YesNo) == DialogResult.No)
|
if (MessageBox.Show(_language.NegativeTimeWarning, Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes)
|
||||||
return DialogResult.No;
|
return DialogResult.No;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3824,7 +3824,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
bool isUnicode = currentEncoding != null && (currentEncoding == Encoding.Unicode || currentEncoding == Encoding.UTF32 || currentEncoding == Encoding.UTF7 || currentEncoding == Encoding.UTF8);
|
bool isUnicode = currentEncoding != null && (currentEncoding == Encoding.Unicode || currentEncoding == Encoding.UTF32 || currentEncoding == Encoding.UTF7 || currentEncoding == Encoding.UTF8);
|
||||||
if (!isUnicode && (allText.Contains(new[] { '♪', '♫', '♥', '—', '―', '…' }))) // ANSI & music/unicode symbols
|
if (!isUnicode && (allText.Contains(new[] { '♪', '♫', '♥', '—', '―', '…' }))) // ANSI & music/unicode symbols
|
||||||
{
|
{
|
||||||
if (MessageBox.Show(string.Format(_language.UnicodeMusicSymbolsAnsiWarning), Title, MessageBoxButtons.YesNo) == DialogResult.No)
|
if (MessageBox.Show(string.Format(_language.UnicodeMusicSymbolsAnsiWarning), Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes)
|
||||||
return DialogResult.No;
|
return DialogResult.No;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4745,7 +4745,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
//if we fail to find the text, we might want to start searching from the top of the file.
|
//if we fail to find the text, we might want to start searching from the top of the file.
|
||||||
if (!found && _findHelper.StartLineIndex >= 1)
|
if (!found && _findHelper.StartLineIndex >= 1)
|
||||||
{
|
{
|
||||||
if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes)
|
if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes)
|
||||||
{
|
{
|
||||||
found = _findHelper.Find(_subtitle, _subtitleAlternate, -1);
|
found = _findHelper.Find(_subtitle, _subtitleAlternate, -1);
|
||||||
}
|
}
|
||||||
@ -4827,7 +4827,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
{
|
{
|
||||||
if (_findHelper.StartLineIndex >= 1)
|
if (_findHelper.StartLineIndex >= 1)
|
||||||
{
|
{
|
||||||
if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes)
|
if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes)
|
||||||
{
|
{
|
||||||
_findHelper.StartLineIndex = 0;
|
_findHelper.StartLineIndex = 0;
|
||||||
if (_findHelper.Find(_subtitle, _subtitleAlternate, 0))
|
if (_findHelper.Find(_subtitle, _subtitleAlternate, 0))
|
||||||
@ -5094,7 +5094,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
string msgText = _language.ReplaceContinueNotFound;
|
string msgText = _language.ReplaceContinueNotFound;
|
||||||
if (matches.Count > 0)
|
if (matches.Count > 0)
|
||||||
msgText = string.Format(_language.ReplaceXContinue, matches.Count);
|
msgText = string.Format(_language.ReplaceXContinue, matches.Count);
|
||||||
if (MessageBox.Show(msgText, _language.ReplaceContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes)
|
if (MessageBox.Show(msgText, _language.ReplaceContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes)
|
||||||
{
|
{
|
||||||
s = result.Substring(0, start - 1);
|
s = result.Substring(0, start - 1);
|
||||||
var rest = result.Remove(0, start - 1);
|
var rest = result.Remove(0, start - 1);
|
||||||
@ -5234,7 +5234,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
string msgText = _language.ReplaceContinueNotFound;
|
string msgText = _language.ReplaceContinueNotFound;
|
||||||
if (replaceCount > 0)
|
if (replaceCount > 0)
|
||||||
msgText = string.Format(_language.ReplaceXContinue, replaceCount);
|
msgText = string.Format(_language.ReplaceXContinue, replaceCount);
|
||||||
if (MessageBox.Show(msgText, _language.ReplaceContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes)
|
if (MessageBox.Show(msgText, _language.ReplaceContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes)
|
||||||
{
|
{
|
||||||
stopAtIndex = firstIndex;
|
stopAtIndex = firstIndex;
|
||||||
_findHelper.MatchInOriginal = false;
|
_findHelper.MatchInOriginal = false;
|
||||||
@ -5277,7 +5277,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
if (_replaceStartLineIndex >= 1) // Prompt for start over
|
if (_replaceStartLineIndex >= 1) // Prompt for start over
|
||||||
{
|
{
|
||||||
_replaceStartLineIndex = 0;
|
_replaceStartLineIndex = 0;
|
||||||
if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes)
|
if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes)
|
||||||
{
|
{
|
||||||
SubtitleListview1.SelectIndexAndEnsureVisible(0, true);
|
SubtitleListview1.SelectIndexAndEnsureVisible(0, true);
|
||||||
_findHelper.StartLineIndex = 0;
|
_findHelper.StartLineIndex = 0;
|
||||||
@ -5361,7 +5361,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
if (_replaceStartLineIndex >= 1)
|
if (_replaceStartLineIndex >= 1)
|
||||||
{
|
{
|
||||||
_replaceStartLineIndex = 0;
|
_replaceStartLineIndex = 0;
|
||||||
if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes)
|
if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes)
|
||||||
{
|
{
|
||||||
SubtitleListview1.SelectIndexAndEnsureVisible(0, true);
|
SubtitleListview1.SelectIndexAndEnsureVisible(0, true);
|
||||||
_findHelper.StartLineIndex = 0;
|
_findHelper.StartLineIndex = 0;
|
||||||
@ -6008,7 +6008,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
visualSync.ShowDialog(this);
|
visualSync.ShowDialog(this);
|
||||||
if (visualSync.OkPressed)
|
if (visualSync.OkPressed)
|
||||||
{
|
{
|
||||||
if (MessageBox.Show(_language.AppendSynchronizedSubtitlePrompt, _language.SubtitleAppendPromptTitle, MessageBoxButtons.YesNo) == DialogResult.Yes)
|
if (MessageBox.Show(_language.AppendSynchronizedSubtitlePrompt, _language.SubtitleAppendPromptTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes)
|
||||||
{
|
{
|
||||||
int start = _subtitle.Paragraphs.Count + 1;
|
int start = _subtitle.Paragraphs.Count + 1;
|
||||||
var fr = CurrentFrameRate;
|
var fr = CurrentFrameRate;
|
||||||
@ -6185,7 +6185,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
if (!isSwedish)
|
if (!isSwedish)
|
||||||
promptText = _language.TranslateSwedishToDanishWarning;
|
promptText = _language.TranslateSwedishToDanishWarning;
|
||||||
|
|
||||||
if (MessageBox.Show(promptText, Title, MessageBoxButtons.YesNo) == DialogResult.Yes)
|
if (MessageBox.Show(promptText, Title, MessageBoxButtons.YesNoCancel) == DialogResult.Yes)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -6537,6 +6537,12 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
RefreshSelectedParagraph();
|
RefreshSelectedParagraph();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Calls <c>MakeHistoryForUndo</c> with the "OneLineDeleted" language text and then
/// calls <c>DeleteSelectedLines()</c>.
/// </summary>
/// <param name="i">Not read by this method body.</param>
/// <param name="p">Not read by this method body.</param>
// NOTE(review): neither 'i' nor 'p' is used, so what gets removed is decided entirely by
// DeleteSelectedLines() - presumably the current list selection. Confirm that callers
// select the intended line before calling this.
public void DeleteLine(int i, Paragraph p)
|
||||||
|
{
|
||||||
|
MakeHistoryForUndo(Configuration.Settings.Language.Main.OneLineDeleted);
|
||||||
|
DeleteSelectedLines();
|
||||||
|
}
|
||||||
|
|
||||||
public void FocusParagraph(int index)
|
public void FocusParagraph(int index)
|
||||||
{
|
{
|
||||||
if (tabControlSubtitle.SelectedIndex == TabControlSourceView)
|
if (tabControlSubtitle.SelectedIndex == TabControlSourceView)
|
||||||
@ -7445,7 +7451,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
askText = _language.DeleteOneLinePrompt;
|
askText = _language.DeleteOneLinePrompt;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Configuration.Settings.General.PromptDeleteLines && MessageBox.Show(askText, Title, MessageBoxButtons.YesNo) == DialogResult.No)
|
if (Configuration.Settings.General.PromptDeleteLines && MessageBox.Show(askText, Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes)
|
||||||
{
|
{
|
||||||
_cutText = string.Empty;
|
_cutText = string.Empty;
|
||||||
return;
|
return;
|
||||||
@ -11843,7 +11849,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
if (File.Exists(idxFileName))
|
if (File.Exists(idxFileName))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
var dr = MessageBox.Show(string.Format(_language.IdxFileNotFoundWarning, idxFileName), _title, MessageBoxButtons.YesNo);
|
var dr = MessageBox.Show(string.Format(_language.IdxFileNotFoundWarning, idxFileName), _title, MessageBoxButtons.YesNoCancel);
|
||||||
return dr == DialogResult.Yes;
|
return dr == DialogResult.Yes;
|
||||||
}
|
}
|
||||||
if (verbose)
|
if (verbose)
|
||||||
@ -14940,7 +14946,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
if (timeCodeSubtitle.Paragraphs.Count != _subtitle.Paragraphs.Count)
|
if (timeCodeSubtitle.Paragraphs.Count != _subtitle.Paragraphs.Count)
|
||||||
{
|
{
|
||||||
var text = string.Format(_language.ImportTimeCodesDifferentNumberOfLinesWarning, timeCodeSubtitle.Paragraphs.Count, _subtitle.Paragraphs.Count);
|
var text = string.Format(_language.ImportTimeCodesDifferentNumberOfLinesWarning, timeCodeSubtitle.Paragraphs.Count, _subtitle.Paragraphs.Count);
|
||||||
if (MessageBox.Show(this, text, _title, MessageBoxButtons.YesNo) == DialogResult.No)
|
if (MessageBox.Show(this, text, _title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -22658,7 +22664,7 @@ namespace Nikse.SubtitleEdit.Forms
|
|||||||
newP.EndTime.TotalMilliseconds <= p.EndTime.TotalMilliseconds))
|
newP.EndTime.TotalMilliseconds <= p.EndTime.TotalMilliseconds))
|
||||||
{
|
{
|
||||||
// new subs will overlap existing subs
|
// new subs will overlap existing subs
|
||||||
if (MessageBox.Show(_language.PromptInsertSubtitleOverlap, _languageGeneral.Title, MessageBoxButtons.YesNo) == DialogResult.No)
|
if (MessageBox.Show(_language.PromptInsertSubtitleOverlap, _languageGeneral.Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
6
src/Forms/Ocr/DownloadTesseract302.Designer.cs
generated
6
src/Forms/Ocr/DownloadTesseract302.Designer.cs
generated
@ -37,9 +37,9 @@
|
|||||||
this.labelDescription1.AutoSize = true;
|
this.labelDescription1.AutoSize = true;
|
||||||
this.labelDescription1.Location = new System.Drawing.Point(21, 27);
|
this.labelDescription1.Location = new System.Drawing.Point(21, 27);
|
||||||
this.labelDescription1.Name = "labelDescription1";
|
this.labelDescription1.Name = "labelDescription1";
|
||||||
this.labelDescription1.Size = new System.Drawing.Size(220, 13);
|
this.labelDescription1.Size = new System.Drawing.Size(145, 13);
|
||||||
this.labelDescription1.TabIndex = 29;
|
this.labelDescription1.TabIndex = 29;
|
||||||
this.labelDescription1.Text = "Get Tesseract OCR dictionaries from the web";
|
this.labelDescription1.Text = "Downloading Tesseract OCR";
|
||||||
//
|
//
|
||||||
// labelPleaseWait
|
// labelPleaseWait
|
||||||
//
|
//
|
||||||
@ -63,7 +63,7 @@
|
|||||||
this.Name = "DownloadTesseract302";
|
this.Name = "DownloadTesseract302";
|
||||||
this.ShowInTaskbar = false;
|
this.ShowInTaskbar = false;
|
||||||
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
|
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
|
||||||
this.Text = "DownloadTesseract302";
|
this.Text = "Download Tesseract 3.02";
|
||||||
this.ResumeLayout(false);
|
this.ResumeLayout(false);
|
||||||
this.PerformLayout();
|
this.PerformLayout();
|
||||||
|
|
||||||
|
77
src/Forms/Ocr/DownloadTesseract4.Designer.cs
generated
Normal file
77
src/Forms/Ocr/DownloadTesseract4.Designer.cs
generated
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||||
|
{
|
||||||
|
/// <summary>
/// Designer-generated half of the DownloadTesseract4 dialog: declares the two labels
/// and sets up the fixed-size, non-resizable layout in <see cref="InitializeComponent"/>.
/// </summary>
partial class DownloadTesseract4
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Required designer variable.
|
||||||
|
/// </summary>
|
||||||
|
private System.ComponentModel.IContainer components = null;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Clean up any resources being used.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
|
||||||
|
protected override void Dispose(bool disposing)
|
||||||
|
{
|
||||||
|
// InitializeComponent below never assigns 'components', so the null check
// is what keeps this branch from throwing.
if (disposing && (components != null))
|
||||||
|
{
|
||||||
|
components.Dispose();
|
||||||
|
}
|
||||||
|
base.Dispose(disposing);
|
||||||
|
}
|
||||||
|
|
||||||
|
#region Windows Form Designer generated code
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Required method for Designer support - do not modify
|
||||||
|
/// the contents of this method with the code editor.
|
||||||
|
/// </summary>
|
||||||
|
private void InitializeComponent()
|
||||||
|
{
|
||||||
|
this.labelDescription1 = new System.Windows.Forms.Label();
|
||||||
|
this.labelPleaseWait = new System.Windows.Forms.Label();
|
||||||
|
this.SuspendLayout();
|
||||||
|
//
|
||||||
|
// labelDescription1
|
||||||
|
//
|
||||||
|
this.labelDescription1.AutoSize = true;
|
||||||
|
this.labelDescription1.Location = new System.Drawing.Point(21, 27);
|
||||||
|
this.labelDescription1.Name = "labelDescription1";
|
||||||
|
this.labelDescription1.Size = new System.Drawing.Size(145, 13);
|
||||||
|
this.labelDescription1.TabIndex = 29;
|
||||||
|
this.labelDescription1.Text = "Downloading Tesseract OCR";
|
||||||
|
//
|
||||||
|
// labelPleaseWait
|
||||||
|
//
|
||||||
|
this.labelPleaseWait.AutoSize = true;
|
||||||
|
this.labelPleaseWait.Location = new System.Drawing.Point(21, 59);
|
||||||
|
this.labelPleaseWait.Name = "labelPleaseWait";
|
||||||
|
this.labelPleaseWait.Size = new System.Drawing.Size(70, 13);
|
||||||
|
this.labelPleaseWait.TabIndex = 28;
|
||||||
|
this.labelPleaseWait.Text = "Please wait...";
|
||||||
|
//
|
||||||
|
// DownloadTesseract4
|
||||||
|
//
|
||||||
|
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
|
||||||
|
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
|
||||||
|
this.ClientSize = new System.Drawing.Size(320, 93);
|
||||||
|
this.Controls.Add(this.labelDescription1);
|
||||||
|
this.Controls.Add(this.labelPleaseWait);
|
||||||
|
this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedDialog;
|
||||||
|
this.MaximizeBox = false;
|
||||||
|
this.MinimizeBox = false;
|
||||||
|
this.Name = "DownloadTesseract4";
|
||||||
|
this.ShowInTaskbar = false;
|
||||||
|
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
|
||||||
|
this.Text = "Download Tesseract 4";
|
||||||
|
this.ResumeLayout(false);
|
||||||
|
this.PerformLayout();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
// Static caption label; its designer text is "Downloading Tesseract OCR".
private System.Windows.Forms.Label labelDescription1;
|
||||||
|
// Status label; its designer text is "Please wait...".
private System.Windows.Forms.Label labelPleaseWait;
|
||||||
|
}
|
||||||
|
}
|
70
src/Forms/Ocr/DownloadTesseract4.cs
Normal file
70
src/Forms/Ocr/DownloadTesseract4.cs
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
using System;
|
||||||
|
using System.IO;
|
||||||
|
using System.IO.Compression;
|
||||||
|
using System.Net;
|
||||||
|
using System.Windows.Forms;
|
||||||
|
using Nikse.SubtitleEdit.Core;
|
||||||
|
|
||||||
|
namespace Nikse.SubtitleEdit.Forms.Ocr
{
    /// <summary>
    /// Small dialog that downloads the Tesseract 4 archive (a gzip-compressed tar file) and
    /// unpacks it into <c>Configuration.TesseractDirectory</c>.
    /// The download starts as soon as the form is constructed. <c>DialogResult</c> is set to
    /// <c>OK</c> after a successful extraction and to <c>Cancel</c> when the download reports an error.
    /// </summary>
    public partial class DownloadTesseract4 : Form
    {
        private const string DownloadUrl = "https://github.com/SubtitleEdit/support-files/raw/master/Tesseract4.tar.gz";

        /// <summary>
        /// Builds the dialog and kicks off the asynchronous download.
        /// </summary>
        public DownloadTesseract4()
        {
            InitializeComponent();
            var wc = new WebClient { Proxy = Utilities.GetProxy() };

            // Subscribe first, start afterwards: no event can then be raised without a listener.
            wc.DownloadDataCompleted += wc_DownloadDataCompleted;
            wc.DownloadProgressChanged += (o, args) =>
            {
                labelPleaseWait.Text = Configuration.Settings.Language.General.PleaseWait + " " + args.ProgressPercentage + "%";
            };
            wc.DownloadDataAsync(new Uri(DownloadUrl));
        }

        /// <summary>
        /// Runs when the download has finished: reports a failure, or unpacks the received
        /// archive into the Tesseract folder and closes the dialog with <c>OK</c>.
        /// </summary>
        private void wc_DownloadDataCompleted(object sender, DownloadDataCompletedEventArgs e)
        {
            // The client is used for this one transfer only; release it now that the transfer is over.
            var client = sender as WebClient;
            if (client != null)
            {
                client.Dispose();
            }

            if (e.Error != null)
            {
                MessageBox.Show(Configuration.Settings.Language.GetTesseractDictionaries.DownloadFailed);
                DialogResult = DialogResult.Cancel;
                return;
            }

            string dictionaryFolder = Configuration.TesseractDirectory;
            if (!Directory.Exists(dictionaryFolder))
            {
                Directory.CreateDirectory(dictionaryFolder);
            }

            // Path.GetTempFileName() creates a zero-byte file on disk; appending ".tar" to its
            // name would leave that file orphaned. Build a unique name without creating a file.
            var tempFileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".tar");
            try
            {
                DecompressGZip(e.Result, tempFileName);
                ExtractTar(tempFileName, dictionaryFolder);
            }
            finally
            {
                // Remove the intermediate tar file even when decompression or extraction throws.
                File.Delete(tempFileName);
            }

            Cursor = Cursors.Default;
            DialogResult = DialogResult.OK;
        }

        /// <summary>
        /// Decompresses gzip data held in memory into the file <paramref name="outputFileName"/>.
        /// </summary>
        private static void DecompressGZip(byte[] gzipData, string outputFileName)
        {
            using (var ms = new MemoryStream(gzipData))
            using (var fs = new FileStream(outputFileName, FileMode.Create))
            using (var zip = new GZipStream(ms, CompressionMode.Decompress))
            {
                var buffer = new byte[1024];
                int nRead;
                while ((nRead = zip.Read(buffer, 0, buffer.Length)) > 0)
                {
                    fs.Write(buffer, 0, nRead);
                }
            }
        }

        /// <summary>
        /// Unpacks every folder and every non-empty file of the tar archive into <paramref name="targetFolder"/>.
        /// </summary>
        private static void ExtractTar(string tarFileName, string targetFolder)
        {
            string root = Path.GetFullPath(targetFolder);
            if (!root.EndsWith(Path.DirectorySeparatorChar.ToString(), StringComparison.Ordinal))
            {
                root += Path.DirectorySeparatorChar;
            }

            using (var tr = new TarReader(tarFileName))
            {
                foreach (var th in tr.Files)
                {
                    string fn = Path.GetFullPath(Path.Combine(root, th.FileName.Replace('/', Path.DirectorySeparatorChar)));

                    // The archive comes from the network: never write outside the target folder,
                    // which an entry name containing ".." or a rooted path would otherwise allow.
                    if (!fn.StartsWith(root, StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    if (th.IsFolder)
                    {
                        Directory.CreateDirectory(fn);
                    }
                    else if (th.FileSizeInBytes > 0)
                    {
                        th.WriteData(fn);
                    }
                }
            }
        }
    }
}
|
120
src/Forms/Ocr/DownloadTesseract4.resx
Normal file
120
src/Forms/Ocr/DownloadTesseract4.resx
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<root>
|
||||||
|
<!--
|
||||||
|
Microsoft ResX Schema
|
||||||
|
|
||||||
|
Version 2.0
|
||||||
|
|
||||||
|
The primary goals of this format is to allow a simple XML format
|
||||||
|
that is mostly human readable. The generation and parsing of the
|
||||||
|
various data types are done through the TypeConverter classes
|
||||||
|
associated with the data types.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
... ado.net/XML headers & schema ...
|
||||||
|
<resheader name="resmimetype">text/microsoft-resx</resheader>
|
||||||
|
<resheader name="version">2.0</resheader>
|
||||||
|
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
|
||||||
|
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
|
||||||
|
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
|
||||||
|
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
|
||||||
|
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
|
||||||
|
<value>[base64 mime encoded serialized .NET Framework object]</value>
|
||||||
|
</data>
|
||||||
|
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
|
||||||
|
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
|
||||||
|
<comment>This is a comment</comment>
|
||||||
|
</data>
|
||||||
|
|
||||||
|
There are any number of "resheader" rows that contain simple
|
||||||
|
name/value pairs.
|
||||||
|
|
||||||
|
Each data row contains a name, and value. The row also contains a
|
||||||
|
type or mimetype. Type corresponds to a .NET class that support
|
||||||
|
text/value conversion through the TypeConverter architecture.
|
||||||
|
Classes that don't support this are serialized and stored with the
|
||||||
|
mimetype set.
|
||||||
|
|
||||||
|
The mimetype is used for serialized objects, and tells the
|
||||||
|
ResXResourceReader how to depersist the object. This is currently not
|
||||||
|
extensible. For a given mimetype the value must be set accordingly:
|
||||||
|
|
||||||
|
Note - application/x-microsoft.net.object.binary.base64 is the format
|
||||||
|
that the ResXResourceWriter will generate, however the reader can
|
||||||
|
read any of the formats listed below.
|
||||||
|
|
||||||
|
mimetype: application/x-microsoft.net.object.binary.base64
|
||||||
|
value : The object must be serialized with
|
||||||
|
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
|
||||||
|
: and then encoded with base64 encoding.
|
||||||
|
|
||||||
|
mimetype: application/x-microsoft.net.object.soap.base64
|
||||||
|
value : The object must be serialized with
|
||||||
|
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
|
||||||
|
: and then encoded with base64 encoding.
|
||||||
|
|
||||||
|
mimetype: application/x-microsoft.net.object.bytearray.base64
|
||||||
|
value : The object must be serialized into a byte array
|
||||||
|
: using a System.ComponentModel.TypeConverter
|
||||||
|
: and then encoded with base64 encoding.
|
||||||
|
-->
|
||||||
|
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
|
||||||
|
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
|
||||||
|
<xsd:element name="root" msdata:IsDataSet="true">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:choice maxOccurs="unbounded">
|
||||||
|
<xsd:element name="metadata">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" use="required" type="xsd:string" />
|
||||||
|
<xsd:attribute name="type" type="xsd:string" />
|
||||||
|
<xsd:attribute name="mimetype" type="xsd:string" />
|
||||||
|
<xsd:attribute ref="xml:space" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="assembly">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:attribute name="alias" type="xsd:string" />
|
||||||
|
<xsd:attribute name="name" type="xsd:string" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="data">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||||
|
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
|
||||||
|
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
|
||||||
|
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
|
||||||
|
<xsd:attribute ref="xml:space" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
<xsd:element name="resheader">
|
||||||
|
<xsd:complexType>
|
||||||
|
<xsd:sequence>
|
||||||
|
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||||
|
</xsd:sequence>
|
||||||
|
<xsd:attribute name="name" type="xsd:string" use="required" />
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:choice>
|
||||||
|
</xsd:complexType>
|
||||||
|
</xsd:element>
|
||||||
|
</xsd:schema>
|
||||||
|
<resheader name="resmimetype">
|
||||||
|
<value>text/microsoft-resx</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="version">
|
||||||
|
<value>2.0</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="reader">
|
||||||
|
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||||
|
</resheader>
|
||||||
|
<resheader name="writer">
|
||||||
|
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||||
|
</resheader>
|
||||||
|
</root>
|
File diff suppressed because it is too large
Load Diff
188
src/Logic/OCR/Tesseract/TesseractMultiRunner.cs
Normal file
188
src/Logic/OCR/Tesseract/TesseractMultiRunner.cs
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Diagnostics;
|
||||||
|
using System.IO;
|
||||||
|
using System.Text;
|
||||||
|
using Nikse.SubtitleEdit.Core;
|
||||||
|
|
||||||
|
namespace Nikse.SubtitleEdit.Logic.Ocr.Tesseract
{
    /// <summary>
    /// Runs several images through a single tesseract call: the images are saved as
    /// temporary PNG files, listed in a temporary text file that is handed to tesseract,
    /// and the resulting hOCR output is reduced to plain text with italic tags.
    /// </summary>
    public class TesseractMultiRunner
    {
        // Messages collected from tesseract's stderr and from start-up failures.
        private readonly List<string> _tesseractErrors;

        public TesseractMultiRunner()
        {
            _tesseractErrors = new List<string>();
        }

        /// <summary>
        /// Stderr handler: records every message except empty lines and the known
        /// informational/noise lines listed below.
        /// </summary>
        private void TesseractErrorReceived(object sender, DataReceivedEventArgs e)
        {
            var msg = e.Data;

            if (string.IsNullOrEmpty(msg) ||
                msg.StartsWith("Tesseract Open Source OCR Engine", StringComparison.OrdinalIgnoreCase) ||
                msg.Contains("Too few characters", StringComparison.OrdinalIgnoreCase) ||
                msg.Contains("Empty page", StringComparison.OrdinalIgnoreCase) ||
                msg.Contains(" diacritics", StringComparison.OrdinalIgnoreCase) ||
                msg.Contains("Weak margin", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            _tesseractErrors.Add(msg);
        }

        /// <summary>
        /// OCRs all <paramref name="bmps"/> in one tesseract invocation.
        /// </summary>
        /// <param name="bmps">Images to recognize. Note: yellow is replaced with white in each bitmap in place.</param>
        /// <param name="language">Value passed to tesseract's <c>-l</c> option.</param>
        /// <param name="psmMode">Optional extra argument(s); appended verbatim (trimmed) to the command line, so it must include the option name itself.</param>
        /// <returns>The parsed text, or an empty string when there is no input, tesseract could not be started, or no output file was produced.</returns>
        public string Run(List<NikseBitmap> bmps, string language, string psmMode)
        {
            if (bmps == null || bmps.Count == 0)
            {
                return string.Empty;
            }

            var filesToDelete = new List<string>();
            try
            {
                var inputFileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".txt");
                filesToDelete.Add(inputFileName);

                var imageList = new StringBuilder();
                foreach (var bmp in bmps)
                {
                    // change yellow color to white - easier for Tesseract
                    bmp.ReplaceYellowWithWhite(); // optimized replace
                    var pngFileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".png");
                    filesToDelete.Add(pngFileName);
                    using (var b = bmp.GetBitmap())
                    {
                        b.Save(pngFileName, System.Drawing.Imaging.ImageFormat.Png);
                    }
                    imageList.AppendLine(pngFileName);
                }
                File.WriteAllText(inputFileName, imageList.ToString());

                // Tesseract appends the extension itself; depending on version it is ".html" or ".hocr".
                var outputFileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
                filesToDelete.Add(outputFileName + ".html");
                filesToDelete.Add(outputFileName + ".hocr");

                var arguments = "\"" + inputFileName + "\" \"" + outputFileName + "\" -l " + language;
                if (!string.IsNullOrEmpty(psmMode))
                {
                    arguments += " " + psmMode.Trim();
                }
                arguments += " hocr";

                using (var process = new Process())
                {
                    var runningOnUnix = Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac();
                    process.StartInfo = new ProcessStartInfo
                    {
                        UseShellExecute = false,
                        RedirectStandardError = true,
                        WindowStyle = ProcessWindowStyle.Hidden
                    };

                    if (runningOnUnix)
                    {
                        // Rely on the tesseract found on PATH and its own tessdata location.
                        process.StartInfo.FileName = "tesseract";
                        process.StartInfo.Arguments = arguments;
                    }
                    else
                    {
                        var tesseractDirectory = Configuration.TesseractDirectory;
                        var tessdataPath = Path.Combine(tesseractDirectory, "tessdata");
                        process.StartInfo.FileName = Path.Combine(tesseractDirectory, "tesseract.exe");
                        process.StartInfo.Arguments = "--tessdata-dir \"" + tessdataPath + "\" " + arguments;
                        process.StartInfo.WorkingDirectory = tesseractDirectory;
                        process.StartInfo.CreateNoWindow = true;
                        process.ErrorDataReceived += TesseractErrorReceived;
                        process.EnableRaisingEvents = true;
                    }

                    var started = false;
                    try
                    {
                        process.Start();
                        started = true;
                        process.BeginErrorReadLine();
                    }
                    catch
                    {
                        if (_tesseractErrors.Count <= 2)
                        {
                            if (runningOnUnix)
                            {
                                _tesseractErrors.Add("Unable to start 'Tesseract' - make sure tesseract-ocr 4.x is installed!");
                            }
                            else
                            {
                                _tesseractErrors.Add("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure Subtitle Edit is install correctly + Visual Studio 2017 C++ runtime");
                            }
                        }
                    }

                    if (!started)
                    {
                        // Waiting on a process that never started would throw; there is nothing to read.
                        return string.Empty;
                    }

                    process.WaitForExit(5000 + bmps.Count * 500);
                }

                var resultFileName = outputFileName + ".html";
                if (!File.Exists(resultFileName))
                {
                    resultFileName = outputFileName + ".hocr";
                }

                try
                {
                    if (File.Exists(resultFileName))
                    {
                        return ParseHocr(File.ReadAllText(resultFileName, Encoding.UTF8));
                    }
                }
                catch (Exception exception)
                {
                    // Reading the result is best effort; keep the reason and fall through to an empty result.
                    _tesseractErrors.Add(exception.Message);
                }

                return string.Empty;
            }
            finally
            {
                DeleteFiles(filesToDelete);
            }
        }

        /// <summary>
        /// Deletes the given files, ignoring individual failures so one locked file
        /// does not prevent the remaining ones from being removed.
        /// </summary>
        private static void DeleteFiles(IEnumerable<string> fileNames)
        {
            foreach (var fileName in fileNames)
            {
                try
                {
                    if (File.Exists(fileName))
                    {
                        File.Delete(fileName);
                    }
                }
                catch
                {
                    // ignored - temp file cleanup is best effort
                }
            }
        }

        /// <summary>
        /// Converts hOCR markup to text: &lt;em&gt; becomes &lt;i&gt;, all other tags are removed,
        /// repeated spaces and blank lines are collapsed, the basic HTML entities are
        /// decoded and line breaks are converted to <see cref="Environment.NewLine"/>.
        /// </summary>
        private static string ParseHocr(string html)
        {
            // Protect italic markers while all other tags are stripped.
            string s = html.Replace("\r\n", "\n").Replace("<em>", "@001_____").Replace("</em>", "@002_____");

            // Copy everything outside <...>; an unterminated '<' and the text after it are kept as-is.
            var sb = new StringBuilder(s.Length);
            int pos = 0;
            while (pos < s.Length)
            {
                int first = s.IndexOf('<', pos);
                int last = first < 0 ? -1 : s.IndexOf('>', first);
                if (last < 0)
                {
                    sb.Append(s, pos, s.Length - pos);
                    break;
                }

                sb.Append(s, pos, first - pos);
                pos = last + 1;
            }

            s = sb.ToString().Trim();
            s = s.Replace("@001_____", "<i>").Replace("@002_____", "</i>");
            while (s.Contains("  "))
            {
                s = s.Replace("  ", " ");
            }
            s = s.Replace("</i> <i>", " ");

            // html escape decoding - "&amp;" goes last so e.g. "&amp;lt;" ends up as the text "&lt;"
            s = s.Replace("&lt;", "<")
                 .Replace("&gt;", ">")
                 .Replace("&quot;", "\"")
                 .Replace("&#39;", "'")
                 .Replace("&apos;", "'")
                 .Replace("&amp;", "&");

            while (s.Contains("\n\n"))
            {
                s = s.Replace("\n\n", "\n");
            }
            s = s.Replace("</i>\n<i>", "\n");
            s = s.Replace("\n", Environment.NewLine);

            return s;
        }
    }
}
|
160
src/Logic/OCR/Tesseract/TesseractRunner.cs
Normal file
160
src/Logic/OCR/Tesseract/TesseractRunner.cs
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Diagnostics;
|
||||||
|
using System.IO;
|
||||||
|
using System.Text;
|
||||||
|
using Nikse.SubtitleEdit.Core;
|
||||||
|
|
||||||
|
namespace Nikse.SubtitleEdit.Logic.Ocr.Tesseract
{
    /// <summary>
    /// Runs tesseract on a single image file and returns the text extracted from the
    /// hOCR output. Supports both the default tesseract directory and the 3.02 one.
    /// </summary>
    public class TesseractRunner
    {
        /// <summary>Start-up failures and (for the non-3.02 engine) filtered stderr messages.</summary>
        public List<string> TesseractErrors { get; set; }

        /// <summary>Error text for the most recent <see cref="Run"/>; reset to null when a run starts.</summary>
        public string LastError { get; set; }

        public TesseractRunner()
        {
            TesseractErrors = new List<string>();
        }

        /// <summary>
        /// OCRs one image file.
        /// </summary>
        /// <param name="languageCode">Value passed to tesseract's <c>-l</c> option.</param>
        /// <param name="psmMode">Page segmentation mode value; skipped when null or empty.</param>
        /// <param name="engineMode">OCR engine mode value (<c>--oem</c>); skipped when null/empty or when <paramref name="run302"/> is set.</param>
        /// <param name="imageFileName">Image to recognize. The file is deleted after the run (best effort), except when tesseract fails to start.</param>
        /// <param name="run302">Use the Tesseract 3.02 directory and its command line syntax.</param>
        /// <returns>The parsed text, an empty string when no output was produced, or "Error!" when tesseract could not be started.</returns>
        public string Run(string languageCode, string psmMode, string engineMode, string imageFileName, bool run302 = false)
        {
            LastError = null;
            var dir = run302 ? Configuration.Tesseract302Directory : Configuration.TesseractDirectory;
            string tempTextFileName = Path.GetTempPath() + Guid.NewGuid();
            using (var process = new Process())
            {
                process.StartInfo = new ProcessStartInfo(Path.Combine(dir, "tesseract.exe"))
                {
                    UseShellExecute = true,
                    Arguments = "\"" + imageFileName + "\" \"" + tempTextFileName + "\" -l " + languageCode
                };

                if (!string.IsNullOrEmpty(psmMode))
                {
                    // Tesseract 3.02 only knows the single-dash form of this option.
                    process.StartInfo.Arguments += (run302 ? " -psm " : " --psm ") + psmMode;
                }

                if (!string.IsNullOrEmpty(engineMode) && !run302)
                {
                    process.StartInfo.Arguments += " --oem " + engineMode;
                }

                process.StartInfo.Arguments += " hocr";
                if (run302)
                {
                    process.StartInfo.WorkingDirectory = Configuration.Tesseract302Directory;
                }
                else
                {
                    // Stderr can only be captured without shell execute and with redirection enabled;
                    // otherwise the ErrorDataReceived handler is never invoked.
                    process.StartInfo.UseShellExecute = false;
                    process.StartInfo.CreateNoWindow = true;
                    process.StartInfo.RedirectStandardError = true;
                    process.ErrorDataReceived += TesseractErrorReceived;
                    process.StartInfo.Arguments = " --tessdata-dir \"" + Path.Combine(dir, "tessdata") + "\" " + process.StartInfo.Arguments.Trim();
                }

                process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;
                try
                {
                    process.Start();
                    if (!run302)
                    {
                        process.BeginErrorReadLine();
                    }
                }
                catch (Exception exception)
                {
                    LastError = exception.Message + Environment.NewLine + exception.StackTrace;
                    TesseractErrors.Add(LastError);
                    return "Error!";
                }
                process.WaitForExit(5000);
            }

            // Tesseract appends the extension itself; depending on version it is ".html" or ".hocr".
            string result = string.Empty;
            string outputFileName = tempTextFileName + ".html";
            if (!File.Exists(outputFileName))
            {
                outputFileName = tempTextFileName + ".hocr";
            }

            try
            {
                if (File.Exists(outputFileName))
                {
                    result = ParseHocr(File.ReadAllText(outputFileName, Encoding.UTF8));
                }
            }
            catch
            {
                // ignored - an unreadable result is reported as empty text
                result = string.Empty;
            }

            // Clean up independently of each other and of whether reading succeeded.
            TryDelete(outputFileName);
            TryDelete(imageFileName);

            return result;
        }

        /// <summary>Deletes a file, swallowing any failure (cleanup is best effort).</summary>
        private static void TryDelete(string fileName)
        {
            try
            {
                File.Delete(fileName);
            }
            catch
            {
                // ignored
            }
        }

        /// <summary>
        /// Converts hOCR markup to text: &lt;em&gt; becomes &lt;i&gt;, all other tags are removed,
        /// repeated spaces and blank lines are collapsed, the basic HTML entities are
        /// decoded and line breaks are converted to <see cref="Environment.NewLine"/>.
        /// </summary>
        private static string ParseHocr(string html)
        {
            // Protect italic markers while all other tags are stripped.
            string s = html.Replace("\r\n", "\n").Replace("<em>", "@001_____").Replace("</em>", "@002_____");

            // Copy everything outside <...>; an unterminated '<' and the text after it are kept as-is.
            var sb = new StringBuilder(s.Length);
            int pos = 0;
            while (pos < s.Length)
            {
                int first = s.IndexOf('<', pos);
                int last = first < 0 ? -1 : s.IndexOf('>', first);
                if (last < 0)
                {
                    sb.Append(s, pos, s.Length - pos);
                    break;
                }

                sb.Append(s, pos, first - pos);
                pos = last + 1;
            }

            s = sb.ToString().Trim();
            s = s.Replace("@001_____", "<i>").Replace("@002_____", "</i>");
            while (s.Contains("  "))
            {
                s = s.Replace("  ", " ");
            }
            s = s.Replace("</i> <i>", " ");

            // html escape decoding - "&amp;" goes last so e.g. "&amp;lt;" ends up as the text "&lt;"
            s = s.Replace("&lt;", "<")
                 .Replace("&gt;", ">")
                 .Replace("&quot;", "\"")
                 .Replace("&#39;", "'")
                 .Replace("&apos;", "'")
                 .Replace("&amp;", "&");

            while (s.Contains("\n\n"))
            {
                s = s.Replace("\n\n", "\n");
            }
            s = s.Replace("</i>\n<i>", "\n");
            s = s.Replace("\n", Environment.NewLine);

            return s;
        }

        /// <summary>
        /// Stderr handler: skips empty and known-noise lines, then (for up to roughly the
        /// first hundred messages) appends the message to <see cref="TesseractErrors"/> and
        /// merges it into <see cref="LastError"/> unless it is already contained there.
        /// </summary>
        private void TesseractErrorReceived(object sender, DataReceivedEventArgs e)
        {
            string msg = e.Data;

            if (string.IsNullOrEmpty(msg) ||
                msg.StartsWith("Tesseract Open Source OCR Engine", StringComparison.OrdinalIgnoreCase) ||
                msg.Contains("Too few characters", StringComparison.OrdinalIgnoreCase) ||
                msg.Contains("Empty page", StringComparison.OrdinalIgnoreCase) ||
                msg.Contains(" diacritics", StringComparison.OrdinalIgnoreCase) ||
                msg.Contains("Weak margin", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            if (TesseractErrors.Count <= 100)
            {
                if (string.IsNullOrEmpty(LastError))
                {
                    LastError = msg;
                }
                else if (!LastError.Contains(msg))
                {
                    LastError = LastError + Environment.NewLine + msg;
                }
                TesseractErrors.Add(msg);
            }
        }
    }
}
|
95
src/Logic/OCR/Tesseract/TesseractThreadRunner.cs
Normal file
95
src/Logic/OCR/Tesseract/TesseractThreadRunner.cs
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Drawing;
|
||||||
|
using System.IO;
|
||||||
|
using System.Threading;
|
||||||
|
|
||||||
|
namespace Nikse.SubtitleEdit.Logic.Ocr.Tesseract
{
    /// <summary>
    /// Queues images for OCR on the thread pool and hands finished jobs back, in the
    /// order they were added, through an optional callback driven by <see cref="CheckQueue"/>.
    /// </summary>
    public class TesseractThreadRunner
    {
        /// <summary>Callback signature used to report a finished job.</summary>
        public delegate void OcrDone(int index, ImageJob job);

        private readonly OcrDone _callback;
        private readonly Queue<ImageJob> _jobQueue;
        // Guards _jobQueue and the Completed timestamp of queued jobs.
        private readonly object _queueLock = new object();
        private readonly TesseractRunner _tesseractRunner;
        // Written by Cancel() and read on thread-pool threads.
        private volatile bool _abort;

        /// <param name="callback">Invoked from <see cref="CheckQueue"/> for each finished job; may be null.</param>
        public TesseractThreadRunner(OcrDone callback = null)
        {
            _jobQueue = new Queue<ImageJob>();
            _callback = callback;
            _tesseractRunner = new TesseractRunner();
        }

        /// <summary>One queued image together with its OCR settings and, once done, its result.</summary>
        public class ImageJob
        {
            public string FileName { get; set; }
            public int Index { get; set; }
            public string Result { get; set; }
            // DateTime.MaxValue until the job has been processed.
            public DateTime Completed { get; set; }
            public string LanguageCode { get; set; }
            public string PsmMode { get; set; }
            public string EngineMode { get; set; }
            public bool Run302 { get; set; }
            public Bitmap Bitmap { get; set; }
        }

        /// <summary>
        /// Thread-pool work item: runs tesseract for the job and stamps its completion time.
        /// After <see cref="Cancel"/> the job is not run and only its temp image is removed.
        /// </summary>
        private void DoOcr(object j)
        {
            var job = (ImageJob)j;
            if (_abort)
            {
                // The runner would have deleted the image; do it here so cancelled jobs leave nothing behind.
                try
                {
                    File.Delete(job.FileName);
                }
                catch (IOException)
                {
                    // ignored - temp file cleanup is best effort
                }
                catch (UnauthorizedAccessException)
                {
                    // ignored - temp file cleanup is best effort
                }
                return;
            }

            job.Result = _tesseractRunner.Run(job.LanguageCode, job.PsmMode, job.EngineMode, job.FileName, job.Run302);
            lock (_queueLock)
            {
                job.Completed = DateTime.UtcNow;
            }
        }

        /// <summary>
        /// Saves <paramref name="bmp"/> as a temporary PNG, schedules OCR for it on the
        /// thread pool and adds the job to the result queue.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="bmp"/> is null.</exception>
        public void AddImageJob(Bitmap bmp, int index, string language, string psmMode, string engineMode, bool run302)
        {
            if (bmp == null)
            {
                throw new ArgumentNullException(nameof(bmp));
            }

            var job = new ImageJob
            {
                // Path.GetTempFileName() would create (and leave behind) an extra empty file.
                FileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".png"),
                Index = index,
                Completed = DateTime.MaxValue,
                Bitmap = bmp,
                LanguageCode = language,
                PsmMode = psmMode,
                EngineMode = engineMode,
                Run302 = run302
            };
            bmp.Save(job.FileName, System.Drawing.Imaging.ImageFormat.Png);

            lock (_queueLock)
            {
                _jobQueue.Enqueue(job);
            }
            ThreadPool.QueueUserWorkItem(DoOcr, job);
        }

        /// <summary>
        /// Reports at most one job per call: if the oldest queued job has completed it is
        /// removed from the queue and passed to the callback. Jobs are therefore
        /// reported strictly in the order they were added.
        /// </summary>
        public void CheckQueue()
        {
            ImageJob finished = null;
            lock (_queueLock)
            {
                if (_jobQueue.Count == 0)
                {
                    return;
                }

                var checkTime = DateTime.UtcNow;
                var job = _jobQueue.Peek();
                if (job != null && job.Completed < checkTime)
                {
                    finished = _jobQueue.Dequeue();
                }
            }

            // Invoke outside the lock so worker threads are not blocked while the callback runs.
            if (finished != null)
            {
                _callback?.Invoke(finished.Index, finished);
            }
        }

        /// <summary>Makes jobs that have not started yet return without running OCR.</summary>
        public void Cancel()
        {
            _abort = true;
        }
    }
}
|
@ -518,6 +518,12 @@
|
|||||||
<Compile Include="Forms\Networking\NetworkStart.Designer.cs">
|
<Compile Include="Forms\Networking\NetworkStart.Designer.cs">
|
||||||
<DependentUpon>NetworkStart.cs</DependentUpon>
|
<DependentUpon>NetworkStart.cs</DependentUpon>
|
||||||
</Compile>
|
</Compile>
|
||||||
|
<Compile Include="Forms\Ocr\DownloadTesseract4.cs">
|
||||||
|
<SubType>Form</SubType>
|
||||||
|
</Compile>
|
||||||
|
<Compile Include="Forms\Ocr\DownloadTesseract4.Designer.cs">
|
||||||
|
<DependentUpon>DownloadTesseract4.cs</DependentUpon>
|
||||||
|
</Compile>
|
||||||
<Compile Include="Forms\Ocr\DownloadTesseract302.cs">
|
<Compile Include="Forms\Ocr\DownloadTesseract302.cs">
|
||||||
<SubType>Form</SubType>
|
<SubType>Form</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
@ -937,6 +943,9 @@
|
|||||||
<Compile Include="Logic\Ocr\NOcrPoint.cs" />
|
<Compile Include="Logic\Ocr\NOcrPoint.cs" />
|
||||||
<Compile Include="Logic\Ocr\PreprocessingSettings.cs" />
|
<Compile Include="Logic\Ocr\PreprocessingSettings.cs" />
|
||||||
<Compile Include="Logic\Ocr\SpellCheckOcrTextResult.cs" />
|
<Compile Include="Logic\Ocr\SpellCheckOcrTextResult.cs" />
|
||||||
|
<Compile Include="Logic\Ocr\Tesseract\TesseractThreadRunner.cs" />
|
||||||
|
<Compile Include="Logic\Ocr\Tesseract\TesseractRunner.cs" />
|
||||||
|
<Compile Include="Logic\Ocr\Tesseract\TesseractMultiRunner.cs" />
|
||||||
<Compile Include="Logic\RtfTextConverterRichTextBox.cs" />
|
<Compile Include="Logic\RtfTextConverterRichTextBox.cs" />
|
||||||
<Compile Include="Logic\SpellCheck\Hunspell.cs" />
|
<Compile Include="Logic\SpellCheck\Hunspell.cs" />
|
||||||
<Compile Include="Logic\SpellCheck\LinuxHunspell.cs" />
|
<Compile Include="Logic\SpellCheck\LinuxHunspell.cs" />
|
||||||
@ -1207,6 +1216,9 @@
|
|||||||
<EmbeddedResource Include="Forms\Networking\NetworkStart.resx">
|
<EmbeddedResource Include="Forms\Networking\NetworkStart.resx">
|
||||||
<DependentUpon>NetworkStart.cs</DependentUpon>
|
<DependentUpon>NetworkStart.cs</DependentUpon>
|
||||||
</EmbeddedResource>
|
</EmbeddedResource>
|
||||||
|
<EmbeddedResource Include="Forms\Ocr\DownloadTesseract4.resx">
|
||||||
|
<DependentUpon>DownloadTesseract4.cs</DependentUpon>
|
||||||
|
</EmbeddedResource>
|
||||||
<EmbeddedResource Include="Forms\Ocr\DownloadTesseract302.resx">
|
<EmbeddedResource Include="Forms\Ocr\DownloadTesseract302.resx">
|
||||||
<DependentUpon>DownloadTesseract302.cs</DependentUpon>
|
<DependentUpon>DownloadTesseract302.cs</DependentUpon>
|
||||||
</EmbeddedResource>
|
</EmbeddedResource>
|
||||||
|
Loading…
Reference in New Issue
Block a user