diff --git a/Tesseract/msvcp90.dll b/Tesseract302/msvcp90.dll similarity index 100% rename from Tesseract/msvcp90.dll rename to Tesseract302/msvcp90.dll diff --git a/Tesseract/msvcr90.dll b/Tesseract302/msvcr90.dll similarity index 100% rename from Tesseract/msvcr90.dll rename to Tesseract302/msvcr90.dll diff --git a/Tesseract/tessdata/configs/hocr b/Tesseract302/tessdata/configs/hocr similarity index 100% rename from Tesseract/tessdata/configs/hocr rename to Tesseract302/tessdata/configs/hocr diff --git a/Tesseract/tessdata/eng.traineddata b/Tesseract302/tessdata/eng.traineddata similarity index 100% rename from Tesseract/tessdata/eng.traineddata rename to Tesseract302/tessdata/eng.traineddata diff --git a/Tesseract/tessdata/music.traineddata b/Tesseract302/tessdata/music.traineddata similarity index 100% rename from Tesseract/tessdata/music.traineddata rename to Tesseract302/tessdata/music.traineddata diff --git a/Tesseract/tesseract.exe b/Tesseract302/tesseract.exe similarity index 100% rename from Tesseract/tesseract.exe rename to Tesseract302/tesseract.exe diff --git a/Tesseract4/tesseract.exe b/Tesseract4/tesseract.exe index 0b4a0fc1c..0529798a6 100644 Binary files a/Tesseract4/tesseract.exe and b/Tesseract4/tesseract.exe differ diff --git a/installer/Subtitle_Edit_installer.iss b/installer/Subtitle_Edit_installer.iss index b2e29d649..c6a85b8dc 100644 --- a/installer/Subtitle_Edit_installer.iss +++ b/installer/Subtitle_Edit_installer.iss @@ -269,16 +269,12 @@ Source: ..\Changelog.txt; DestDir: {app}; Source: ..\LICENSE.txt; DestDir: {app}; Flags: ignoreversion; Components: main Source: Icons\uninstall.ico; DestDir: {app}\Icons; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\tessdata\configs\hocr; DestDir: {app}\Tesseract4\tessdata\configs; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\tessdata\osd.traineddata; DestDir: {app}\Tesseract4\tessdata; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\tesseract.exe; 
DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\gif.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\jpeg62.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\leptonica-1.74.4.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\libpng16.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\lzma.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\tiff.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main -Source: ..\Tesseract4\zlib1.dll; DestDir: {app}\Tesseract4; Flags: ignoreversion; Components: main +Source: ..\Tesseract302\tessdata\configs\hocr; DestDir: {app}\Tesseract302\tessdata\configs; Flags: ignoreversion; Components: main +Source: ..\Tesseract302\tessdata\eng.traineddata; DestDir: {app}\Tesseract302\tessdata; Flags: ignoreversion; Components: main +Source: ..\Tesseract302\tessdata\music.traineddata; DestDir: {app}\Tesseract302\tessdata; Flags: ignoreversion; Components: main +Source: ..\Tesseract302\tesseract.exe; DestDir: {app}\Tesseract302; Flags: ignoreversion; Components: main +Source: ..\Tesseract302\msvcp90.dll; DestDir: {app}\Tesseract302; Flags: ignoreversion; Components: main +Source: ..\Tesseract302\msvcr90.dll; DestDir: {app}\Tesseract302; Flags: ignoreversion; Components: main [Icons] @@ -325,6 +321,12 @@ Type: files; Name: {app}\TessData\eng.word-dawg; Check: IsU Type: dirifempty; Name: {app}\TessData; Check: IsUpgrade() Type: files; Name: {app}\Tesseract\leptonlib.dll; Check: IsUpgrade() Type: files; Name: {app}\tessnet2_32.dll; Check: IsUpgrade() +Type: files; Name: {app}\Tesseract302\tessdata\configs\hocr; Check: IsUpgrade() +Type: files; Name: {app}\Tesseract302\tessdata\eng.traineddata; Check: IsUpgrade() +Type: files; Name: {app}\Tesseract302\tessdata\music.traineddata; 
Check: IsUpgrade() +Type: files; Name: {app}\Tesseract302\tesseract.exe; Check: IsUpgrade() +Type: files; Name: {app}\Tesseract302\msvcp90.dll; Check: IsUpgrade() +Type: files; Name: {app}\Tesseract302\msvcr90.dll; Check: IsUpgrade() Type: files; Name: {app}\Icons\SubtitleEdit.srt.ico; Check: IsUpgrade() Type: files; Name: {app}\DocumentIcons.dll; Check: IsUpgrade() Type: files; Name: {app}\Settings.xml; Check: IsUpgrade() @@ -596,6 +598,12 @@ begin DelTree(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata\*.traineddata'), False, True, False); DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata\configs\hocr')); + DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tessdata\configs\hocr')); + DelTree(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tessdata\*.traineddata'), False, True, False); + DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tesseract.exe')); + DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\msvcp90.dll')); + DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\msvcr90.dll')); + // Remove possible installed mpv DeleteFile(ExpandConstant('{userappdata}\Subtitle Edit\mpv-1.dll')); @@ -614,6 +622,9 @@ begin RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata\configs')); RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4\tessdata')); RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract4')); + RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tessdata\configs')); + RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302\tessdata')); + RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit\Tesseract302')); RemoveDir(ExpandConstant('{userappdata}\Subtitle Edit')); end; diff --git a/libse/Configuration.cs b/libse/Configuration.cs index 04c59b319..801b43458 100644 --- a/libse/Configuration.cs +++ b/libse/Configuration.cs @@ -17,7 +17,7 @@ namespace 
Nikse.SubtitleEdit.Core public static readonly string BaseDirectory = GetBaseDirectory(); public static readonly string DataDirectory = GetDataDirectory(); - public static readonly string TesseractOriginalDirectory = BaseDirectory + "Tesseract4" + Path.DirectorySeparatorChar; + public static readonly string TesseractOriginalDirectory = BaseDirectory + "Tesseract302" + Path.DirectorySeparatorChar; public static readonly string DictionariesDirectory = DataDirectory + "Dictionaries" + Path.DirectorySeparatorChar; public static readonly string SpectrogramsDirectory = DataDirectory + "Spectrograms" + Path.DirectorySeparatorChar; public static readonly string SceneChangesDirectory = DataDirectory + "SceneChanges" + Path.DirectorySeparatorChar; diff --git a/src/Forms/Main.cs b/src/Forms/Main.cs index da0542406..3e1997879 100644 --- a/src/Forms/Main.cs +++ b/src/Forms/Main.cs @@ -3665,7 +3665,7 @@ namespace Nikse.SubtitleEdit.Forms bool isUnicode = currentEncoding == Encoding.Unicode || currentEncoding == Encoding.UTF32 || currentEncoding == Encoding.GetEncoding(12001) || currentEncoding == Encoding.UTF7 || currentEncoding == Encoding.UTF8; if (!isUnicode && (allText.Contains(new[] { '♪', '♫', '♥', '—', '―', '…' }))) // ANSI & music/unicode symbols { - if (MessageBox.Show(string.Format(_language.UnicodeMusicSymbolsAnsiWarning), Title, MessageBoxButtons.YesNo) == DialogResult.No) + if (MessageBox.Show(string.Format(_language.UnicodeMusicSymbolsAnsiWarning), Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes) return DialogResult.No; } @@ -3685,7 +3685,7 @@ namespace Nikse.SubtitleEdit.Forms } if (containsNegativeTime) { - if (MessageBox.Show(_language.NegativeTimeWarning, Title, MessageBoxButtons.YesNo) == DialogResult.No) + if (MessageBox.Show(_language.NegativeTimeWarning, Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes) return DialogResult.No; } @@ -3698,7 +3698,7 @@ namespace Nikse.SubtitleEdit.Forms if 
(MessageBox.Show(string.Format(_language.OverwriteModifiedFile, _fileName, fileOnDisk.ToShortDateString(), fileOnDisk.ToString("HH:mm:ss"), Environment.NewLine, _fileDateTime.ToShortDateString(), _fileDateTime.ToString("HH:mm:ss")), - Title + " - " + _language.FileOnDiskModified, MessageBoxButtons.YesNo) == DialogResult.No) + Title + " - " + _language.FileOnDiskModified, MessageBoxButtons.YesNoCancel) != DialogResult.Yes) return DialogResult.No; } if (fileInfo.IsReadOnly) @@ -3797,7 +3797,7 @@ namespace Nikse.SubtitleEdit.Forms } if (containsNegativeTime) { - if (MessageBox.Show(_language.NegativeTimeWarning, Title, MessageBoxButtons.YesNo) == DialogResult.No) + if (MessageBox.Show(_language.NegativeTimeWarning, Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes) return DialogResult.No; } @@ -3824,7 +3824,7 @@ namespace Nikse.SubtitleEdit.Forms bool isUnicode = currentEncoding != null && (currentEncoding == Encoding.Unicode || currentEncoding == Encoding.UTF32 || currentEncoding == Encoding.UTF7 || currentEncoding == Encoding.UTF8); if (!isUnicode && (allText.Contains(new[] { '♪', '♫', '♥', '—', '―', '…' }))) // ANSI & music/unicode symbols { - if (MessageBox.Show(string.Format(_language.UnicodeMusicSymbolsAnsiWarning), Title, MessageBoxButtons.YesNo) == DialogResult.No) + if (MessageBox.Show(string.Format(_language.UnicodeMusicSymbolsAnsiWarning), Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes) return DialogResult.No; } @@ -4745,7 +4745,7 @@ namespace Nikse.SubtitleEdit.Forms //if we fail to find the text, we might want to start searching from the top of the file. 
if (!found && _findHelper.StartLineIndex >= 1) { - if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes) + if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes) { found = _findHelper.Find(_subtitle, _subtitleAlternate, -1); } @@ -4827,7 +4827,7 @@ namespace Nikse.SubtitleEdit.Forms { if (_findHelper.StartLineIndex >= 1) { - if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes) + if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes) { _findHelper.StartLineIndex = 0; if (_findHelper.Find(_subtitle, _subtitleAlternate, 0)) @@ -5094,7 +5094,7 @@ namespace Nikse.SubtitleEdit.Forms string msgText = _language.ReplaceContinueNotFound; if (matches.Count > 0) msgText = string.Format(_language.ReplaceXContinue, matches.Count); - if (MessageBox.Show(msgText, _language.ReplaceContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes) + if (MessageBox.Show(msgText, _language.ReplaceContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes) { s = result.Substring(0, start - 1); var rest = result.Remove(0, start - 1); @@ -5234,7 +5234,7 @@ namespace Nikse.SubtitleEdit.Forms string msgText = _language.ReplaceContinueNotFound; if (replaceCount > 0) msgText = string.Format(_language.ReplaceXContinue, replaceCount); - if (MessageBox.Show(msgText, _language.ReplaceContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes) + if (MessageBox.Show(msgText, _language.ReplaceContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes) { stopAtIndex = firstIndex; _findHelper.MatchInOriginal = false; @@ -5277,7 +5277,7 @@ namespace Nikse.SubtitleEdit.Forms if (_replaceStartLineIndex >= 1) // Prompt for start over { _replaceStartLineIndex = 0; - if (MessageBox.Show(_language.FindContinue, 
_language.FindContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes) + if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes) { SubtitleListview1.SelectIndexAndEnsureVisible(0, true); _findHelper.StartLineIndex = 0; @@ -5361,7 +5361,7 @@ namespace Nikse.SubtitleEdit.Forms if (_replaceStartLineIndex >= 1) { _replaceStartLineIndex = 0; - if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNo) == DialogResult.Yes) + if (MessageBox.Show(_language.FindContinue, _language.FindContinueTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes) { SubtitleListview1.SelectIndexAndEnsureVisible(0, true); _findHelper.StartLineIndex = 0; @@ -6008,7 +6008,7 @@ namespace Nikse.SubtitleEdit.Forms visualSync.ShowDialog(this); if (visualSync.OkPressed) { - if (MessageBox.Show(_language.AppendSynchronizedSubtitlePrompt, _language.SubtitleAppendPromptTitle, MessageBoxButtons.YesNo) == DialogResult.Yes) + if (MessageBox.Show(_language.AppendSynchronizedSubtitlePrompt, _language.SubtitleAppendPromptTitle, MessageBoxButtons.YesNoCancel) == DialogResult.Yes) { int start = _subtitle.Paragraphs.Count + 1; var fr = CurrentFrameRate; @@ -6185,7 +6185,7 @@ namespace Nikse.SubtitleEdit.Forms if (!isSwedish) promptText = _language.TranslateSwedishToDanishWarning; - if (MessageBox.Show(promptText, Title, MessageBoxButtons.YesNo) == DialogResult.Yes) + if (MessageBox.Show(promptText, Title, MessageBoxButtons.YesNoCancel) == DialogResult.Yes) { try { @@ -6537,6 +6537,12 @@ namespace Nikse.SubtitleEdit.Forms RefreshSelectedParagraph(); } + public void DeleteLine(int i, Paragraph p) + { + MakeHistoryForUndo(Configuration.Settings.Language.Main.OneLineDeleted); + DeleteSelectedLines(); + } + public void FocusParagraph(int index) { if (tabControlSubtitle.SelectedIndex == TabControlSourceView) @@ -7445,7 +7451,7 @@ namespace Nikse.SubtitleEdit.Forms askText = 
_language.DeleteOneLinePrompt; } - if (Configuration.Settings.General.PromptDeleteLines && MessageBox.Show(askText, Title, MessageBoxButtons.YesNo) == DialogResult.No) + if (Configuration.Settings.General.PromptDeleteLines && MessageBox.Show(askText, Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes) { _cutText = string.Empty; return; @@ -11843,7 +11849,7 @@ namespace Nikse.SubtitleEdit.Forms if (File.Exists(idxFileName)) return true; - var dr = MessageBox.Show(string.Format(_language.IdxFileNotFoundWarning, idxFileName), _title, MessageBoxButtons.YesNo); + var dr = MessageBox.Show(string.Format(_language.IdxFileNotFoundWarning, idxFileName), _title, MessageBoxButtons.YesNoCancel); return dr == DialogResult.Yes; } if (verbose) @@ -14940,7 +14946,7 @@ namespace Nikse.SubtitleEdit.Forms if (timeCodeSubtitle.Paragraphs.Count != _subtitle.Paragraphs.Count) { var text = string.Format(_language.ImportTimeCodesDifferentNumberOfLinesWarning, timeCodeSubtitle.Paragraphs.Count, _subtitle.Paragraphs.Count); - if (MessageBox.Show(this, text, _title, MessageBoxButtons.YesNo) == DialogResult.No) + if (MessageBox.Show(this, text, _title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes) return; } @@ -22658,7 +22664,7 @@ namespace Nikse.SubtitleEdit.Forms newP.EndTime.TotalMilliseconds <= p.EndTime.TotalMilliseconds)) { // new subs will overlap existing subs - if (MessageBox.Show(_language.PromptInsertSubtitleOverlap, _languageGeneral.Title, MessageBoxButtons.YesNo) == DialogResult.No) + if (MessageBox.Show(_language.PromptInsertSubtitleOverlap, _languageGeneral.Title, MessageBoxButtons.YesNoCancel) != DialogResult.Yes) { return; } diff --git a/src/Forms/Ocr/DownloadTesseract302.Designer.cs b/src/Forms/Ocr/DownloadTesseract302.Designer.cs index ff4dfb8f9..492b6f3e9 100644 --- a/src/Forms/Ocr/DownloadTesseract302.Designer.cs +++ b/src/Forms/Ocr/DownloadTesseract302.Designer.cs @@ -37,9 +37,9 @@ this.labelDescription1.AutoSize = true; this.labelDescription1.Location = new 
System.Drawing.Point(21, 27); this.labelDescription1.Name = "labelDescription1"; - this.labelDescription1.Size = new System.Drawing.Size(220, 13); + this.labelDescription1.Size = new System.Drawing.Size(145, 13); this.labelDescription1.TabIndex = 29; - this.labelDescription1.Text = "Get Tesseract OCR dictionaries from the web"; + this.labelDescription1.Text = "Downloading Tesseract OCR"; // // labelPleaseWait // @@ -63,7 +63,7 @@ this.Name = "DownloadTesseract302"; this.ShowInTaskbar = false; this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent; - this.Text = "DownloadTesseract302"; + this.Text = "Download Tesseract 3.02"; this.ResumeLayout(false); this.PerformLayout(); diff --git a/src/Forms/Ocr/DownloadTesseract4.Designer.cs b/src/Forms/Ocr/DownloadTesseract4.Designer.cs new file mode 100644 index 000000000..c95455fce --- /dev/null +++ b/src/Forms/Ocr/DownloadTesseract4.Designer.cs @@ -0,0 +1,77 @@ +namespace Nikse.SubtitleEdit.Forms.Ocr +{ + partial class DownloadTesseract4 + { + /// + /// Required designer variable. + /// + private System.ComponentModel.IContainer components = null; + + /// + /// Clean up any resources being used. + /// + /// true if managed resources should be disposed; otherwise, false. + protected override void Dispose(bool disposing) + { + if (disposing && (components != null)) + { + components.Dispose(); + } + base.Dispose(disposing); + } + + #region Windows Form Designer generated code + + /// + /// Required method for Designer support - do not modify + /// the contents of this method with the code editor. 
+        ///
+        private void InitializeComponent()
+        {
+            this.labelDescription1 = new System.Windows.Forms.Label();
+            this.labelPleaseWait = new System.Windows.Forms.Label();
+            this.SuspendLayout();
+            //
+            // labelDescription1
+            //
+            this.labelDescription1.AutoSize = true;
+            this.labelDescription1.Location = new System.Drawing.Point(21, 27);
+            this.labelDescription1.Name = "labelDescription1";
+            this.labelDescription1.Size = new System.Drawing.Size(145, 13);
+            this.labelDescription1.TabIndex = 29;
+            this.labelDescription1.Text = "Downloading Tesseract OCR";
+            //
+            // labelPleaseWait
+            //
+            this.labelPleaseWait.AutoSize = true;
+            this.labelPleaseWait.Location = new System.Drawing.Point(21, 59);
+            this.labelPleaseWait.Name = "labelPleaseWait";
+            this.labelPleaseWait.Size = new System.Drawing.Size(70, 13);
+            this.labelPleaseWait.TabIndex = 28;
+            this.labelPleaseWait.Text = "Please wait...";
+            //
+            // DownloadTesseract4
+            //
+            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
+            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
+            this.ClientSize = new System.Drawing.Size(320, 93);
+            this.Controls.Add(this.labelDescription1);
+            this.Controls.Add(this.labelPleaseWait);
+            this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedDialog;
+            this.MaximizeBox = false;
+            this.MinimizeBox = false;
+            this.Name = "DownloadTesseract4";
+            this.ShowInTaskbar = false;
+            this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
+            this.Text = "Download Tesseract 4";
+            this.ResumeLayout(false);
+            this.PerformLayout();
+
+        }
+
+        #endregion
+
+        private System.Windows.Forms.Label labelDescription1;
+        private System.Windows.Forms.Label labelPleaseWait;
+    }
+}
\ No newline at end of file
diff --git a/src/Forms/Ocr/DownloadTesseract4.cs b/src/Forms/Ocr/DownloadTesseract4.cs
new file mode 100644
index 000000000..8fe8138a3
--- /dev/null
+++ b/src/Forms/Ocr/DownloadTesseract4.cs
@@ -0,0 +1,85 @@
+using System;
+using System.IO;
+using System.IO.Compression;
+using System.Net;
+using System.Windows.Forms;
+using Nikse.SubtitleEdit.Core;
+
+namespace Nikse.SubtitleEdit.Forms.Ocr
+{
+    // Modal dialog that downloads the Tesseract 4 archive and unpacks it into Configuration.TesseractDirectory.
+    // DialogResult is OK when the archive was extracted and Cancel when the download failed.
+    public partial class DownloadTesseract4 : Form
+    {
+        public DownloadTesseract4()
+        {
+            InitializeComponent();
+            var wc = new WebClient { Proxy = Utilities.GetProxy() };
+
+            // Attach the handlers before starting the request so no progress/completion event can be missed.
+            wc.DownloadDataCompleted += wc_DownloadDataCompleted;
+            wc.DownloadProgressChanged += (o, args) =>
+            {
+                labelPleaseWait.Text = Configuration.Settings.Language.General.PleaseWait + " " + args.ProgressPercentage + "%";
+            };
+            wc.DownloadDataAsync(new Uri("https://github.com/SubtitleEdit/support-files/raw/master/Tesseract4.tar.gz"));
+        }
+
+        // Unpacks the downloaded .tar.gz: gunzip to a temporary .tar file, then write every tar entry below the Tesseract folder.
+        private void wc_DownloadDataCompleted(object sender, DownloadDataCompletedEventArgs e)
+        {
+            // The client was created for this single request only; release it now that the request is finished.
+            (sender as IDisposable)?.Dispose();
+
+            // e.Result throws when the request was cancelled or failed, so bail out before touching it.
+            if (e.Cancelled || e.Error != null)
+            {
+                MessageBox.Show(Configuration.Settings.Language.GetTesseractDictionaries.DownloadFailed);
+                DialogResult = DialogResult.Cancel;
+                return;
+            }
+
+            string dictionaryFolder = Configuration.TesseractDirectory;
+            if (!Directory.Exists(dictionaryFolder))
+                Directory.CreateDirectory(dictionaryFolder);
+
+            // Path.GetTempFileName() creates a zero-byte file on disk; appending ".tar" to that name left the
+            // created file orphaned in the temp folder, so build a unique name without creating anything.
+            var tempFileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".tar");
+            try
+            {
+                using (var ms = new MemoryStream(e.Result))
+                using (var fs = new FileStream(tempFileName, FileMode.Create))
+                using (var zip = new GZipStream(ms, CompressionMode.Decompress))
+                {
+                    zip.CopyTo(fs);
+                }
+
+                // NOTE(review): entry names come straight from the archive; confirm that names containing ".." cannot occur or add a check.
+                using (var tr = new TarReader(tempFileName))
+                {
+                    foreach (var th in tr.Files)
+                    {
+                        string fn = Path.Combine(dictionaryFolder, th.FileName.Replace('/', Path.DirectorySeparatorChar));
+                        if (th.IsFolder)
+                        {
+                            Directory.CreateDirectory(fn);
+                        }
+                        else if (th.FileSizeInBytes > 0)
+                        {
+                            th.WriteData(fn);
+                        }
+                    }
+                }
+            }
+            finally
+            {
+                // Remove the temporary archive even when extraction fails.
+                File.Delete(tempFileName);
+            }
+
+            Cursor = Cursors.Default;
+            DialogResult = DialogResult.OK;
+        }
+    }
+}
diff --git a/src/Forms/Ocr/DownloadTesseract4.resx
b/src/Forms/Ocr/DownloadTesseract4.resx new file mode 100644 index 000000000..1af7de150 --- /dev/null +++ b/src/Forms/Ocr/DownloadTesseract4.resx @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/src/Forms/Ocr/VobSubOcr.cs b/src/Forms/Ocr/VobSubOcr.cs index fe89a35ce..20e52e497 100644 --- a/src/Forms/Ocr/VobSubOcr.cs +++ b/src/Forms/Ocr/VobSubOcr.cs @@ -7,6 +7,7 @@ using Nikse.SubtitleEdit.Core.VobSub; using Nikse.SubtitleEdit.Logic; using Nikse.SubtitleEdit.Logic.Ocr; using Nikse.SubtitleEdit.Logic.Ocr.Binary; +using Nikse.SubtitleEdit.Logic.Ocr.Tesseract; using System; using System.Collections.Generic; using System.ComponentModel; @@ -292,18 +293,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private string[] _nocrThreadResults; public const int NocrMinColor = 300; - private bool _icThreadsStop; - private string[] _icThreadResults; - private readonly Keys _italicShortcut = UiUtil.GetKeys(Configuration.Settings.Shortcuts.MainTextBoxItalic); private readonly Keys _mainGeneralGoToNextSubtitle = UiUtil.GetKeys(Configuration.Settings.Shortcuts.GeneralGoToNextSubtitle); private readonly Keys _mainGeneralGoToPrevSubtitle = UiUtil.GetKeys(Configuration.Settings.Shortcuts.GeneralGoToPrevSubtitle); private string[] _tesseractAsyncStrings; private int _tesseractAsyncIndex; - private BackgroundWorker _tesseractThread; private int _tesseractEngineMode; - private int _tesseractErrors; private readonly DateTime _windowStartTime = DateTime.Now; private int _linesOcred; @@ -316,12 +312,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private int _ocrMethodIndex; private bool 
_autoBreakLines; - private int _ocrMethodTesseract; - private int _ocrMethodModi = 2; - private int _ocrMethodBinaryImageCompare = 1; - private int _ocrMethodNocr = 3; - private int _ocrMethodTesseract302 = 4; - private int _ocrMethodImageCompare = -2; //TODO: Remove + private readonly int _ocrMethodTesseract; + private readonly int _ocrMethodModi; + private readonly int _ocrMethodBinaryImageCompare; + private readonly int _ocrMethodNocr; + private readonly int _ocrMethodTesseract302; public static void SetDoubleBuffered(Control c) { @@ -418,24 +413,23 @@ namespace Nikse.SubtitleEdit.Forms.Ocr FillSpellCheckDictionaries(); + InitializeModi(); comboBoxOcrMethod.Items.Clear(); - comboBoxOcrMethod.Items.Add(language.OcrViaTesseract); comboBoxOcrMethod.Items.Add("Binary image compare"); - comboBoxOcrMethod.Items.Add(language.OcrViaModi); + comboBoxOcrMethod.Items.Add("Tesseract 3.02"); + comboBoxOcrMethod.Items.Add("Tesseract 4 Beta"); + if (_modiEnabled) + comboBoxOcrMethod.Items.Add(language.OcrViaModi); if (Configuration.Settings.General.ShowBetaStuff) { comboBoxOcrMethod.Items.Add(language.OcrViaNOCR); comboBoxOcrMethod.Items.Add(language.OcrViaImageCompare); - _ocrMethodImageCompare = 4; - _ocrMethodTesseract302 = 5; - } - else - { - comboBoxOcrMethod.Items.Add(language.OcrViaTesseract + " (3.02)"); - _ocrMethodTesseract302 = comboBoxOcrMethod.Items.Count; - _ocrMethodImageCompare = -1; - _ocrMethodNocr = -1; } + _ocrMethodBinaryImageCompare = comboBoxOcrMethod.Items.IndexOf("Binary image compare"); + _ocrMethodTesseract302 = comboBoxOcrMethod.Items.IndexOf("Tesseract 3.02"); + _ocrMethodTesseract = comboBoxOcrMethod.Items.IndexOf("Tesseract 4 Beta"); + _ocrMethodModi = comboBoxOcrMethod.Items.IndexOf(language.OcrViaModi); + _ocrMethodNocr = comboBoxOcrMethod.Items.IndexOf(language.OcrViaNOCR); checkBoxTesseractItalicsOn.Checked = Configuration.Settings.VobSubOcr.UseItalicsInTesseract; checkBoxTesseractItalicsOn.Text = 
Configuration.Settings.Language.General.Italic; @@ -551,12 +545,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { _tesseractAsyncStrings = new string[max]; _tesseractAsyncIndex = (int)numericUpDownStartNumber.Value + 5; - _tesseractThread = new BackgroundWorker(); - _tesseractThread.DoWork += TesseractThreadDoWork; - _tesseractThread.RunWorkerCompleted += TesseractThreadRunWorkerCompleted; - _tesseractThread.WorkerSupportsCancellation = true; - if (_tesseractAsyncIndex >= 0 && _tesseractAsyncIndex < max) - _tesseractThread.RunWorkerAsync(GetSubtitleBitmap(_tesseractAsyncIndex)); } System.Threading.Thread.Sleep(1000); subtitleListView1.SelectedIndexChanged -= SubtitleListView1SelectedIndexChanged; @@ -627,7 +615,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr numericUpDownNumberOfPixelsIsSpaceNOCR.Value = vobSubOcrSettings.XOrMorePixelsMakesSpace; _vobSubOcrSettings = vobSubOcrSettings; - InitializeModi(); InitializeTesseract(); LoadImageCompareCharacterDatabaseList(); @@ -655,7 +642,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr numericUpDownNumberOfPixelsIsSpaceNOCR.Value = vobSubOcrSettings.XOrMorePixelsMakesSpace; _vobSubOcrSettings = vobSubOcrSettings; - InitializeModi(); InitializeTesseract(); LoadImageCompareCharacterDatabaseList(); @@ -687,7 +673,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr numericUpDownNumberOfPixelsIsSpaceNOCR.Value = vobSubOcrSettings.XOrMorePixelsMakesSpace; _vobSubOcrSettings = vobSubOcrSettings; - InitializeModi(); InitializeTesseract(); LoadImageCompareCharacterDatabaseList(); @@ -744,6 +729,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void DoBatch() { + _abort = false; FormVobSubOcr_Shown(null, null); checkBoxPromptForUnknownWords.Checked = false; @@ -752,12 +738,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { _tesseractAsyncStrings = new string[max]; _tesseractAsyncIndex = (int)numericUpDownStartNumber.Value + 5; - _tesseractThread = new BackgroundWorker(); - _tesseractThread.DoWork += TesseractThreadDoWork; - 
_tesseractThread.RunWorkerCompleted += TesseractThreadRunWorkerCompleted; - _tesseractThread.WorkerSupportsCancellation = true; - if (_tesseractAsyncIndex >= 0 && _tesseractAsyncIndex < max) - _tesseractThread.RunWorkerAsync(GetSubtitleBitmap(_tesseractAsyncIndex)); } System.Threading.Thread.Sleep(1000); subtitleListView1.SelectedIndexChanged -= SubtitleListView1SelectedIndexChanged; @@ -849,7 +829,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr numericUpDownNumberOfPixelsIsSpaceNOCR.Value = 11; _vobSubOcrSettings = vobSubOcrSettings; - InitializeModi(); InitializeTesseract(); LoadImageCompareCharacterDatabaseList(); @@ -911,36 +890,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } } - else if (_ocrMethodIndex == _ocrMethodImageCompare) - { - comboBoxCharacterDatabase.SelectedIndexChanged -= ComboBoxCharacterDatabaseSelectedIndexChanged; - string characterDatabasePath = Configuration.VobSubCompareDirectory.TrimEnd(Path.DirectorySeparatorChar); - if (!Directory.Exists(characterDatabasePath)) - Directory.CreateDirectory(characterDatabasePath); - - comboBoxCharacterDatabase.Items.Clear(); - - foreach (string dir in Directory.GetDirectories(characterDatabasePath)) - comboBoxCharacterDatabase.Items.Add(Path.GetFileName(dir)); - - if (comboBoxCharacterDatabase.Items.Count == 0) - { - Directory.CreateDirectory(characterDatabasePath + Path.DirectorySeparatorChar + _vobSubOcrSettings.LastImageCompareFolder); - comboBoxCharacterDatabase.Items.Add(_vobSubOcrSettings.LastImageCompareFolder); - } - - for (int i = 0; i < comboBoxCharacterDatabase.Items.Count; i++) - { - if (comboBoxCharacterDatabase.Items[i].ToString().Equals(_vobSubOcrSettings.LastImageCompareFolder, StringComparison.OrdinalIgnoreCase)) - { - comboBoxCharacterDatabase.SelectedIndex = i; - break; - } - } - if (comboBoxCharacterDatabase.SelectedIndex < 0) - comboBoxCharacterDatabase.SelectedIndex = 0; - comboBoxCharacterDatabase.SelectedIndexChanged += ComboBoxCharacterDatabaseSelectedIndexChanged; - } } catch 
(Exception ex) { @@ -953,89 +902,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr DisposeImageCompareBitmaps(); _binaryOcrDb = null; - if (_ocrMethodIndex == _ocrMethodImageCompare) - { - LoadOldCompareImages(); - } - else if (_ocrMethodIndex == _ocrMethodBinaryImageCompare) + if (_ocrMethodIndex == _ocrMethodBinaryImageCompare) { string db = Configuration.OcrDirectory + comboBoxCharacterDatabase.SelectedItem + ".db"; _binaryOcrDb = new BinaryOcrDb(db, true); } } - private void LoadOldCompareImages() - { - _compareBitmaps = new List(); - string path = Configuration.VobSubCompareDirectory + comboBoxCharacterDatabase.SelectedItem + Path.DirectorySeparatorChar; - if (!File.Exists(path + "CompareDescription.xml")) - _compareDoc.LoadXml(""); - else - _compareDoc.Load(path + "CompareDescription.xml"); - - string databaseName = path + "Images.db"; - if (!File.Exists(databaseName)) - { - labelStatus.Text = Configuration.Settings.Language.VobSubOcr.LoadingImageCompareDatabase; - labelStatus.Refresh(); - using (var f = new FileStream(databaseName, FileMode.Create)) - { - foreach (string bmpFileName in Directory.GetFiles(path, "*.bmp")) - { - string name = Path.GetFileNameWithoutExtension(bmpFileName); - - XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']"); - if (node != null) - { - node.InnerText = f.Position.ToString(CultureInfo.InvariantCulture); - var b = new Bitmap(bmpFileName); - var m = new ManagedBitmap(b); - b.Dispose(); - m.AppendToStream(f); - } - } - } - _compareDoc.Save(path + "Images.xml"); - string text = File.ReadAllText(path + "Images.xml"); - File.WriteAllText(path + "Images.xml", text.Replace("", "")); - labelStatus.Text = string.Empty; - } - - if (File.Exists(databaseName)) - { - labelStatus.Text = Configuration.Settings.Language.VobSubOcr.LoadingImageCompareDatabase; - labelStatus.Refresh(); - _compareDoc.Load(path + "Images.xml"); - using (var f = new FileStream(databaseName, FileMode.Open)) - { - foreach (XmlNode node 
in _compareDoc.DocumentElement.SelectNodes("Item")) - { - try - { - string name = node.InnerText; - int pos = Convert.ToInt32(name); - bool isItalic = node.Attributes["Italic"] != null; - string text = node.Attributes["Text"].InnerText; - int expandCount = 0; - if (node.Attributes["Expand"] != null) - { - if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount)) - expandCount = 0; - } - f.Position = pos; - var mbmp = new ManagedBitmap(f); - _compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount, text)); - } - catch - { - // ignored - } - } - } - labelStatus.Text = string.Empty; - } - } - private void DisposeImageCompareBitmaps() { _compareBitmaps = null; @@ -1067,9 +940,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr chooseLanguage.ShowIcon = true; } chooseLanguage.Initialize(_vobSubMergedPackist, _palette, vobSubParser.IdxLanguages, string.Empty); - Form form = _main; - if (form == null) - form = this; + var form = _main ?? (Form)this; if (batchMode) { chooseLanguage.SelectActive(); @@ -1100,7 +971,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr languageString = languageString.ToLower(); for (int i = 0; i < comboBoxTesseractLanguages.Items.Count; i++) { - var tl = (comboBoxTesseractLanguages.Items[i] as TesseractLanguage); + var tl = comboBoxTesseractLanguages.Items[i] as TesseractLanguage; if (tl.Text.StartsWith("Chinese", StringComparison.OrdinalIgnoreCase) && (languageString.StartsWith("chinese", StringComparison.OrdinalIgnoreCase) || languageString.StartsWith("中文", StringComparison.OrdinalIgnoreCase))) { comboBoxTesseractLanguages.SelectedIndex = i; @@ -1199,9 +1070,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr checkBoxShowOnlyForced.Checked == false) { _bdnXmlSubtitle.Paragraphs.Add(new Paragraph(x)); - var p = new Paragraph(x); - p.Text = string.Empty; - _subtitle.Paragraphs.Add(p); + _subtitle.Paragraphs.Add(new Paragraph(x) { Text = string.Empty }); } } _subtitle.Renumber(); @@ -1233,14 +1102,14 @@ namespace Nikse.SubtitleEdit.Forms.Ocr 
for (int i = 0; i < max; i++) { var x = _bluRaySubtitlesOriginal[i]; - if (checkBoxShowOnlyForced.Checked && x.IsForced || - checkBoxShowOnlyForced.Checked == false) + if (checkBoxShowOnlyForced.Checked && x.IsForced || checkBoxShowOnlyForced.Checked == false) { _bluRaySubtitles.Add(x); - Paragraph p = new Paragraph(); - p.StartTime = new TimeCode(x.StartTime / 90.0); - p.EndTime = new TimeCode(x.EndTime / 90.0); - _subtitle.Paragraphs.Add(p); + _subtitle.Paragraphs.Add(new Paragraph + { + StartTime = new TimeCode(x.StartTime / 90.0), + EndTime = new TimeCode(x.EndTime / 90.0) + }); } } _subtitle.Renumber(); @@ -1727,20 +1596,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { if (_mp4List != null) return _mp4List.Count; - else if (_spList != null) + if (_spList != null) return _spList.Count; - else if (_bdnXmlSubtitle != null) + if (_bdnXmlSubtitle != null) return _bdnXmlSubtitle.Paragraphs.Count; - else if (_bluRaySubtitlesOriginal != null) + if (_bluRaySubtitlesOriginal != null) return _bluRaySubtitles.Count; - else if (_xSubList != null) + if (_xSubList != null) return _xSubList.Count; - else if (_dvbSubtitles != null) + if (_dvbSubtitles != null) return _dvbSubtitles.Count; - else if (_dvbPesSubtitles != null) + if (_dvbPesSubtitles != null) return _dvbPesSubtitles.Count; - else - return _vobSubMergedPackist.Count; + return _vobSubMergedPackist.Count; } private Bitmap ShowSubtitleImage(int index) @@ -1759,11 +1627,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr groupBoxSubtitleImage.Text = Configuration.Settings.Language.VobSubOcr.SubtitleImage; bmp = new Bitmap(1, 1); } - Bitmap old = pictureBoxSubtitleImage.Image as Bitmap; + var old = pictureBoxSubtitleImage.Image as Bitmap; pictureBoxSubtitleImage.Image = bmp.Clone() as Bitmap; pictureBoxSubtitleImage.Invalidate(); - if (old != null) - old.Dispose(); + old?.Dispose(); return bmp; } @@ -2959,12 +2826,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr internal CompareMatch GetNOcrCompareMatch(ImageSplitterItem targetItem, 
NikseBitmap parentBitmap, NOcrDb nOcrDb, bool tryItalicScaling, bool deepSeek) { - bool italic; var expandedResult = NOcrFindExpandedMatch(parentBitmap, targetItem, nOcrDb.OcrCharacters); if (expandedResult != null) return new CompareMatch(expandedResult.Text, expandedResult.Italic, expandedResult.ExpandCount, null, expandedResult); - var result = NOcrFindBestMatchNew(targetItem, targetItem.Y - targetItem.ParentY, out italic, nOcrDb, tryItalicScaling, deepSeek); + var result = NOcrFindBestMatchNew(targetItem, targetItem.Y - targetItem.ParentY, out var italic, nOcrDb, tryItalicScaling, deepSeek); if (result == null) { if (checkBoxNOcrCorrect.Checked) @@ -3008,8 +2874,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr return new CompareMatch(expandedResult.Text, expandedResult.Italic, expandedResult.ExpandCount, null, expandedResult); } - bool italic; - var result = NOcrFindBestMatchNew(targetItem, targetItem.Y - targetItem.ParentY, out italic, nOcrDb, tryItalicScaling, deepSeek); + var result = NOcrFindBestMatchNew(targetItem, targetItem.Y - targetItem.ParentY, out var italic, nOcrDb, tryItalicScaling, deepSeek); if (result == null) { if (checkBoxNOcrCorrect.Checked) @@ -3047,12 +2912,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr internal static CompareMatch GetNOcrCompareMatch(ImageSplitterItem targetItem, NikseBitmap parentBitmap, NOcrThreadParameter p) { - bool italic; var expandedResult = NOcrFindExpandedMatch(parentBitmap, targetItem, p.NOcrChars); if (expandedResult != null) return new CompareMatch(expandedResult.Text, expandedResult.Italic, expandedResult.ExpandCount, null, expandedResult); - var result = NOcrFindBestMatch(targetItem, targetItem.Y - targetItem.ParentY, out italic, p.NOcrChars, p.UnItalicFactor, p.AdvancedItalicDetection, true); + var result = NOcrFindBestMatch(targetItem, targetItem.Y - targetItem.ParentY, out var italic, p.NOcrChars, p.UnItalicFactor, p.AdvancedItalicDetection, true); if (result == null) return null; @@ -3149,8 +3013,7 @@ namespace 
Nikse.SubtitleEdit.Forms.Ocr if (smallestDifference > 2 && target.Width > 12) { var cutBitmap = target.CopyRectangle(new Rectangle(1, 0, target.Width - 2, target.Height)); - int topCrop = 0; - var cutBitmap2 = NikseBitmapImageSplitter.CropTopAndBottom(cutBitmap, out topCrop, 2); + var cutBitmap2 = NikseBitmapImageSplitter.CropTopAndBottom(cutBitmap, out _, 2); if (cutBitmap2.Height != target.Height) FindBestMatch(out index, ref smallestDifference, ref smallestIndex, cutBitmap2, _compareBitmaps); } @@ -3197,7 +3060,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr secondBestGuess = null; int index = 0; int smallestDifference = 10000; - BinaryOcrBitmap hit = null; var target = targetItem.NikseBitmap; if (_binaryOcrDb == null) { @@ -3254,7 +3116,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - FindBestMatchNew(ref index, ref smallestDifference, out hit, target, _binaryOcrDb, bob, maxDiff); + FindBestMatchNew(ref index, ref smallestDifference, out var hit, target, _binaryOcrDb, bob, maxDiff); if (maxDiff > 0) { if (target.Width > 16 && target.Height > 16 && (hit == null || smallestDifference * 100.0 / (target.Width * target.Height) > maxDiff)) @@ -3366,8 +3228,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { return new CompareMatch("j", false, 0, null); } - bool italicLowercaseI; - if (bob.IsLowercaseI(out italicLowercaseI)) + + if (bob.IsLowercaseI(out var italicLowercaseI)) { return new CompareMatch("i", italicLowercaseI, 0, null); } @@ -3422,16 +3284,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr return percentage; } - public static Bitmap CopyBitmapSection(Bitmap srcBitmap, Rectangle section) - { - var bmp = new Bitmap(section.Width, section.Height); - var g = Graphics.FromImage(bmp); - g.DrawImage(srcBitmap, 0, 0, section, GraphicsUnit.Pixel); - g.Dispose(); - return bmp; - } - - private static void FindBestMatchNew(ref int index, ref int smallestDifference, out BinaryOcrBitmap hit, NikseBitmap target, BinaryOcrDb binOcrDb, BinaryOcrBitmap bob, double maxDiff) { hit = null; 
@@ -4096,66 +3948,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr return count; } - private string SaveCompareItem(NikseBitmap newTarget, string text, bool isItalic, int expandCount) - { - string path = Configuration.VobSubCompareDirectory + comboBoxCharacterDatabase.SelectedItem + Path.DirectorySeparatorChar; - string databaseName = path + "Images.db"; - FileStream f; - long pos; - if (!File.Exists(databaseName)) - { - if (!Directory.Exists(Configuration.OcrDirectory)) - { - Directory.CreateDirectory(Configuration.OcrDirectory); - } - - using (f = new FileStream(databaseName, FileMode.Create)) - { - pos = f.Position; - new ManagedBitmap(newTarget).AppendToStream(f); - } - } - else - { - using (f = new FileStream(databaseName, FileMode.Append)) - { - pos = f.Position; - new ManagedBitmap(newTarget).AppendToStream(f); - } - } - string name = pos.ToString(CultureInfo.InvariantCulture); - - if (_compareDoc == null) - { - _compareDoc = new XmlDocument(); - _compareDoc.LoadXml(""); - } - if (_compareBitmaps == null) - _compareBitmaps = new List(); - _compareBitmaps.Add(new CompareItem(new ManagedBitmap(newTarget), name, isItalic, expandCount, text)); - - XmlElement element = _compareDoc.CreateElement("Item"); - XmlAttribute attribute = _compareDoc.CreateAttribute("Text"); - attribute.InnerText = text; - element.Attributes.Append(attribute); - if (expandCount > 0) - { - XmlAttribute expandSelection = _compareDoc.CreateAttribute("Expand"); - expandSelection.InnerText = expandCount.ToString(CultureInfo.InvariantCulture); - element.Attributes.Append(expandSelection); - } - if (isItalic) - { - XmlAttribute italic = _compareDoc.CreateAttribute("Italic"); - italic.InnerText = "true"; - element.Attributes.Append(italic); - } - element.InnerText = pos.ToString(CultureInfo.InvariantCulture); - _compareDoc.DocumentElement.AppendChild(element); - _compareDoc.Save(path + "Images.xml"); - return name; - } - private string SaveCompareItemNew(ImageSplitterItem newTarget, string text, bool 
isItalic, List expandList) { int expandCount = 0; @@ -4186,190 +3978,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - /// - /// Ocr via image compare - /// - private string SplitAndOcrBitmapNormal(Bitmap bitmap, int listViewIndex) - { - if (_ocrFixEngine == null) - LoadOcrFixEngine(null, LanguageString); - - string threadText = null; - if (_icThreadResults != null && !string.IsNullOrEmpty(_icThreadResults[listViewIndex])) - threadText = _icThreadResults[listViewIndex]; - - string line; - if (threadText == null) - { - var matches = new List(); - var parentBitmap = new NikseBitmap(bitmap); - List list = NikseBitmapImageSplitter.SplitBitmapToLetters(parentBitmap, (int)numericUpDownPixelsIsSpace.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom); - int index = 0; - bool expandSelection = false; - bool shrinkSelection = false; - var expandSelectionList = new List(); - while (index < list.Count) - { - ImageSplitterItem item = list[index]; - if (expandSelection || shrinkSelection) - { - expandSelection = false; - if (shrinkSelection && index > 0) - { - shrinkSelection = false; - } - else if (index + 1 < list.Count && list[index + 1].NikseBitmap != null) // only allow expand to EndOfLine or space - { - index++; - expandSelectionList.Add(list[index]); - } - item = GetExpandedSelection(parentBitmap, expandSelectionList, checkBoxRightToLeft.Checked); - - _vobSubOcrCharacter.Initialize(bitmap, item, _manualOcrDialogPosition, _italicCheckedLast, expandSelectionList.Count > 1, null, _lastAdditions, this); - DialogResult result = _vobSubOcrCharacter.ShowDialog(this); - _manualOcrDialogPosition = _vobSubOcrCharacter.FormPosition; - if (result == DialogResult.OK && _vobSubOcrCharacter.ShrinkSelection) - { - shrinkSelection = true; - index--; - if (expandSelectionList.Count > 0) - expandSelectionList.RemoveAt(expandSelectionList.Count - 1); - } - else if (result == DialogResult.OK && _vobSubOcrCharacter.ExpandSelection) - { - expandSelection = true; - 
} - else if (result == DialogResult.OK) - { - string text = _vobSubOcrCharacter.ManualRecognizedCharacters; - string name = SaveCompareItem(item.NikseBitmap, text, _vobSubOcrCharacter.IsItalic, expandSelectionList.Count); - var addition = new ImageCompareAddition(name, text, item.NikseBitmap, _vobSubOcrCharacter.IsItalic, listViewIndex); - _lastAdditions.Add(addition); - matches.Add(new CompareMatch(text, _vobSubOcrCharacter.IsItalic, expandSelectionList.Count, null)); - expandSelectionList = new List(); - } - else if (result == DialogResult.Abort) - { - _abort = true; - } - else - { - matches.Add(new CompareMatch("*", false, 0, null)); - } - _italicCheckedLast = _vobSubOcrCharacter.IsItalic; - } - else if (item.NikseBitmap == null) - { - matches.Add(new CompareMatch(item.SpecialCharacter, false, 0, null)); - } - else - { - CompareMatch bestGuess; - CompareMatch match = GetCompareMatch(item, parentBitmap, out bestGuess, list, index); - if (match == null) - { - _vobSubOcrCharacter.Initialize(bitmap, item, _manualOcrDialogPosition, _italicCheckedLast, false, bestGuess, _lastAdditions, this); - DialogResult result = _vobSubOcrCharacter.ShowDialog(this); - _manualOcrDialogPosition = _vobSubOcrCharacter.FormPosition; - if (result == DialogResult.OK && _vobSubOcrCharacter.ExpandSelection) - { - expandSelectionList.Add(item); - expandSelection = true; - } - else if (result == DialogResult.OK) - { - string text = _vobSubOcrCharacter.ManualRecognizedCharacters; - string name = SaveCompareItem(item.NikseBitmap, text, _vobSubOcrCharacter.IsItalic, 0); - var addition = new ImageCompareAddition(name, text, item.NikseBitmap, _vobSubOcrCharacter.IsItalic, listViewIndex); - _lastAdditions.Add(addition); - matches.Add(new CompareMatch(text, _vobSubOcrCharacter.IsItalic, 0, null)); - } - else if (result == DialogResult.Abort) - { - _abort = true; - } - else - { - matches.Add(new CompareMatch("*", false, 0, null)); - } - _italicCheckedLast = _vobSubOcrCharacter.IsItalic; - } - else 
// found image match - { - matches.Add(new CompareMatch(match.Text, match.Italic, 0, null)); - if (match.ExpandCount > 0) - index += match.ExpandCount - 1; - } - } - if (_abort) - return string.Empty; - if (!expandSelection && !shrinkSelection) - index++; - if (shrinkSelection && expandSelectionList.Count < 2) - { - shrinkSelection = false; - expandSelectionList = new List(); - } - } - - line = GetStringWithItalicTags(matches); - } - else - { - line = threadText; - } - if (checkBoxAutoFixCommonErrors.Checked && _ocrFixEngine != null) - line = _ocrFixEngine.FixOcrErrorsViaHardcodedRules(line, _lastLine, null); // TODO: Add abbreviations list - - if (checkBoxRightToLeft.Checked) - line = ReverseNumberStrings(line); - - //OCR fix engine - string textWithOutFixes = line; - //OCR fix engine not loaded, when no dictionary is selected - if (_ocrFixEngine != null && _ocrFixEngine.IsDictionaryLoaded) - { - if (checkBoxAutoFixCommonErrors.Checked) - line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, GetAutoGuessLevel()); - - int correctWords; - int wordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(line, out correctWords); - - if (wordsNotFound > 0 || correctWords == 0 || textWithOutFixes != null && string.IsNullOrWhiteSpace(textWithOutFixes.Replace("~", string.Empty))) - { - _ocrFixEngine.AutoGuessesUsed.Clear(); - _ocrFixEngine.UnknownWordsFound.Clear(); - line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel()); - } - - if (_ocrFixEngine.Abort) - { - ButtonStopClick(null, null); - _ocrFixEngine.Abort = false; - return string.Empty; - } - - // Log used word guesses (via word replace list) - foreach (string guess in _ocrFixEngine.AutoGuessesUsed) - listBoxLogSuggestions.Items.Add(guess); - _ocrFixEngine.AutoGuessesUsed.Clear(); - - // Log unkown words guess (found via spelling dictionaries) - 
LogUnknownWords(); - - ColorLineByNumberOfUnknownWords(listViewIndex, wordsNotFound, line); - } - - if (textWithOutFixes != null && textWithOutFixes.Trim() != line.Trim()) - { - _tesseractOcrAutoFixes++; - labelFixesMade.Text = $" - {_tesseractOcrAutoFixes}"; - LogOcrFix(listViewIndex, textWithOutFixes, line); - } - - return line; - } - private void ColorLineByNumberOfUnknownWords(int index, int wordsNotFound, string line) { if (wordsNotFound >= 3) @@ -4492,8 +4100,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { _ocrCount++; _ocrHeight += (item.NikseBitmap.Height - _ocrHeight) / _ocrCount; - CompareMatch bestGuess; - CompareMatch match = GetCompareMatchNew(item, out bestGuess, list, index); + CompareMatch match = GetCompareMatchNew(item, out var bestGuess, list, index); if (match == null) // Try line OCR if no image compare match { if (_nOcrDb != null && _nOcrDb.OcrCharacters.Count > 0 && _numericUpDownMaxErrorPct < 1) @@ -4670,55 +4277,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - public static List LoadNOcrForTesseract(string xmlRessourceName) - { - var nocrChars = new List(); - Assembly asm = Assembly.GetExecutingAssembly(); - Stream strm = asm.GetManifestResourceStream(xmlRessourceName); - if (strm != null) - { - XmlDocument doc = new XmlDocument(); - var rdr = new StreamReader(strm); - using (var zip = new System.IO.Compression.GZipStream(rdr.BaseStream, System.IO.Compression.CompressionMode.Decompress)) - { - byte[] data = new byte[175000]; - zip.Read(data, 0, 175000); - doc.LoadXml(Encoding.UTF8.GetString(data)); - } - rdr.Close(); - - try - { - foreach (XmlNode node in doc.DocumentElement.SelectNodes("Char")) - { - var oc = new NOcrChar(node.Attributes["Text"].Value); - oc.Width = Convert.ToInt32(node.Attributes["Width"].Value, CultureInfo.InvariantCulture); - oc.Height = Convert.ToInt32(node.Attributes["Height"].Value, CultureInfo.InvariantCulture); - oc.MarginTop = Convert.ToInt32(node.Attributes["MarginTop"].Value, CultureInfo.InvariantCulture); - if 
(node.Attributes["Italic"] != null) - oc.Italic = Convert.ToBoolean(node.Attributes["Italic"].Value, CultureInfo.InvariantCulture); - if (node.Attributes["ExpandCount"] != null) - oc.ExpandCount = Convert.ToInt32(node.Attributes["ExpandCount"].Value, CultureInfo.InvariantCulture); - foreach (XmlNode pointNode in node.SelectNodes("Point")) - { - var op = new NOcrPoint(DecodePoint(pointNode.Attributes["Start"].Value), DecodePoint(pointNode.Attributes["End"].Value)); - XmlAttribute a = pointNode.Attributes["On"]; - if (a != null && Convert.ToBoolean(a.Value)) - oc.LinesForeground.Add(op); - else - oc.LinesBackground.Add(op); - } - nocrChars.Add(oc); - } - } - catch (Exception exception) - { - MessageBox.Show(exception.Message); - } - } - return nocrChars; - } - private static Point DecodePoint(string text) { var arr = text.Split(','); @@ -4897,8 +4455,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { if (checkBoxAutoFixCommonErrors.Checked) line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, GetAutoGuessLevel()); - int correctWords; - int wordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(line, out correctWords); + int wordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(line, out var correctWords); if (wordsNotFound > 0 || correctWords == 0 || textWithOutFixes != null && string.IsNullOrWhiteSpace(textWithOutFixes.Replace("~", string.Empty))) { @@ -5158,8 +4715,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr maximumX = item.X + item.NikseBitmap.Width; } nbmp.CropTransparentSidesAndBottom(0, true); - int topCropping; - nbmp = NikseBitmapImageSplitter.CropTopAndBottom(nbmp, out topCropping); + nbmp = NikseBitmapImageSplitter.CropTopAndBottom(nbmp, out _); return new ImageSplitterItem(minimumX, minimumY, nbmp); } @@ -5249,8 +4805,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { if (!convertAllToItalic && !convertAllToNonItalic) { - bool italicOrNot = false; - bool isMixedCaseWithoutDashAndAlike = IsMixedCaseWithoutDashAndAlike(matches, i, out 
italicOrNot); + bool isMixedCaseWithoutDashAndAlike = IsMixedCaseWithoutDashAndAlike(matches, i, out var italicOrNot); if ((text == "-" || text == "—" || text == "." || text == "'") && !isMixedCaseWithoutDashAndAlike) { italic = italicOrNot; @@ -5383,13 +4938,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr lettersNonItalics = 0; } - public Subtitle SubtitleFromOcr - { - get - { - return _subtitle; - } - } + public Subtitle SubtitleFromOcr => _subtitle; private void FormVobSubOcr_Shown(object sender, EventArgs e) { @@ -5552,271 +5101,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr subtitleListView1.MultiSelect = true; } - private static void ImageCompareThreadDoWork(object sender, DoWorkEventArgs e) - { - var p = (ImageCompareThreadParameter)e.Argument; - e.Result = p; - Bitmap bitmap = p.Picture; - var matches = new List(); - List lines = NikseBitmapImageSplitter.SplitVertical(bitmap); - List list = NikseBitmapImageSplitter.SplitBitmapToLetters(lines, p.NumberOfPixelsIsSpace, p.RightToLeft, Configuration.Settings.VobSubOcr.TopToBottom); - - int outerIndex = 0; - while (outerIndex < list.Count) - { - ImageSplitterItem item = list[outerIndex]; - if (item.NikseBitmap == null) - { - matches.Add(new CompareMatch(item.SpecialCharacter, false, 0, null)); - } - else - { - var target = item.NikseBitmap; - int numberOfForegroundColors = CalculateNumberOfForegroundColors(target); - - int smallestDifference = 10000; - int smallestIndex = -1; - - int index; - if (smallestDifference > 0) - { - index = 0; - foreach (CompareItem compareItem in p.CompareBitmaps) - { - if (compareItem.Bitmap.Width == target.Width && compareItem.Bitmap.Height == target.Height) - { - if (compareItem.NumberOfForegroundColors < 1) - compareItem.NumberOfForegroundColors = CalculateNumberOfForegroundColors(compareItem.Bitmap); - - if (Math.Abs(compareItem.NumberOfForegroundColors - numberOfForegroundColors) < 30) - { - int dif = NikseBitmapImageSplitter.IsBitmapsAlike(compareItem.Bitmap, target); - if (dif < 
smallestDifference) - { - smallestDifference = dif; - smallestIndex = index; - if (dif < 0.2) - break; // foreach ending - } - } - } - index++; - } - } - - if (smallestDifference > 1 && target.Width < 55 && target.Width > 5) - { - index = 0; - foreach (CompareItem compareItem in p.CompareBitmaps) - { - if (compareItem.Bitmap.Width == target.Width && compareItem.Bitmap.Height == target.Height + 1) - { - if (compareItem.NumberOfForegroundColors == -1) - compareItem.NumberOfForegroundColors = CalculateNumberOfForegroundColors(compareItem.Bitmap); - - if (Math.Abs(compareItem.NumberOfForegroundColors - numberOfForegroundColors) < 50) - { - int dif = NikseBitmapImageSplitter.IsBitmapsAlike(target, compareItem.Bitmap); - if (dif < smallestDifference) - { - smallestDifference = dif; - smallestIndex = index; - if (dif < 0.5) - break; // foreach ending - } - } - } - index++; - } - } - - if (smallestDifference > 1 && target.Width < 55 && target.Width > 5) - { - index = 0; - foreach (CompareItem compareItem in p.CompareBitmaps) - { - if (compareItem.Bitmap.Width == target.Width - 1 && compareItem.Bitmap.Height == target.Height || - compareItem.Bitmap.Width == target.Width - 1 && compareItem.Bitmap.Height == target.Height - 1 || - compareItem.Bitmap.Width == target.Width && compareItem.Bitmap.Height == target.Height - 1) - { - if (compareItem.NumberOfForegroundColors < 1) - compareItem.NumberOfForegroundColors = CalculateNumberOfForegroundColors(compareItem.Bitmap); - - if (Math.Abs(compareItem.NumberOfForegroundColors - numberOfForegroundColors) < 55) - { - int dif = NikseBitmapImageSplitter.IsBitmapsAlike(compareItem.Bitmap, target); - if (dif < smallestDifference) - { - smallestDifference = dif; - smallestIndex = index; - if (dif < 0.5) - break; // foreach ending - } - } - } - index++; - } - } - - double differencePercentage = smallestDifference * 100.0 / (item.NikseBitmap.Width * item.NikseBitmap.Height); - double maxDiff = p.MaxErrorPercent; - if (differencePercentage <= 
maxDiff && smallestIndex >= 0) - { - var hit = p.CompareBitmaps[smallestIndex]; - var match = new CompareMatch(hit.Text, hit.Italic, hit.ExpandCount, hit.Name); - matches.Add(new CompareMatch(match.Text, match.Italic, 0, null)); - if (match.ExpandCount > 0) - outerIndex += match.ExpandCount - 1; - } - else - { - p.Result = string.Empty; - return; - } - } - outerIndex++; - } - bitmap.Dispose(); - p.Result = GetStringWithItalicTags(matches); - } - - private void ImageCompareThreadRunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) - { - var p = (ImageCompareThreadParameter)e.Result; - if (!_icThreadsStop) - { - if (string.IsNullOrEmpty(_icThreadResults[p.Index])) - _icThreadResults[p.Index] = p.Result; - p.Index += p.Increment; - while (p.Index <= _mainOcrIndex) - p.Index += p.Increment; - p.Picture.Dispose(); - if (p.Index < _subtitle.Paragraphs.Count) - { - p.Result = string.Empty; - p.Picture = GetSubtitleBitmap(p.Index); - p.Self.RunWorkerAsync(p); - } - } - else - { - _mainOcrRunning = false; - } - } - - public string NocrFastCheck(Bitmap bitmap) - { - var nbmpInput = new NikseBitmap(bitmap); - nbmpInput.ReplaceNonWhiteWithTransparent(); - - var matches = new List(); - List list = NikseBitmapImageSplitter.SplitBitmapToLetters(nbmpInput, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom); - - foreach (ImageSplitterItem item in list) - { - if (item.NikseBitmap != null) - { - item.NikseBitmap.ReplaceTransparentWith(Color.Black); - } - } - int index = 0; - - while (index < list.Count) - { - ImageSplitterItem item = list[index]; - if (item.NikseBitmap == null) - { - matches.Add(new CompareMatch(item.SpecialCharacter, false, 0, null)); - } - else - { - CompareMatch match = null; - - var nbmp = item.NikseBitmap; - int topMargin = item.Y - item.ParentY; - foreach (NOcrChar oc in _nOcrDb.OcrCharacters) - { - if (Math.Abs(oc.Width - nbmp.Width) < 3 && Math.Abs(oc.Height - nbmp.Height) 
< 4 && Math.Abs(oc.MarginTop - topMargin) < 4) - { // only very accurate matches - - bool ok = true; - var index2 = 0; - while (index2 < oc.LinesForeground.Count && ok) - { - NOcrPoint op = oc.LinesForeground[index2]; - foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height)) - { - if (point.X >= 0 && point.Y >= 0 && point.X < nbmp.Width && point.Y < nbmp.Height) - { - Color c = nbmp.GetPixel(point.X, point.Y); - if (c.A > 150 && c.R + c.G + c.B > NocrMinColor) - { - } - else - { - Point p = new Point(point.X - 1, point.Y); - if (p.X < 0) - p.X = 1; - c = nbmp.GetPixel(p.X, p.Y); - if (nbmp.Width > 20 && c.A > 150 && c.R + c.G + c.B > NocrMinColor) - { - } - else - { - ok = false; - break; - } - } - } - } - index2++; - } - index2 = 0; - while (index2 < oc.LinesBackground.Count && ok) - { - NOcrPoint op = oc.LinesBackground[index2]; - foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height)) - { - if (point.X >= 0 && point.Y >= 0 && point.X < nbmp.Width && point.Y < nbmp.Height) - { - Color c = nbmp.GetPixel(point.X, point.Y); - if (c.A > 150 && c.R + c.G + c.B > NocrMinColor) - { - Point p = new Point(point.X, point.Y); - if (oc.Width > 19 && point.X > 0) - p.X = p.X - 1; - c = nbmp.GetPixel(p.X, p.Y); - if (c.A > 150 && c.R + c.G + c.B > NocrMinColor) - { - ok = false; - break; - } - } - } - } - index2++; - } - if (ok) - match = new CompareMatch(oc.Text, oc.Italic, 0, null); - } - } - - if (match == null) - { - matches.Add(new CompareMatch("*", false, 0, null)); - } - else // found image match - { - matches.Add(new CompareMatch(match.Text, match.Italic, 0, null)); - if (match.ExpandCount > 0) - index += match.ExpandCount - 1; - } - } - index++; - } - return GetStringWithItalicTags(matches); - } - private static void NOcrThreadDoWork(object sender, DoWorkEventArgs e) { var p = (NOcrThreadParameter)e.Argument; @@ -5891,30 +5175,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - private void TesseractThreadDoWork(object sender, 
DoWorkEventArgs e) - { - var bitmap = (Bitmap)e.Argument; - if (bitmap != null) - { - if (_tesseractAsyncIndex >= 0 && _tesseractAsyncStrings != null && _tesseractAsyncIndex < _tesseractAsyncStrings.Length) - { - if (string.IsNullOrEmpty(_tesseractAsyncStrings[_tesseractAsyncIndex])) - _tesseractAsyncStrings[_tesseractAsyncIndex] = Tesseract3DoOcrViaExe(bitmap, _languageId, "--psm 6", _tesseractEngineMode); // 6 = Assume a single uniform block of text.); - } - bitmap.Dispose(); - } - } - - private void TesseractThreadRunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) - { - if (!e.Cancelled) - { - _tesseractAsyncIndex++; - if (_tesseractAsyncIndex >= 0 && _tesseractAsyncStrings != null && _tesseractAsyncIndex < _tesseractAsyncStrings.Length) - _tesseractThread.RunWorkerAsync(GetSubtitleBitmap(_tesseractAsyncIndex)); - } - } - private bool _isLatinDb; private void ButtonStartOcrClick(object sender, EventArgs e) @@ -5924,6 +5184,12 @@ namespace Nikse.SubtitleEdit.Forms.Ocr return; } + if (_ocrMethodIndex == _ocrMethodTesseract302 || _ocrMethodIndex == _ocrMethodTesseract) + { + _tesseractThreadRunner = new TesseractThreadRunner(OcrDone); + _tesseractRunner = new TesseractRunner(); + } + if (_ocrMethodIndex == _ocrMethodTesseract && comboBoxTesseractLanguages.Items.Count == 0) { buttonGetTesseractDictionaries_Click(sender, e); @@ -5931,7 +5197,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } _tesseractEngineMode = comboBoxTesseractEngineMode.SelectedIndex; - _tesseractErrors = 0; _isLatinDb = comboBoxCharacterDatabase.SelectedItem != null && comboBoxCharacterDatabase.SelectedItem.ToString().Equals("Latin", StringComparison.Ordinal); Configuration.Settings.VobSubOcr.RightToLeft = checkBoxRightToLeft.Checked; _lastLine = null; @@ -5953,18 +5218,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr _nOcrDb = null; _tesseractAsyncStrings = new string[max]; _tesseractAsyncIndex = (int)numericUpDownStartNumber.Value + 5; - _tesseractThread = new BackgroundWorker(); - 
_tesseractThread.DoWork += TesseractThreadDoWork; - _tesseractThread.RunWorkerCompleted += TesseractThreadRunWorkerCompleted; - _tesseractThread.WorkerSupportsCancellation = true; - if (_tesseractAsyncIndex >= 0 && _tesseractAsyncIndex < max) - _tesseractThread.RunWorkerAsync(GetSubtitleBitmap(_tesseractAsyncIndex)); - } - else if (_ocrMethodIndex == _ocrMethodImageCompare) - { - if (_compareBitmaps == null) - LoadImageCompareBitmaps(); - _numericUpDownMaxErrorPct = (double)numericUpDownMaxErrorPct.Value; } else if (_ocrMethodIndex == _ocrMethodNocr) { @@ -5999,9 +5252,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr if (start + i < max) { var bw = new BackgroundWorker(); - var p = new NOcrThreadParameter(GetSubtitleBitmap(start + i), start + i, _nOcrDb.OcrCharacters, bw, noOfThreads, _unItalicFactor, checkBoxNOcrItalic.Checked, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked); - p.NOcrLastLowercaseHeight = _nocrLastLowercaseHeight; - p.NOcrLastUppercaseHeight = _nocrLastUppercaseHeight; + var p = new NOcrThreadParameter(GetSubtitleBitmap(start + i), start + i, _nOcrDb.OcrCharacters, bw, noOfThreads, _unItalicFactor, checkBoxNOcrItalic.Checked, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked) + { + NOcrLastLowercaseHeight = _nocrLastLowercaseHeight, + NOcrLastUppercaseHeight = _nocrLastUppercaseHeight + }; bw.DoWork += NOcrThreadDoWork; bw.RunWorkerCompleted += NOcrThreadRunWorkerCompleted; bw.RunWorkerAsync(p); @@ -6033,90 +5288,133 @@ namespace Nikse.SubtitleEdit.Forms.Ocr _mainOcrRunning = true; subtitleListView1.MultiSelect = false; mainOcrTimer_Tick(null, null); - - if (_ocrMethodIndex == _ocrMethodImageCompare) - { - _icThreadsStop = false; - _icThreadResults = new string[_subtitle.Paragraphs.Count]; - int noOfThreads = Environment.ProcessorCount - 2; // -1 or -2? 
- if (noOfThreads >= max) - noOfThreads = max - 1; - int start = (int)numericUpDownStartNumber.Value + 5; - if (noOfThreads > 2) - noOfThreads = 2; // Threading is not really good - subtitle picture creation should probably be threaded also/instead - for (int i = 0; i < noOfThreads; i++) - { - if (start + i < max) - { - Application.DoEvents(); - var bw = new BackgroundWorker(); - var p = new ImageCompareThreadParameter(GetSubtitleBitmap(start + i), start + i, _compareBitmaps, bw, noOfThreads, (int)numericUpDownPixelsIsSpace.Value, checkBoxRightToLeft.Checked, (float)numericUpDownMaxErrorPct.Value); - bw.DoWork += ImageCompareThreadDoWork; - bw.RunWorkerCompleted += ImageCompareThreadRunWorkerCompleted; - bw.RunWorkerAsync(p); - } - } - } } - // public List _elapseds = new List(); - private bool MainLoop(int max, int i) + private TesseractThreadRunner _tesseractThreadRunner; + + public void OcrDone(int index, TesseractThreadRunner.ImageJob job) { - if (i >= max) - { - SetButtonsEnabledAfterOcrDone(); - _mainOcrRunning = false; - return true; - } - - var bmp = ShowSubtitleImage(i); - TimeCode startTime; - TimeCode endTime; - GetSubtitleTime(i, out startTime, out endTime); - labelStatus.Text = $"{i + 1} / {max}: {startTime} - {endTime}"; - progressBar1.Value = i + 1; - labelStatus.Refresh(); - progressBar1.Refresh(); - if (_abort) - { - bmp.Dispose(); - SetButtonsEnabledAfterOcrDone(); - _mainOcrRunning = false; - return true; - } - - _mainOcrBitmap = bmp; - - int j = i; - subtitleListView1.Items[j].Selected = true; - if (j < max - 1) - j++; - if (j < max - 1) - j++; - subtitleListView1.Items[j].EnsureVisible(); - - string text = string.Empty; - // var sw = Stopwatch.StartNew(); - if (_ocrMethodIndex == _ocrMethodBinaryImageCompare) - text = SplitAndOcrBinaryImageCompare(bmp, i); - else if (_ocrMethodIndex == _ocrMethodTesseract) - text = OcrViaTesseract(bmp, i); - else if (_ocrMethodIndex == _ocrMethodTesseract302) - text = OcrViaTesseract(bmp, i); - else if 
(_ocrMethodIndex == _ocrMethodImageCompare) - text = SplitAndOcrBitmapNormal(bmp, i); - else if (_ocrMethodIndex == _ocrMethodNocr) - text = OcrViaNOCR(bmp, i); - else if (_ocrMethodIndex == _ocrMethodModi) - text = CallModi(i); - // sw.Stop(); - //_elapseds.Add(sw.ElapsedMilliseconds); - //double ts = 0; - //for (int k = 0; k < _elapseds.Count; k++) - //{ - // ts += _elapseds[k]; - //} - //Text = (ts / _elapseds.Count).ToString(); // display ms in win title bar + _tesseractAsyncStrings[index] = job.Result; + string text = OcrViaTesseract(job.Bitmap, index); + + _lastLine = text; + + text = text.Replace("-", "-"); + text = text.Replace("a", "a"); + text = text.Replace(".", "."); + text = text.Replace(" ", " "); + text = text.Trim(); + + text = text.Replace(" " + Environment.NewLine, Environment.NewLine); + text = text.Replace(Environment.NewLine + " ", Environment.NewLine); + + // max allow 2 lines + if (_autoBreakLines && text.Replace(Environment.NewLine, "*").Length + 2 <= text.Length) + { + text = text.Replace(" " + Environment.NewLine, Environment.NewLine); + text = text.Replace(Environment.NewLine + " ", Environment.NewLine); + while (text.Contains(Environment.NewLine + Environment.NewLine)) + text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine); + + if (text.Replace(Environment.NewLine, "*").Length + 2 <= text.Length) + text = Utilities.AutoBreakLine(text); + } + + if (_dvbSubtitles != null && checkBoxTransportStreamGetColorAndSplit.Checked) + { + text = Utilities.UnbreakLine(text); + if (_dvbSubColor != Color.Transparent) + text = "" + text + ""; + } + + _linesOcred++; + + if (_abort) + { + textBoxCurrentText.Text = text; + _mainOcrRunning = false; + SetButtonsEnabledAfterOcrDone(); + _nocrThreadsStop = true; + } + + text = text.Trim(); + text = text.Replace(" ", " "); + text = text.Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine); + text = text.Replace(" ", " "); + text = text.Replace(Environment.NewLine 
+ Environment.NewLine, Environment.NewLine); + + if (index >= subtitleListView1.Items.Count) + return; + var item = subtitleListView1.Items[index]; + item.Selected = true; + item.EnsureVisible(); + + Paragraph p = _subtitle.GetParagraphOrDefault(index); + if (p != null) + p.Text = text; + if (subtitleListView1.SelectedItems.Count == 1 && subtitleListView1.SelectedItems[0].Index == index) + textBoxCurrentText.Text = text; + else + subtitleListView1.SetText(index, text); + + var max = GetSubtitleCount(); + GetSubtitleTime(index, out var startTime, out var endTime); + labelStatus.Text = $"{index + 1} / {max}: {startTime} - {endTime}"; + progressBar1.Value = index + 1; + labelStatus.Refresh(); + progressBar1.Refresh(); + + _linesOcred++; + job.Bitmap.Dispose(); + if (index >= max - 1) + { + SetButtonsEnabledAfterOcrDone(); + _mainOcrRunning = false; + } + } + + private bool MainLoop(int max, int i) + { + if (i >= max) + { + SetButtonsEnabledAfterOcrDone(); + _mainOcrRunning = false; + return true; + } + + var bmp = ShowSubtitleImage(i); + TimeCode startTime; + TimeCode endTime; + GetSubtitleTime(i, out startTime, out endTime); + labelStatus.Text = $"{i + 1} / {max}: {startTime} - {endTime}"; + progressBar1.Value = i + 1; + labelStatus.Refresh(); + progressBar1.Refresh(); + if (_abort) + { + bmp.Dispose(); + SetButtonsEnabledAfterOcrDone(); + _mainOcrRunning = false; + return true; + } + + _mainOcrBitmap = bmp; + + int j = i; + subtitleListView1.Items[j].Selected = true; + if (j < max - 1) + j++; + if (j < max - 1) + j++; + subtitleListView1.Items[j].EnsureVisible(); + + string text = string.Empty; + if (_ocrMethodIndex == _ocrMethodBinaryImageCompare) + text = SplitAndOcrBinaryImageCompare(bmp, i); + else if (_ocrMethodIndex == _ocrMethodNocr) + text = OcrViaNOCR(bmp, i); + else if (_ocrMethodIndex == _ocrMethodModi) + text = CallModi(i); _lastLine = text; @@ -6156,7 +5454,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr _mainOcrRunning = false; 
SetButtonsEnabledAfterOcrDone(); _nocrThreadsStop = true; - _icThreadsStop = true; return true; } @@ -6177,10 +5474,35 @@ namespace Nikse.SubtitleEdit.Forms.Ocr return false; } + private bool MainLoopTesseract(int max, int i) + { + if (i >= max) + { + _tesseractThreadRunner.CheckQueue(); + return false; + } + + if (_abort) + { + SetButtonsEnabledAfterOcrDone(); + _mainOcrRunning = false; + return true; + } + + var bmp = GetSubtitleBitmap(i); + _mainOcrBitmap = bmp; + _tesseractThreadRunner.AddImageJob(bmp, i, _languageId, string.Empty, _tesseractEngineMode.ToString(CultureInfo.InvariantCulture), _ocrMethodIndex == _ocrMethodTesseract302); + _tesseractThreadRunner.CheckQueue(); + return false; + } + private void mainOcrTimer_Tick(object sender, EventArgs e) { _mainOcrTimer.Stop(); - bool done = MainLoop(_mainOcrTimerMax, _mainOcrIndex); + + bool done = _ocrMethodIndex == _ocrMethodTesseract || _ocrMethodIndex == _ocrMethodTesseract302 ? + MainLoopTesseract(_mainOcrTimerMax, _mainOcrIndex) : + MainLoop(_mainOcrTimerMax, _mainOcrIndex); if (done || _abort) { SetButtonsEnabledAfterOcrDone(); @@ -6209,7 +5531,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr public static Bitmap ResizeBitmap(Bitmap b, int width, int height) { var result = new Bitmap(width, height); - using (Graphics g = Graphics.FromImage(result)) + using (var g = Graphics.FromImage(result)) g.DrawImage(b, 0, 0, width, height); return result; } @@ -6217,186 +5539,47 @@ namespace Nikse.SubtitleEdit.Forms.Ocr public static Bitmap UnItalic(Bitmap bmp, double factor) { int left = (int)(bmp.Height * factor); - Bitmap unItaliced = new Bitmap(bmp.Width + left + 4, bmp.Height); - - Point[] destinationPoints = { - new Point(0, 0), // destination for upper-left point of original - new Point(bmp.Width, 0), // destination for upper-right point of original - new Point(left, bmp.Height) // destination for lower-left point of original - }; - + var unItaliced = new Bitmap(bmp.Width + left + 4, bmp.Height); using (var g = 
Graphics.FromImage(unItaliced)) { - g.DrawImage(bmp, destinationPoints); + g.DrawImage(bmp, new[] { + new Point(0, 0), // destination for upper-left point of original + new Point(bmp.Width, 0), // destination for upper-right point of original + new Point(left, bmp.Height) // destination for lower-left point of original + }); } return unItaliced; } + TesseractRunner _tesseractRunner; + private string Tesseract3DoOcrViaExe(Bitmap bmp, string language, string psmMode, int tesseractEngineMode) { - var directory = _ocrMethodIndex == _ocrMethodTesseract302 ? Configuration.Tesseract302Directory : Configuration.TesseractDirectory; + if (_tesseractRunner == null) + { + _tesseractThreadRunner = new TesseractThreadRunner(OcrDone); + _tesseractRunner = new TesseractRunner(); + } // change yellow color to white - easier for Tesseract var nbmp = new NikseBitmap(bmp); nbmp.ReplaceYellowWithWhite(); // optimized replace string pngFileName = Path.GetTempPath() + Guid.NewGuid() + ".png"; - string tempTextFileName; using (var b = nbmp.GetBitmap()) { b.Save(pngFileName, System.Drawing.Imaging.ImageFormat.Png); - tempTextFileName = Path.GetTempPath() + Guid.NewGuid(); } - using (var process = new Process()) + var result = _tesseractRunner.Run(language, psmMode, tesseractEngineMode.ToString(CultureInfo.InvariantCulture), pngFileName, _ocrMethodIndex == _ocrMethodTesseract302); + if (_tesseractRunner.TesseractErrors.Count <= 2 && !string.IsNullOrEmpty(_tesseractRunner.LastError)) { - process.StartInfo = new ProcessStartInfo(directory + "tesseract.exe") { UseShellExecute = true }; - - if (_ocrMethodIndex == _ocrMethodTesseract302) - { - process.StartInfo.Arguments = "\"" + pngFileName + "\" \"" + tempTextFileName + "\" " + language; - } - else - { - process.StartInfo.Arguments = "\"" + pngFileName + "\" \"" + tempTextFileName + "\" --oem " + tesseractEngineMode + " -l " + language; - } - - if (!string.IsNullOrEmpty(psmMode)) - process.StartInfo.Arguments += " " + psmMode.Trim(); - - 
process.StartInfo.Arguments += " hocr"; - process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden; - - if (Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac()) - { - process.StartInfo.UseShellExecute = false; - process.StartInfo.RedirectStandardError = true; - process.StartInfo.FileName = "tesseract"; - } - else - { - var tessdataPath = Path.Combine(directory, "tessdata"); - - if (_ocrMethodIndex == _ocrMethodTesseract) - { - process.StartInfo.Arguments = " --tessdata-dir \"" + tessdataPath + "\" " + process.StartInfo.Arguments.Trim(); - process.ErrorDataReceived += TesseractErrorReceived; - } - process.StartInfo.WorkingDirectory = directory; - process.StartInfo.UseShellExecute = false; - process.StartInfo.CreateNoWindow = true; - process.StartInfo.RedirectStandardError = true; - process.EnableRaisingEvents = true; - } - - try - { - process.Start(); - process.BeginErrorReadLine(); - } - catch - { - _tesseractErrors++; - if (_tesseractErrors <= 2) - { - if (Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac()) - { - MessageBox.Show("Unable to start 'Tesseract' - make sure tesseract-ocr 4.x is installed!"); - } - else - { - MessageBox.Show("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure Subtitle Edit is install correctly + Visual Studio 2017 C++ runtime"); - } - throw; - } - } - process.WaitForExit(5000); - } - - string result = string.Empty; - string outputFileName = tempTextFileName + ".html"; - if (!File.Exists(outputFileName)) - outputFileName = tempTextFileName + ".hocr"; - try - { - if (File.Exists(outputFileName)) - { - result = File.ReadAllText(outputFileName, Encoding.UTF8); - result = ParseHocr(result); - File.Delete(outputFileName); - } - File.Delete(pngFileName); - } - catch - { - // ignored + MessageBox.Show(_tesseractRunner.LastError); } return result; } - private void TesseractErrorReceived(object sender, DataReceivedEventArgs e) - { - string msg = e.Data; - - if 
(string.IsNullOrEmpty(msg) || - msg.StartsWith("Tesseract Open Source OCR Engine", StringComparison.OrdinalIgnoreCase) || - msg.Contains("Too few characters", StringComparison.OrdinalIgnoreCase) || - msg.Contains("Empty page", StringComparison.OrdinalIgnoreCase) || - msg.Contains(" diacritics", StringComparison.OrdinalIgnoreCase) || - msg.Contains("Weak margin", StringComparison.OrdinalIgnoreCase)) - { - return; - } - - _tesseractErrors++; - if (_tesseractErrors <= 2) - { - MessageBox.Show("An error occurred while running tesseract: " + msg); - } - } - - private static string ParseHocr(string html) - { - string s = html.Replace("", "@001_____").Replace("", "@002_____"); - - int first = s.IndexOf('<'); - while (first >= 0) - { - int last = s.IndexOf('>'); - if (last > 0) - { - s = s.Remove(first, last - first + 1); - first = s.IndexOf('<'); - } - else - { - first = -1; - } - } - - s = s.Trim(); - s = s.Replace("@001_____", "").Replace("@002_____", ""); - while (s.Contains(" ")) - s = s.Replace(" ", " "); - s = s.Replace(" ", " "); - - // html escape decoding - s = s.Replace("&", "&"); - s = s.Replace("<", "<"); - s = s.Replace(">", ">"); - s = s.Replace(""", "\""); - s = s.Replace("'", "'"); - s = s.Replace("'", "'"); - - while (s.Contains("\n\n")) - s = s.Replace("\n\n", "\n"); - s = s.Replace("\n", "\n"); - s = s.Replace("\n", Environment.NewLine); - - return s; - } - private bool HasSingleLetters(string line) { if (!_ocrFixEngine.IsDictionaryLoaded || !_ocrFixEngine.SpellCheckDictionaryName.StartsWith("en_", StringComparison.Ordinal)) @@ -6467,10 +5650,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } else if (_ocrFixEngine != null && !psm.Contains('$') && !psm.Contains('•') && !psm.Contains('€')) { - int correctWordsNoFixes; - int wordsNotFoundNoFixes = _ocrFixEngine.CountUnknownWordsViaDictionary(textWithOutFixes, out correctWordsNoFixes); - int correctWordsPsm7; - int wordsNotFoundPsm7 = _ocrFixEngine.CountUnknownWordsViaDictionary(psm, out correctWordsPsm7); + 
int wordsNotFoundNoFixes = _ocrFixEngine.CountUnknownWordsViaDictionary(textWithOutFixes, out var correctWordsNoFixes); + int wordsNotFoundPsm7 = _ocrFixEngine.CountUnknownWordsViaDictionary(psm, out var correctWordsPsm7); if (wordsNotFoundPsm7 <= wordsNotFoundNoFixes && correctWordsPsm7 > correctWordsNoFixes) { textWithOutFixes = psm; @@ -7047,6 +6228,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr s = "" + HtmlUtil.RemoveOpenCloseTags(s, HtmlUtil.TagItalic) + ""; s = s.Replace("" + Environment.NewLine + "", Environment.NewLine); + s = s.Replace(" a ", " a "); + return HtmlUtil.FixInvalidItalicTags(s); } @@ -7203,34 +6386,18 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { _modiEnabled = false; } - if (!_modiEnabled) - { - comboBoxOcrMethod.Items.RemoveAt(_ocrMethodModi); - - if (_ocrMethodTesseract > _ocrMethodModi) - _ocrMethodTesseract--; - if (_ocrMethodBinaryImageCompare > _ocrMethodModi) - _ocrMethodBinaryImageCompare--; - if (_ocrMethodNocr > _ocrMethodModi) - _ocrMethodNocr--; - if (_ocrMethodTesseract302 > _ocrMethodModi) - _ocrMethodTesseract302--; - if (_ocrMethodTesseract302 > _ocrMethodNocr) - _ocrMethodTesseract302--; - if (_ocrMethodImageCompare > _ocrMethodModi) - _ocrMethodImageCompare--; - } } private void InitializeTesseract(string chosenLanguage = null) { - if (!Directory.Exists(Configuration.TesseractDirectory) && !Configuration.IsRunningOnLinux() && !Configuration.IsRunningOnMac()) + if (!Directory.Exists(Configuration.Tesseract302Directory) && !Configuration.IsRunningOnLinux() && !Configuration.IsRunningOnMac() && + Directory.Exists(Configuration.TesseractOriginalDirectory)) { foreach (string dirPath in Directory.GetDirectories(Configuration.TesseractOriginalDirectory, "*", SearchOption.AllDirectories)) - Directory.CreateDirectory(dirPath.Replace(Configuration.TesseractOriginalDirectory, Configuration.TesseractDirectory)); + Directory.CreateDirectory(dirPath.Replace(Configuration.TesseractOriginalDirectory, Configuration.Tesseract302Directory)); 
foreach (string newPath in Directory.GetFiles(Configuration.TesseractOriginalDirectory, "*.*", SearchOption.AllDirectories)) - File.Copy(newPath, newPath.Replace(Configuration.TesseractOriginalDirectory, Configuration.TesseractDirectory), true); + File.Copy(newPath, newPath.Replace(Configuration.TesseractOriginalDirectory, Configuration.Tesseract302Directory), true); } string dir = _ocrMethodIndex == _ocrMethodTesseract302 ? Configuration.Tesseract302DataDirectory : Configuration.TesseractDataDirectory; @@ -7281,7 +6448,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void InitializeModiLanguages() { - foreach (ModiLanguage ml in ModiLanguage.AllLanguages) + foreach (var ml in ModiLanguage.AllLanguages) { comboBoxModiLanguage.Items.Add(ml); if (ml.Id == _vobSubOcrSettings.LastModiLanguageId) @@ -7299,11 +6466,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void ButtonStopClick(object sender, EventArgs e) { - if (_mainOcrTimer != null) - _mainOcrTimer.Stop(); + _mainOcrTimer?.Stop(); _abort = true; _nocrThreadsStop = true; - _icThreadsStop = true; buttonStop.Enabled = false; progressBar1.Visible = false; labelStatus.Text = string.Empty; @@ -7363,7 +6528,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void ButtonNewCharacterDatabaseClick(object sender, EventArgs e) { - using (var newFolder = new VobSubOcrNewFolder(_ocrMethodIndex == _ocrMethodImageCompare)) + using (var newFolder = new VobSubOcrNewFolder(false)) { if (newFolder.ShowDialog(this) == DialogResult.OK) { @@ -7523,13 +6688,12 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void ComboBoxTesseractLanguagesSelectedIndexChanged(object sender, EventArgs e) { Configuration.Settings.VobSubOcr.TesseractLastLanguage = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id; - if (_ocrFixEngine != null) - _ocrFixEngine.Dispose(); + _ocrFixEngine?.Dispose(); _ocrFixEngine = null; LoadOcrFixEngine(null, null); } - private void LoadOcrFixEngine(string threeLetterISOLanguageName, string 
hunspellName) + private void LoadOcrFixEngine(string threeLetterIsoLanguageName, string hunspellName) { if (_ocrMethodIndex != _ocrMethodTesseract && _ocrMethodIndex != _ocrMethodTesseract302) { @@ -7537,21 +6701,21 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { var ci = CultureInfo.GetCultureInfo(LanguageString.Replace("_", "-")); _languageId = ci.ThreeLetterISOLanguageName; - threeLetterISOLanguageName = ci.ThreeLetterISOLanguageName; + threeLetterIsoLanguageName = ci.ThreeLetterISOLanguageName; } catch { // ignored } } - else if (string.IsNullOrEmpty(threeLetterISOLanguageName) && comboBoxTesseractLanguages.SelectedItem != null) + else if (string.IsNullOrEmpty(threeLetterIsoLanguageName) && comboBoxTesseractLanguages.SelectedItem != null) { _languageId = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id; - threeLetterISOLanguageName = _languageId; + threeLetterIsoLanguageName = _languageId; } _ocrFixEngine?.Dispose(); - _ocrFixEngine = new OcrFixEngine(threeLetterISOLanguageName, hunspellName, this, _ocrMethodIndex == _ocrMethodBinaryImageCompare); + _ocrFixEngine = new OcrFixEngine(threeLetterIsoLanguageName, hunspellName, this, _ocrMethodIndex == _ocrMethodBinaryImageCompare); if (_ocrFixEngine.IsDictionaryLoaded) { string loadedDictionaryName = _ocrFixEngine.SpellCheckDictionaryName; @@ -7604,30 +6768,49 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void ComboBoxOcrMethodSelectedIndexChanged(object sender, EventArgs e) { - _icThreadsStop = true; + _abort = true; _binaryOcrDb = null; _nOcrDb = null; _ocrMethodIndex = comboBoxOcrMethod.SelectedIndex; if (_ocrMethodIndex == _ocrMethodTesseract) { + ResetTesseractThread(); InitializeTesseract(); ShowOcrMethodGroupBox(GroupBoxTesseractMethod); - Configuration.Settings.VobSubOcr.LastOcrMethod = "Tesseract"; + Configuration.Settings.VobSubOcr.LastOcrMethod = "Tesseract4"; comboBoxTesseractEngineMode.Visible = true; labelTesseractEngineMode.Visible = true; checkBoxTesseractFallback.Text = "Fallback 
to Tesseract 3.02"; checkBoxTesseractFallback.Visible = File.Exists(Path.Combine(Configuration.Tesseract302Directory, "tesseract.exe")); + if (!File.Exists(Path.Combine(Configuration.TesseractDirectory, "tesseract.exe"))) + { + if (MessageBox.Show("Download Tesseract 4 Beta", "Subtitle Edit", MessageBoxButtons.YesNoCancel) == DialogResult.Yes) + { + using (var form = new DownloadTesseract4()) + { + form.ShowDialog(this); + } + } + else + { + comboBoxOcrMethod.SelectedIndex = _ocrMethodBinaryImageCompare; + return; + } + } } else if (_ocrMethodIndex == _ocrMethodTesseract302) { + ResetTesseractThread(); InitializeTesseract(); ShowOcrMethodGroupBox(GroupBoxTesseractMethod); Configuration.Settings.VobSubOcr.LastOcrMethod = "Tesseract302"; comboBoxTesseractEngineMode.Visible = false; labelTesseractEngineMode.Visible = false; + checkBoxTesseractFallback.Text = "Fallback to Tesseract 4"; + checkBoxTesseractFallback.Visible = File.Exists(Path.Combine(Configuration.TesseractDirectory, "tesseract.exe")); if (!File.Exists(Path.Combine(Configuration.Tesseract302Directory, "tesseract.exe"))) { - if (MessageBox.Show("Download Tesseract 3.02", null, MessageBoxButtons.YesNo) == DialogResult.Yes) + if (MessageBox.Show("Download Tesseract 3.02", "Subtitle Edit", MessageBoxButtons.YesNoCancel) == DialogResult.Yes) { using (var form = new DownloadTesseract302()) { @@ -7636,19 +6819,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } else { - comboBoxOcrMethod.SelectedIndex = _ocrMethodTesseract; + comboBoxOcrMethod.SelectedIndex = _ocrMethodBinaryImageCompare; return; } } - checkBoxTesseractFallback.Text = "Fallback to Tesseract 4"; - checkBoxTesseractFallback.Visible = true; - } - else if (_ocrMethodIndex == _ocrMethodImageCompare) - { - ShowOcrMethodGroupBox(groupBoxImageCompareMethod); - Configuration.Settings.VobSubOcr.LastOcrMethod = "BitmapCompare"; - checkBoxPromptForUnknownWords.Checked = false; - LoadImageCompareCharacterDatabaseList(); } else if (_ocrMethodIndex == 
_ocrMethodNocr) { @@ -7736,7 +6910,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr saveImageAsToolStripMenuItem.Visible = !enableIfRaisedBySubListView || subtitleListView1.SelectedItems.Count == 1; // Image compare. - bool enableIfImageCompare = _ocrMethodIndex == _ocrMethodImageCompare || _ocrMethodIndex == _ocrMethodBinaryImageCompare; + bool enableIfImageCompare = _ocrMethodIndex == _ocrMethodBinaryImageCompare; inspectImageCompareMatchesForCurrentImageToolStripMenuItem.Visible = enableIfImageCompare; EditLastAdditionsToolStripMenuItem.Visible = enableIfImageCompare && _lastAdditions != null && _lastAdditions.Count > 0; @@ -7860,16 +7034,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void ResetTesseractThread() { - if (_tesseractThread != null) + _tesseractThreadRunner?.Cancel(); + if (_tesseractAsyncStrings != null) { - _tesseractThread.CancelAsync(); - if (_tesseractAsyncStrings != null) - { - for (int i = 0; i < _tesseractAsyncStrings.Length; i++) - _tesseractAsyncStrings[i] = string.Empty; - } - _tesseractAsyncIndex = 0; + for (int i = 0; i < _tesseractAsyncStrings.Length; i++) + _tesseractAsyncStrings[i] = string.Empty; } + _tesseractAsyncIndex = 0; } private void PictureBoxColorChooserClick(object sender, EventArgs e) @@ -7903,11 +7074,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void checkBoxShowOnlyForced_CheckedChanged(object sender, EventArgs e) { - if (_tesseractThread != null) + if (_tesseractThreadRunner != null) { - _tesseractThread.CancelAsync(); + _tesseractThreadRunner.Cancel(); int i = 0; - while (i < 10 && _tesseractThread.IsBusy) + while (i < 10) { System.Threading.Thread.Sleep(100); i++; @@ -8032,7 +7203,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } catch { - var arr = LanguageString.Split(new char[] { '-', '_' }); + var arr = LanguageString.Split('-', '_'); if (arr.Length > 1 && arr[0].Length == 2) { foreach (var x in CultureInfo.GetCultures(CultureTypes.NeutralCultures)) @@ -8077,7 +7248,6 @@ namespace 
Nikse.SubtitleEdit.Forms.Ocr numericUpDownPixelsIsSpace.Value = 11; _vobSubOcrSettings = vobSubOcrSettings; - InitializeModi(); InitializeTesseract(); LoadImageCompareCharacterDatabaseList(); @@ -8093,9 +7263,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void SetOcrMethod() { - if (Configuration.Settings.VobSubOcr.LastOcrMethod == "BitmapCompare" && comboBoxOcrMethod.Items.Count > _ocrMethodBinaryImageCompare) - comboBoxOcrMethod.SelectedIndex = _ocrMethodBinaryImageCompare; //_ocrMethodImageCompare; - else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "BinaryImageCompare" && comboBoxOcrMethod.Items.Count > _ocrMethodBinaryImageCompare) + if (Configuration.Settings.VobSubOcr.LastOcrMethod == "BinaryImageCompare" && comboBoxOcrMethod.Items.Count > _ocrMethodBinaryImageCompare) comboBoxOcrMethod.SelectedIndex = _ocrMethodBinaryImageCompare; else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "MODI" && comboBoxOcrMethod.Items.Count > _ocrMethodModi) comboBoxOcrMethod.SelectedIndex = _ocrMethodModi; @@ -8103,6 +7271,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr comboBoxOcrMethod.SelectedIndex = _ocrMethodNocr; else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "Tesseract302" && comboBoxOcrMethod.Items.Count > _ocrMethodTesseract302) comboBoxOcrMethod.SelectedIndex = _ocrMethodTesseract302; + else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "Tesseract4" && comboBoxOcrMethod.Items.Count > _ocrMethodTesseract302) + comboBoxOcrMethod.SelectedIndex = _ocrMethodTesseract; else comboBoxOcrMethod.SelectedIndex = 0; } @@ -8270,12 +7440,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } else { - CompareMatch bestGuess; CompareMatch match; if (_binaryOcrDb != null) - match = GetCompareMatchNew(item, out bestGuess, sourceList, index); + match = GetCompareMatchNew(item, out _, sourceList, index); else - match = GetCompareMatch(item, parentBitmap, out bestGuess, sourceList, index); + match = GetCompareMatch(item, parentBitmap, out _, sourceList, 
index); if (match == null) { matches.Add(new CompareMatch(Configuration.Settings.Language.VobSubOcr.NoMatch, false, 0, null)); @@ -8326,8 +7495,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr Cursor = Cursors.Default; } } - if (_binaryOcrDb != null) - _binaryOcrDb.LoadCompareImages(); + + _binaryOcrDb?.LoadCompareImages(); Cursor = Cursors.Default; } @@ -8379,7 +7548,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr numericUpDownPixelsIsSpace.Value = vobSubOcrSettings.XOrMorePixelsMakesSpace; _vobSubOcrSettings = vobSubOcrSettings; - InitializeModi(); InitializeTesseract(); LoadImageCompareCharacterDatabaseList(); @@ -8442,7 +7610,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr numericUpDownPixelsIsSpace.Value = vobSubOcrSettings.XOrMorePixelsMakesSpace; _vobSubOcrSettings = vobSubOcrSettings; - InitializeModi(); InitializeTesseract(); LoadImageCompareCharacterDatabaseList(); @@ -8480,7 +7647,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr numericUpDownPixelsIsSpace.Value = vobSubOcrSettings.XOrMorePixelsMakesSpace; _vobSubOcrSettings = vobSubOcrSettings; - InitializeModi(); InitializeTesseract(); LoadImageCompareCharacterDatabaseList(); @@ -8538,14 +7704,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } - _icThreadsStop = true; _abort = true; _nocrThreadsStop = true; - if (_mainOcrTimer != null) - _mainOcrTimer.Stop(); + _mainOcrTimer?.Stop(); - if (_tesseractThread != null) - _tesseractThread.CancelAsync(); + _tesseractThreadRunner?.Cancel(); _tesseractAsyncIndex = 10000; System.Threading.Thread.Sleep(100); @@ -9015,7 +8178,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void buttonLineOcrNewLanguage_Click(object sender, EventArgs e) { - using (var newFolder = new VobSubOcrNewFolder(_ocrMethodIndex == _ocrMethodImageCompare)) + using (var newFolder = new VobSubOcrNewFolder(false)) { if (newFolder.ShowDialog(this) == DialogResult.OK) { @@ -9107,7 +8270,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr numericUpDownNumberOfPixelsIsSpaceNOCR.Value = 
vobSubOcrSettings.XOrMorePixelsMakesSpace; _vobSubOcrSettings = vobSubOcrSettings; - InitializeModi(); InitializeTesseract(); LoadImageCompareCharacterDatabaseList(); @@ -9137,7 +8299,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { if (_ocrMethodIndex == _ocrMethodTesseract) { - _icThreadsStop = true; _abort = true; _nocrThreadsStop = true; ResetTesseractThread(); @@ -9300,8 +8461,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } var sub = new Subtitle(); - Encoding encoding; - SubtitleFormat format = sub.LoadSubtitle(fileName, out encoding, null); + SubtitleFormat format = sub.LoadSubtitle(fileName, out _, null); if (format == null) return; @@ -9350,7 +8510,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private void subtitleListView1_DoubleClick(object sender, EventArgs e) { - if (subtitleListView1.SelectedItems.Count > 0 && (_ocrMethodIndex == _ocrMethodBinaryImageCompare || _ocrMethodIndex == _ocrMethodImageCompare)) + if (subtitleListView1.SelectedItems.Count > 0 && _ocrMethodIndex == _ocrMethodBinaryImageCompare) { InspectImageCompareMatchesForCurrentImageToolStripMenuItem_Click(null, null); } @@ -9378,4 +8538,4 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } } -} +} \ No newline at end of file diff --git a/src/Logic/OCR/Tesseract/TesseractMultiRunner.cs b/src/Logic/OCR/Tesseract/TesseractMultiRunner.cs new file mode 100644 index 000000000..b057c8f2c --- /dev/null +++ b/src/Logic/OCR/Tesseract/TesseractMultiRunner.cs @@ -0,0 +1,188 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Text; +using Nikse.SubtitleEdit.Core; + +namespace Nikse.SubtitleEdit.Logic.Ocr.Tesseract +{ + /// + /// Run multiple images per tesseract call + /// + public class TesseractMultiRunner + { + private readonly List _tesseractErrors; + + public TesseractMultiRunner() + { + _tesseractErrors = new List(); + } + + private void TesseractErrorReceived(object sender, DataReceivedEventArgs e) + { + var msg = e.Data; + + if 
(string.IsNullOrEmpty(msg) || + msg.StartsWith("Tesseract Open Source OCR Engine", StringComparison.OrdinalIgnoreCase) || + msg.Contains("Too few characters", StringComparison.OrdinalIgnoreCase) || + msg.Contains("Empty page", StringComparison.OrdinalIgnoreCase) || + msg.Contains(" diacritics", StringComparison.OrdinalIgnoreCase) || + msg.Contains("Weak margin", StringComparison.OrdinalIgnoreCase)) + { + return; + } + + _tesseractErrors.Add(msg); + } + + public string Run(List bmps, string language, string psmMode) + { + // change yellow color to white - easier for Tesseract + string inputFileName = Path.GetTempPath() + Guid.NewGuid() + ".txt"; + var filesToDelete = new List(); + var sb = new StringBuilder(); + foreach (var bmp in bmps) + { + bmp.ReplaceYellowWithWhite(); // optimized replace + string pngFileName = Path.GetTempPath() + Guid.NewGuid() + ".png"; + using (var b = bmp.GetBitmap()) + { + b.Save(pngFileName, System.Drawing.Imaging.ImageFormat.Png); + } + filesToDelete.Add(pngFileName); + sb.AppendLine(pngFileName); + } + + File.WriteAllText(inputFileName, sb.ToString()); + filesToDelete.Add(inputFileName); + var outputFileName = Path.GetTempPath() + Guid.NewGuid(); + var dir = @"C:\Data\SubtitleEdit\subtitleedit\src\bin\Debug\Tesseract4"; + using (var process = new Process()) + { + process.StartInfo = new ProcessStartInfo(dir + "tesseract.exe") + { + UseShellExecute = true, + Arguments = "\"" + inputFileName + "\" \"" + outputFileName + "\" -l " + language + }; + + if (!string.IsNullOrEmpty(psmMode)) + process.StartInfo.Arguments += " " + psmMode.Trim(); + + process.StartInfo.Arguments += " hocr"; + process.StartInfo.Arguments = " --tessdata-dir \"" + Path.Combine(dir, "tessdata") + "\" " + process.StartInfo.Arguments.Trim(); + process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden; + + if (Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac()) + { + process.StartInfo.UseShellExecute = false; + process.StartInfo.RedirectStandardError = 
true; + process.StartInfo.FileName = "tesseract"; + } + else + { + var tessdataPath = Path.Combine(Configuration.TesseractDirectory, "tessdata"); + process.StartInfo.Arguments = " --tessdata-dir \"" + tessdataPath + "\" " + process.StartInfo.Arguments.Trim(); + process.StartInfo.WorkingDirectory = Configuration.TesseractDirectory; + process.StartInfo.UseShellExecute = false; + process.StartInfo.CreateNoWindow = true; + process.StartInfo.RedirectStandardError = true; + process.ErrorDataReceived += TesseractErrorReceived; + process.EnableRaisingEvents = true; + } + + try + { + process.Start(); + process.BeginErrorReadLine(); + } + catch + { + if (_tesseractErrors.Count <= 2) + { + + if (Configuration.IsRunningOnLinux() || Configuration.IsRunningOnMac()) + { + _tesseractErrors.Add("Unable to start 'Tesseract' - make sure tesseract-ocr 4.x is installed!"); + } + else + { + _tesseractErrors.Add("Unable to start 'Tesseract' (" + Configuration.TesseractDirectory + "tesseract.exe) - make sure Subtitle Edit is install correctly + Visual Studio 2017 C++ runtime"); + } + } + } + + process.WaitForExit(5000 + bmps.Count * 500); + + string result = string.Empty; + string resultFileName = outputFileName + ".html"; + if (!File.Exists(outputFileName)) + resultFileName = outputFileName + ".hocr"; + filesToDelete.Add(resultFileName); + try + { + if (File.Exists(outputFileName)) + { + result = File.ReadAllText(outputFileName, Encoding.UTF8); + result = ParseHocr(result); + } + foreach (var fileName in filesToDelete) + { + if (File.Exists(fileName)) + { + File.Delete(fileName); + } + } + } + catch + { + // ignored + } + + return result; + } + } + + private static string ParseHocr(string html) + { + string s = html.Replace("", "@001_____").Replace("", "@002_____"); + + int first = s.IndexOf('<'); + while (first >= 0) + { + int last = s.IndexOf('>'); + if (last > 0) + { + s = s.Remove(first, last - first + 1); + first = s.IndexOf('<'); + } + else + { + first = -1; + } + } + + s = 
s.Trim(); + s = s.Replace("@001_____", "").Replace("@002_____", ""); + while (s.Contains(" ")) + s = s.Replace(" ", " "); + s = s.Replace(" ", " "); + + // html escape decoding + s = s.Replace("&", "&"); + s = s.Replace("<", "<"); + s = s.Replace(">", ">"); + s = s.Replace(""", "\""); + s = s.Replace("'", "'"); + s = s.Replace("'", "'"); + + while (s.Contains("\n\n")) + s = s.Replace("\n\n", "\n"); + s = s.Replace("\n", "\n"); + s = s.Replace("\n", Environment.NewLine); + + return s; + } + + } +} \ No newline at end of file diff --git a/src/Logic/OCR/Tesseract/TesseractRunner.cs b/src/Logic/OCR/Tesseract/TesseractRunner.cs new file mode 100644 index 000000000..6804a981d --- /dev/null +++ b/src/Logic/OCR/Tesseract/TesseractRunner.cs @@ -0,0 +1,160 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Text; +using Nikse.SubtitleEdit.Core; + +namespace Nikse.SubtitleEdit.Logic.Ocr.Tesseract +{ + public class TesseractRunner + { + public List TesseractErrors { get; set; } + public string LastError { get; set; } + + public TesseractRunner() + { + TesseractErrors = new List(); + } + + public string Run(string languageCode, string psmMode, string engineMode, string imageFileName, bool run302 = false) + { + LastError = null; + var dir = run302 ? 
Configuration.Tesseract302Directory : Configuration.TesseractDirectory; + string tempTextFileName = Path.GetTempPath() + Guid.NewGuid(); + using (var process = new Process()) + { + process.StartInfo = new ProcessStartInfo(Path.Combine(dir, "tesseract.exe")) + { + UseShellExecute = true, + Arguments = "\"" + imageFileName + "\" \"" + tempTextFileName + "\" -l " + languageCode + }; + + if (!string.IsNullOrEmpty(psmMode)) + { + process.StartInfo.Arguments += " --psm " + psmMode; + } + + if (!string.IsNullOrEmpty(engineMode) && !run302) + { + process.StartInfo.Arguments += " --oem " + engineMode; + } + + process.StartInfo.Arguments += " hocr"; + if (run302) + { + process.StartInfo.WorkingDirectory = Configuration.Tesseract302Directory; + } + else + { + process.ErrorDataReceived += TesseractErrorReceived; + process.StartInfo.Arguments = " --tessdata-dir \"" + Path.Combine(dir, "tessdata") + "\" " + process.StartInfo.Arguments.Trim(); + } + + process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden; + try + { + process.Start(); + } + catch (Exception exception) + { + LastError = exception.Message + Environment.NewLine + exception.StackTrace; + TesseractErrors.Add(LastError); + return "Error!"; + } + process.WaitForExit(5000); + } + + string result = string.Empty; + string outputFileName = tempTextFileName + ".html"; + if (!File.Exists(outputFileName)) + outputFileName = tempTextFileName + ".hocr"; + try + { + if (File.Exists(outputFileName)) + { + result = File.ReadAllText(outputFileName, Encoding.UTF8); + result = ParseHocr(result); + File.Delete(outputFileName); + } + File.Delete(imageFileName); + } + catch + { + // ignored + } + + return result; + } + + private static string ParseHocr(string html) + { + string s = html.Replace("", "@001_____").Replace("", "@002_____"); + + int first = s.IndexOf('<'); + while (first >= 0) + { + int last = s.IndexOf('>'); + if (last > 0) + { + s = s.Remove(first, last - first + 1); + first = s.IndexOf('<'); + } + else + { + first = -1; 
+ } + } + + s = s.Trim(); + s = s.Replace("@001_____", "").Replace("@002_____", ""); + while (s.Contains(" ")) + s = s.Replace(" ", " "); + s = s.Replace(" ", " "); + + // html escape decoding + s = s.Replace("&", "&") + .Replace("<", "<") + .Replace(">", ">") + .Replace(""", "\"") + .Replace("'", "'") + .Replace("'", "'"); + + while (s.Contains("\n\n")) + s = s.Replace("\n\n", "\n"); + s = s.Replace("\n", "\n"); + s = s.Replace("\n", Environment.NewLine); + + return s; + } + + private void TesseractErrorReceived(object sender, DataReceivedEventArgs e) + { + string msg = e.Data; + + if (string.IsNullOrEmpty(msg) || + msg.StartsWith("Tesseract Open Source OCR Engine", StringComparison.OrdinalIgnoreCase) || + msg.Contains("Too few characters", StringComparison.OrdinalIgnoreCase) || + msg.Contains("Empty page", StringComparison.OrdinalIgnoreCase) || + msg.Contains(" diacritics", StringComparison.OrdinalIgnoreCase) || + msg.Contains("Weak margin", StringComparison.OrdinalIgnoreCase)) + { + return; + } + + if (TesseractErrors.Count <= 100) + { + if (string.IsNullOrEmpty(LastError)) + { + LastError = msg; + } + else if (!LastError.Contains(msg)) + { + LastError = LastError + Environment.NewLine + msg; + } + TesseractErrors.Add(msg); + } + } + + } +}
diff --git a/src/Logic/OCR/Tesseract/TesseractThreadRunner.cs b/src/Logic/OCR/Tesseract/TesseractThreadRunner.cs
new file mode 100644
index 000000000..16788c007
--- /dev/null
+++ b/src/Logic/OCR/Tesseract/TesseractThreadRunner.cs
@@ -0,0 +1,125 @@
+using System;
+using System.Collections.Generic;
+using System.Drawing;
+using System.IO;
+using System.Threading;
+
+namespace Nikse.SubtitleEdit.Logic.Ocr.Tesseract
+{
+    /// <summary>
+    /// Runs Tesseract OCR jobs on thread-pool threads and hands finished jobs back,
+    /// in submission order, through an optional callback.
+    /// </summary>
+    public class TesseractThreadRunner
+    {
+        /// <summary>Callback invoked by <see cref="CheckQueue"/> for a finished job.</summary>
+        public delegate void OcrDone(int index, ImageJob job);
+
+        private readonly OcrDone _callback;
+        private readonly Queue<ImageJob> _jobQueue;
+        private static readonly object QueueLock = new object();
+        private readonly TesseractRunner _tesseractRunner;
+
+        // Written by Cancel() and read by DoOcr on thread-pool threads, hence volatile.
+        private volatile bool _abort;
+
+        /// <summary>Creates a runner with an empty job queue.</summary>
+        /// <param name="callback">Invoked from <see cref="CheckQueue"/> for each finished job; may be null.</param>
+        public TesseractThreadRunner(OcrDone callback = null)
+        {
+            _jobQueue = new Queue<ImageJob>();
+            _callback = callback;
+            _tesseractRunner = new TesseractRunner();
+        }
+
+        /// <summary>One OCR request together with its result.</summary>
+        public class ImageJob
+        {
+            public string FileName { get; set; }
+            public int Index { get; set; }
+            public string Result { get; set; }
+
+            /// <summary>UTC completion time; <see cref="DateTime.MaxValue"/> until OCR has finished.</summary>
+            public DateTime Completed { get; set; }
+
+            public string LanguageCode { get; set; }
+            public string PsmMode { get; set; }
+            public string EngineMode { get; set; }
+            public bool Run302 { get; set; }
+            public Bitmap Bitmap { get; set; }
+        }
+
+        /// <summary>
+        /// Thread-pool work item: runs Tesseract for one job and stamps its completion time.
+        /// Does nothing once <see cref="Cancel"/> has been called.
+        /// </summary>
+        /// <param name="j">The <see cref="ImageJob"/> to process.</param>
+        private void DoOcr(object j)
+        {
+            if (_abort)
+            {
+                return;
+            }
+
+            var job = (ImageJob)j;
+            job.Result = _tesseractRunner.Run(job.LanguageCode, job.PsmMode, job.EngineMode, job.FileName, job.Run302);
+            lock (QueueLock)
+            {
+                job.Completed = DateTime.UtcNow;
+            }
+        }
+
+        /// <summary>
+        /// Saves <paramref name="bmp"/> as a temporary PNG, queues OCR of it on the thread pool
+        /// and appends the job to the queue polled by <see cref="CheckQueue"/>.
+        /// </summary>
+        public void AddImageJob(Bitmap bmp, int index, string language, string psmMode, string engineMode, bool run302)
+        {
+            var job = new ImageJob
+            {
+                // Path.GetTempFileName() creates a zero-byte ".tmp" file on disk; appending ".png" to
+                // its name left that file behind for every job. Build a unique name without creating anything.
+                FileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".png"),
+                Index = index,
+                Completed = DateTime.MaxValue,
+                Bitmap = bmp,
+                LanguageCode = language,
+                PsmMode = psmMode,
+                EngineMode = engineMode,
+                Run302 = run302
+            };
+            bmp.Save(job.FileName, System.Drawing.Imaging.ImageFormat.Png);
+            ThreadPool.QueueUserWorkItem(DoOcr, job);
+            _jobQueue.Enqueue(job);
+        }
+
+        /// <summary>
+        /// If the oldest queued job has finished, removes it from the queue and passes it to the callback.
+        /// Handles at most one job per call.
+        /// </summary>
+        public void CheckQueue()
+        {
+            if (_jobQueue.Count == 0)
+            {
+                return;
+            }
+
+            lock (QueueLock)
+            {
+                var checkTime = DateTime.UtcNow;
+                var job = _jobQueue.Peek();
+                if (job != null && job.Completed < checkTime)
+                {
+                    _jobQueue.Dequeue();
+                    _callback?.Invoke(job.Index, job);
+                }
+            }
+        }
+
+        /// <summary>Makes OCR work items that have not started yet return without running.</summary>
+        public void Cancel()
+        {
+            _abort = true;
+        }
+    }
+}
diff --git a/src/SubtitleEdit.csproj b/src/SubtitleEdit.csproj index faae6059a..a0ebdf9b9 100644 --- a/src/SubtitleEdit.csproj +++ b/src/SubtitleEdit.csproj @@ -518,6 +518,12 @@ NetworkStart.cs + + Form + + + DownloadTesseract4.cs + Form @@ -937,6 +943,9 @@ + + + @@ -1207,6 +1216,9 @@ NetworkStart.cs + + DownloadTesseract4.cs + 
DownloadTesseract302.cs