Try to improve OCR via Tesseract on Linux

This commit is contained in:
Nikolaj Olsson 2019-09-22 17:15:56 +02:00
parent a74c7cb9ba
commit 8e9e15a292
3 changed files with 27 additions and 11 deletions

View File

@ -180,16 +180,16 @@ namespace Nikse.SubtitleEdit.Core
{
if (IsRunningOnLinux || IsRunningOnMac)
{
if (Directory.Exists("/usr/share/tesseract-ocr/4.00/tessdata"))
{
return "/usr/share/tesseract-ocr/4.00/tessdata";
}
if (Directory.Exists("/usr/share/tesseract-ocr/tessdata"))
{
return "/usr/share/tesseract-ocr/tessdata";
}
if (Directory.Exists("/usr/share/tesseract/tessdata"))
{
return "/usr/share/tesseract/tessdata";
}
if (Directory.Exists("/usr/share/tessdata"))
{
return "/usr/share/tessdata";

View File

@ -289,7 +289,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
// Dictionaries/spellchecking/fixing
private OcrFixEngine _ocrFixEngine;
private int _tesseractOcrAutoFixes;
private const string Tesseract4Version = "4.1.0";
private string Tesseract4Version = "4.1.0";
private Subtitle _bdnXmlOriginal;
private Subtitle _bdnXmlSubtitle;
@ -436,7 +436,18 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
InitializeModi();
comboBoxOcrMethod.Items.Clear();
comboBoxOcrMethod.Items.Add("Binary image compare");
comboBoxOcrMethod.Items.Add("Tesseract 3.02");
if (Configuration.IsRunningOnLinux || Configuration.IsRunningOnMac)
{
Tesseract4Version = "4";
checkBoxTesseractMusicOn.Checked = false;
checkBoxTesseractMusicOn.Visible = false;
checkBoxTesseractFallback.Checked = false;
checkBoxTesseractFallback.Visible = false;
}
else
{
comboBoxOcrMethod.Items.Add("Tesseract 3.02");
}
comboBoxOcrMethod.Items.Add("Tesseract " + Tesseract4Version);
if (_modiEnabled)
{
@ -6925,7 +6936,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
if (_ocrMethodIndex == _ocrMethodTesseract4)
{
var ok = File.Exists(Path.Combine(Configuration.Tesseract302Directory, "tesseract.exe")) &&
var ok = Configuration.IsRunningOnWindows &&
File.Exists(Path.Combine(Configuration.Tesseract302Directory, "tesseract.exe")) &&
File.Exists(Path.Combine(Configuration.Tesseract302DataDirectory, l + ".traineddata"));
checkBoxTesseractFallback.Visible = ok;
if (!ok)
@ -7038,7 +7050,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
comboBoxTesseractEngineMode.Visible = true;
labelTesseractEngineMode.Visible = true;
checkBoxTesseractFallback.Text = string.Format(Configuration.Settings.Language.VobSubOcr.FallbackToX, "Tesseract 3.02");
if (!File.Exists(Path.Combine(Configuration.TesseractDirectory, "tesseract.exe")))
if (Configuration.IsRunningOnWindows &&
!File.Exists(Path.Combine(Configuration.TesseractDirectory, "tesseract.exe")))
{
if (MessageBox.Show("Download Tesseract " + Tesseract4Version, "Subtitle Edit", MessageBoxButtons.YesNoCancel) == DialogResult.Yes)
{

View File

@ -539,6 +539,9 @@ namespace Nikse.SubtitleEdit.Forms
radioButtonVideoPlayerMPV.Checked = true;
labelMpvSettings.Text = "--vo=" + Configuration.Settings.General.MpvVideoOutputLinux;
}
textBoxVlcPath.Visible = false;
labelVlcPath.Visible = false;
buttonVlcPathBrowse.Visible = false;
}
labelFFmpegPath.Text = language.WaveformFFmpegPath;
@ -825,7 +828,7 @@ namespace Nikse.SubtitleEdit.Forms
buttonOK.Text = Configuration.Settings.Language.General.Ok;
buttonCancel.Text = Configuration.Settings.Language.General.Cancel;
InitComboxWordListLanguages();
InitComboBoxWordListLanguages();
checkBoxWaveformShowGrid.Checked = Configuration.Settings.VideoControls.WaveformDrawGrid;
checkBoxWaveformShowCps.Checked = Configuration.Settings.VideoControls.WaveformDrawCps;
@ -1343,7 +1346,7 @@ namespace Nikse.SubtitleEdit.Forms
pictureBoxHelp.Image = (Image)help.Clone();
}
private void InitComboxWordListLanguages()
private void InitComboBoxWordListLanguages()
{
//Examples: da_DK_user.xml, eng_OCRFixReplaceList.xml, en_names.xml
string dir = Utilities.DictionaryFolder;