Update Tesseract from 5.2.0 to 5.3.0

This commit is contained in:
niksedk 2023-01-10 21:43:51 +01:00
parent 8c5c37a681
commit c56e481540
5 changed files with 13 additions and 30 deletions

View File

@ -10,6 +10,7 @@
* Add new subtitle format - thx Chris
* Add ASSA style "Replace" - thx Mishasama
* Add "audio to text selected lines" shortcuts
* Add Vosk Korean/Uzbek models
* IMPROVED:
* Update Chinese translation - thx nkh0472
* Update Korean translation - thx domddol
@ -31,6 +32,7 @@
* Remember "Merge lines with same text" settings - thx kamitsu2004
* Update Whisper CPP to v1.0.4
* Rename "Fix dialogs on one line" to "Break dialogs on one line"
* Update Tesseract from 5.2.0 to 5.3.0
* FIXED:
* Fix italic in DCinema interop - thx Andrey
* Fix Whisper model download - thx darnn

View File

@ -24,7 +24,7 @@ namespace Nikse.SubtitleEdit.Core.Common
public static readonly string ShotChangesDirectory = DataDirectory + "ShotChanges" + Path.DirectorySeparatorChar;
public static readonly string AutoBackupDirectory = DataDirectory + "AutoBackup" + Path.DirectorySeparatorChar;
public static readonly string VobSubCompareDirectory = DataDirectory + "VobSub" + Path.DirectorySeparatorChar;
public static readonly string TesseractDirectory = DataDirectory + "Tesseract520" + Path.DirectorySeparatorChar;
public static readonly string TesseractDirectory = DataDirectory + "Tesseract530" + Path.DirectorySeparatorChar;
public static readonly string Tesseract302Directory = DataDirectory + "Tesseract302" + Path.DirectorySeparatorChar;
public static readonly string WaveformsDirectory = DataDirectory + "Waveforms" + Path.DirectorySeparatorChar;
public static readonly string PluginsDirectory = DataDirectory + "Plugins";
@ -227,6 +227,7 @@ namespace Nikse.SubtitleEdit.Core.Common
return "/usr/share/tessdata";
}
}
return Path.Combine(TesseractDirectory, "tessdata");
}

View File

@ -142,8 +142,6 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
Cursor = Cursors.Default;
labelPleaseWait.Text = string.Empty;
if (AutoClose)
{
DialogResult = DialogResult.OK;

View File

@ -2,7 +2,6 @@
using Nikse.SubtitleEdit.Logic;
using System;
using System.IO;
using System.IO.Compression;
using System.Threading;
using System.Windows.Forms;
@ -10,7 +9,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
public sealed partial class DownloadTesseract5 : Form
{
public const string TesseractDownloadUrl = "https://github.com/SubtitleEdit/support-files/raw/master/Tesseract520.tar.gz";
public const string TesseractDownloadUrl = "https://github.com/SubtitleEdit/support-files/raw/master/Tesseract530.zip";
private readonly CancellationTokenSource _cancellationTokenSource;
public DownloadTesseract5(string version)
@ -77,35 +76,18 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
Directory.CreateDirectory(dictionaryFolder);
}
downloadStream.Position = 0;
var tempFileName = FileUtil.GetTempFileName(".tar");
using (var fs = new FileStream(tempFileName, FileMode.Create))
using (var zip = new GZipStream(downloadStream, CompressionMode.Decompress))
using (var zip = ZipExtractor.Open(downloadStream))
{
byte[] buffer = new byte[1024];
int nRead;
while ((nRead = zip.Read(buffer, 0, buffer.Length)) > 0)
var dir = zip.ReadCentralDir();
foreach (var entry in dir)
{
fs.Write(buffer, 0, nRead);
var path = Path.Combine(dictionaryFolder, entry.FilenameInZip);
zip.ExtractFile(entry, path);
}
}
using (var tr = new TarReader(tempFileName))
{
foreach (var th in tr.Files)
{
var fn = Path.Combine(dictionaryFolder, th.FileName.Replace('/', Path.DirectorySeparatorChar));
if (th.IsFolder)
{
Directory.CreateDirectory(Path.Combine(dictionaryFolder, th.FileName.Replace('/', Path.DirectorySeparatorChar)));
}
else if (th.FileSizeInBytes > 0)
{
th.WriteData(fn);
}
}
}
File.Delete(tempFileName);
Cursor = Cursors.Default;
labelPleaseWait.Text = string.Empty;
Cursor = Cursors.Default;
DialogResult = DialogResult.OK;
}

View File

@ -329,7 +329,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
// Dictionaries/spellchecking/fixing
private OcrFixEngine _ocrFixEngine;
private int _tesseractOcrAutoFixes;
private string Tesseract5Version = "5.2.0";
private string Tesseract5Version = "5.3.0";
private Subtitle _bdnXmlOriginal;
private Subtitle _bdnXmlSubtitle;