Update Tesseract from 5.2.0 to 5.3.0

This commit is contained in:
niksedk 2023-01-10 21:43:51 +01:00
parent 8c5c37a681
commit c56e481540
5 changed files with 13 additions and 30 deletions

View File

@ -10,6 +10,7 @@
* Add new subtitle format - thx Chris * Add new subtitle format - thx Chris
* Add ASSA style "Replace" - thx Mishasama * Add ASSA style "Replace" - thx Mishasama
* Add "audio to text selected lines" shortcuts * Add "audio to text selected lines" shortcuts
* Add Vosk Korean/Uzbek models
* IMPROVED: * IMPROVED:
* Update Chinese translation - thx nkh0472 * Update Chinese translation - thx nkh0472
* Update Korean translation - thx domddol * Update Korean translation - thx domddol
@ -31,6 +32,7 @@
* Remember "Merge lines with same text" settings - thx kamitsu2004 * Remember "Merge lines with same text" settings - thx kamitsu2004
* Update Whisper CPP to v1.0.4 * Update Whisper CPP to v1.0.4
* Rename "Fix dialogs on one line" to "Break dialogs on one line" * Rename "Fix dialogs on one line" to "Break dialogs on one line"
* Update Tesseract from 5.2.0 to 5.3.0
* FIXED: * FIXED:
* Fix italic in DCinema interop - thx Andrey * Fix italic in DCinema interop - thx Andrey
* Fix Whisper model download - thx darnn * Fix Whisper model download - thx darnn

View File

@ -24,7 +24,7 @@ namespace Nikse.SubtitleEdit.Core.Common
public static readonly string ShotChangesDirectory = DataDirectory + "ShotChanges" + Path.DirectorySeparatorChar; public static readonly string ShotChangesDirectory = DataDirectory + "ShotChanges" + Path.DirectorySeparatorChar;
public static readonly string AutoBackupDirectory = DataDirectory + "AutoBackup" + Path.DirectorySeparatorChar; public static readonly string AutoBackupDirectory = DataDirectory + "AutoBackup" + Path.DirectorySeparatorChar;
public static readonly string VobSubCompareDirectory = DataDirectory + "VobSub" + Path.DirectorySeparatorChar; public static readonly string VobSubCompareDirectory = DataDirectory + "VobSub" + Path.DirectorySeparatorChar;
public static readonly string TesseractDirectory = DataDirectory + "Tesseract520" + Path.DirectorySeparatorChar; public static readonly string TesseractDirectory = DataDirectory + "Tesseract530" + Path.DirectorySeparatorChar;
public static readonly string Tesseract302Directory = DataDirectory + "Tesseract302" + Path.DirectorySeparatorChar; public static readonly string Tesseract302Directory = DataDirectory + "Tesseract302" + Path.DirectorySeparatorChar;
public static readonly string WaveformsDirectory = DataDirectory + "Waveforms" + Path.DirectorySeparatorChar; public static readonly string WaveformsDirectory = DataDirectory + "Waveforms" + Path.DirectorySeparatorChar;
public static readonly string PluginsDirectory = DataDirectory + "Plugins"; public static readonly string PluginsDirectory = DataDirectory + "Plugins";
@ -227,6 +227,7 @@ namespace Nikse.SubtitleEdit.Core.Common
return "/usr/share/tessdata"; return "/usr/share/tessdata";
} }
} }
return Path.Combine(TesseractDirectory, "tessdata"); return Path.Combine(TesseractDirectory, "tessdata");
} }

View File

@ -142,8 +142,6 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
Cursor = Cursors.Default; Cursor = Cursors.Default;
labelPleaseWait.Text = string.Empty; labelPleaseWait.Text = string.Empty;
if (AutoClose) if (AutoClose)
{ {
DialogResult = DialogResult.OK; DialogResult = DialogResult.OK;

View File

@ -2,7 +2,6 @@
using Nikse.SubtitleEdit.Logic; using Nikse.SubtitleEdit.Logic;
using System; using System;
using System.IO; using System.IO;
using System.IO.Compression;
using System.Threading; using System.Threading;
using System.Windows.Forms; using System.Windows.Forms;
@ -10,7 +9,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{ {
public sealed partial class DownloadTesseract5 : Form public sealed partial class DownloadTesseract5 : Form
{ {
public const string TesseractDownloadUrl = "https://github.com/SubtitleEdit/support-files/raw/master/Tesseract520.tar.gz"; public const string TesseractDownloadUrl = "https://github.com/SubtitleEdit/support-files/raw/master/Tesseract530.zip";
private readonly CancellationTokenSource _cancellationTokenSource; private readonly CancellationTokenSource _cancellationTokenSource;
public DownloadTesseract5(string version) public DownloadTesseract5(string version)
@ -77,35 +76,18 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
Directory.CreateDirectory(dictionaryFolder); Directory.CreateDirectory(dictionaryFolder);
} }
downloadStream.Position = 0; using (var zip = ZipExtractor.Open(downloadStream))
var tempFileName = FileUtil.GetTempFileName(".tar");
using (var fs = new FileStream(tempFileName, FileMode.Create))
using (var zip = new GZipStream(downloadStream, CompressionMode.Decompress))
{ {
byte[] buffer = new byte[1024]; var dir = zip.ReadCentralDir();
int nRead; foreach (var entry in dir)
while ((nRead = zip.Read(buffer, 0, buffer.Length)) > 0)
{ {
fs.Write(buffer, 0, nRead); var path = Path.Combine(dictionaryFolder, entry.FilenameInZip);
zip.ExtractFile(entry, path);
} }
} }
using (var tr = new TarReader(tempFileName)) Cursor = Cursors.Default;
{ labelPleaseWait.Text = string.Empty;
foreach (var th in tr.Files)
{
var fn = Path.Combine(dictionaryFolder, th.FileName.Replace('/', Path.DirectorySeparatorChar));
if (th.IsFolder)
{
Directory.CreateDirectory(Path.Combine(dictionaryFolder, th.FileName.Replace('/', Path.DirectorySeparatorChar)));
}
else if (th.FileSizeInBytes > 0)
{
th.WriteData(fn);
}
}
}
File.Delete(tempFileName);
Cursor = Cursors.Default; Cursor = Cursors.Default;
DialogResult = DialogResult.OK; DialogResult = DialogResult.OK;
} }

View File

@ -329,7 +329,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
// Dictionaries/spellchecking/fixing // Dictionaries/spellchecking/fixing
private OcrFixEngine _ocrFixEngine; private OcrFixEngine _ocrFixEngine;
private int _tesseractOcrAutoFixes; private int _tesseractOcrAutoFixes;
private string Tesseract5Version = "5.2.0"; private string Tesseract5Version = "5.3.0";
private Subtitle _bdnXmlOriginal; private Subtitle _bdnXmlOriginal;
private Subtitle _bdnXmlSubtitle; private Subtitle _bdnXmlSubtitle;