diff --git a/Changelog.txt b/Changelog.txt index 9bf4c115e..785dc2b20 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -10,6 +10,7 @@ * Add new subtitle format - thx Chris * Add ASSA style "Replace" - thx Mishasama * Add "audio to text selected lines" shortcuts + * Add Vosk Korean/Uzbek models * IMPROVED: * Update Chinese translation - thx nkh0472 * Update Korean translation - thx domddol @@ -31,6 +32,7 @@ * Remember "Merge lines with same text" settings - thx kamitsu2004 * Update Whisper CPP to v1.0.4 * Rename "Fix dialogs on one line" to "Break dialogs on one line" + * Update Tesseract 5.2.0 to 5.3.0 * FIXED: * Fix italic in DCinema interop - thx Andrey * Fix Whisper model download - thx darnn diff --git a/src/libse/Common/Configuration.cs b/src/libse/Common/Configuration.cs index e1104e6b8..7c15a5976 100644 --- a/src/libse/Common/Configuration.cs +++ b/src/libse/Common/Configuration.cs @@ -24,7 +24,7 @@ namespace Nikse.SubtitleEdit.Core.Common public static readonly string ShotChangesDirectory = DataDirectory + "ShotChanges" + Path.DirectorySeparatorChar; public static readonly string AutoBackupDirectory = DataDirectory + "AutoBackup" + Path.DirectorySeparatorChar; public static readonly string VobSubCompareDirectory = DataDirectory + "VobSub" + Path.DirectorySeparatorChar; - public static readonly string TesseractDirectory = DataDirectory + "Tesseract520" + Path.DirectorySeparatorChar; + public static readonly string TesseractDirectory = DataDirectory + "Tesseract530" + Path.DirectorySeparatorChar; public static readonly string Tesseract302Directory = DataDirectory + "Tesseract302" + Path.DirectorySeparatorChar; public static readonly string WaveformsDirectory = DataDirectory + "Waveforms" + Path.DirectorySeparatorChar; public static readonly string PluginsDirectory = DataDirectory + "Plugins"; @@ -227,6 +227,7 @@ namespace Nikse.SubtitleEdit.Core.Common return "/usr/share/tessdata"; } } + return Path.Combine(TesseractDirectory, "tessdata"); } diff --git 
a/src/ui/Forms/AudioToText/VoskModelDownload.cs b/src/ui/Forms/AudioToText/VoskModelDownload.cs index f833ea183..f88cf1c4a 100644 --- a/src/ui/Forms/AudioToText/VoskModelDownload.cs +++ b/src/ui/Forms/AudioToText/VoskModelDownload.cs @@ -142,8 +142,6 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText Cursor = Cursors.Default; labelPleaseWait.Text = string.Empty; - - if (AutoClose) { DialogResult = DialogResult.OK; diff --git a/src/ui/Forms/Ocr/DownloadTesseract5.cs b/src/ui/Forms/Ocr/DownloadTesseract5.cs index f836be5ff..75da65fd6 100644 --- a/src/ui/Forms/Ocr/DownloadTesseract5.cs +++ b/src/ui/Forms/Ocr/DownloadTesseract5.cs @@ -2,7 +2,6 @@ using Nikse.SubtitleEdit.Logic; using System; using System.IO; -using System.IO.Compression; using System.Threading; using System.Windows.Forms; @@ -10,7 +9,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { public sealed partial class DownloadTesseract5 : Form { - public const string TesseractDownloadUrl = "https://github.com/SubtitleEdit/support-files/raw/master/Tesseract520.tar.gz"; + public const string TesseractDownloadUrl = "https://github.com/SubtitleEdit/support-files/raw/master/Tesseract530.zip"; private readonly CancellationTokenSource _cancellationTokenSource; public DownloadTesseract5(string version) @@ -77,35 +76,17 @@ namespace Nikse.SubtitleEdit.Forms.Ocr Directory.CreateDirectory(dictionaryFolder); } - downloadStream.Position = 0; - var tempFileName = FileUtil.GetTempFileName(".tar"); - using (var fs = new FileStream(tempFileName, FileMode.Create)) - using (var zip = new GZipStream(downloadStream, CompressionMode.Decompress)) + using (var zip = ZipExtractor.Open(downloadStream)) { - byte[] buffer = new byte[1024]; - int nRead; - while ((nRead = zip.Read(buffer, 0, buffer.Length)) > 0) + var dir = zip.ReadCentralDir(); + foreach (var entry in dir) { - fs.Write(buffer, 0, nRead); + var path = Path.Combine(dictionaryFolder, entry.FilenameInZip); + zip.ExtractFile(entry, path); } } - using (var tr = new 
TarReader(tempFileName)) - { - foreach (var th in tr.Files) - { - var fn = Path.Combine(dictionaryFolder, th.FileName.Replace('/', Path.DirectorySeparatorChar)); - if (th.IsFolder) - { - Directory.CreateDirectory(Path.Combine(dictionaryFolder, th.FileName.Replace('/', Path.DirectorySeparatorChar))); - } - else if (th.FileSizeInBytes > 0) - { - th.WriteData(fn); - } - } - } - File.Delete(tempFileName); + labelPleaseWait.Text = string.Empty; Cursor = Cursors.Default; DialogResult = DialogResult.OK; } diff --git a/src/ui/Forms/Ocr/VobSubOcr.cs b/src/ui/Forms/Ocr/VobSubOcr.cs index 50352ae27..ebe2053c4 100644 --- a/src/ui/Forms/Ocr/VobSubOcr.cs +++ b/src/ui/Forms/Ocr/VobSubOcr.cs @@ -329,7 +329,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr // Dictionaries/spellchecking/fixing private OcrFixEngine _ocrFixEngine; private int _tesseractOcrAutoFixes; - private string Tesseract5Version = "5.2.0"; + private string Tesseract5Version = "5.3.0"; private Subtitle _bdnXmlOriginal; private Subtitle _bdnXmlSubtitle;