mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-27 22:42:38 +01:00
OCR via image compare: the spell check dialog now removes italic tags
git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@515 99eadd0c-20b8-1223-b5c4-2a2b2df33de2
This commit is contained in:
parent
136e299432
commit
03e7b3cfdf
@ -20,6 +20,8 @@ Subtitle Edit Changelog
|
||||
* New customizable shortcut for Adjust mode: set current subtitle end time at video position, keep duration, and go to next subtitle (Shift+End, thx Bavo)
|
||||
* IMPROVED:
|
||||
* OCR unknown words highlighting now works better (thx sialivi)
|
||||
* OCR image compare UI improved with "Edit last char" and more (thx Zoltán)
|
||||
* OCR image compare: comparison is now more effective and word breaking is improved
|
||||
* Merge short lines can now also merge lines ending with ".", "?", or "!" (optional)
|
||||
* Can now read Adobe Encore files starting with line numbers (thx Pier)
|
||||
* Advanced Sub Station Alpha (and SSA) now keep original styles (thx Rebecca)
|
||||
|
@ -43,7 +43,8 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
SourceVideoFileName = labelVideoFileName.Text;
|
||||
string targetFile = Path.GetTempFileName() + ".wav";
|
||||
string parameters = "-I dummy -vvv \"" + SourceVideoFileName + "\" --sout=#transcode{vcodec=none,acodec=s16l}:file{dst=\"" + targetFile + "\"} vlc://quit";
|
||||
// string parameters = "-I dummy -vvv \"" + SourceVideoFileName + "\" --sout=#transcode{vcodec=none,acodec=s16l}:file{dst=\"" + targetFile + "\"} vlc://quit";
|
||||
string parameters = "-I dummy -vvv --no-sout-video --sout #transcode{acodec=s16l}:std{mux=wav,access=file,dst=\"" + targetFile + "\"} \"" + SourceVideoFileName + "\" vlc://quit";
|
||||
|
||||
string vlcPath;
|
||||
if (Utilities.IsRunningOnLinux() || Utilities.IsRunningOnMac())
|
||||
@ -115,6 +116,21 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
buttonRipWave.Enabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
FileInfo fi = new FileInfo(targetFile);
|
||||
if (fi.Length <= 200)
|
||||
{
|
||||
MessageBox.Show("Sorry! VLC was unable to extract audio to wave file via this command line:" + Environment.NewLine
|
||||
+ Environment.NewLine +
|
||||
"Command line: " + vlcPath + " " + parameters);
|
||||
|
||||
labelPleaseWait.Visible = false;
|
||||
labelProgress.Text = string.Empty;
|
||||
buttonRipWave.Enabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
ReadWaveFile(targetFile);
|
||||
labelProgress.Text = string.Empty;
|
||||
File.Delete(targetFile);
|
||||
|
@ -684,7 +684,7 @@
|
||||
AAEAAAD/////AQAAAAAAAAAMAgAAAFdTeXN0ZW0uV2luZG93cy5Gb3JtcywgVmVyc2lvbj00LjAuMC4w
|
||||
LCBDdWx0dXJlPW5ldXRyYWwsIFB1YmxpY0tleVRva2VuPWI3N2E1YzU2MTkzNGUwODkFAQAAACZTeXN0
|
||||
ZW0uV2luZG93cy5Gb3Jtcy5JbWFnZUxpc3RTdHJlYW1lcgEAAAAERGF0YQcCAgAAAAkDAAAADwMAAAD2
|
||||
CAAAAk1TRnQBSQFMAgEBAgEAAbABCAGwAQgBEAEAARABAAT/AQkBAAj/AUIBTQE2AQQGAAE2AQQCAAEo
|
||||
CAAAAk1TRnQBSQFMAgEBAgEAAbgBCAG4AQgBEAEAARABAAT/AQkBAAj/AUIBTQE2AQQGAAE2AQQCAAEo
|
||||
AwABQAMAARADAAEBAQABCAYAAQQYAAGAAgABgAMAAoABAAGAAwABgAEAAYABAAKAAgADwAEAAcAB3AHA
|
||||
AQAB8AHKAaYBAAEzBQABMwEAATMBAAEzAQACMwIAAxYBAAMcAQADIgEAAykBAANVAQADTQEAA0IBAAM5
|
||||
AQABgAF8Af8BAAJQAf8BAAGTAQAB1gEAAf8B7AHMAQABxgHWAe8BAAHWAucBAAGQAakBrQIAAf8BMwMA
|
||||
|
@ -2173,6 +2173,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
private void LoadOcrFixEngine()
|
||||
{
|
||||
if (comboBoxTesseractLanguages.SelectedItem != null)
|
||||
_languageId = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id;
|
||||
_ocrFixEngine = new OcrFixEngine(_languageId, this);
|
||||
if (_ocrFixEngine.IsDictionaryLoaded)
|
||||
|
@ -323,8 +323,8 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
string oldText = text;
|
||||
text = FixCommonErrors.FixAloneLowercaseIToUppercaseLine(regexAloneI, oldText, text, 'i');
|
||||
text = FixCommonErrors.FixAloneLowercaseIToUppercaseLine(regexAloneIAsL, oldText, text, 'l');
|
||||
text = RemoveSpaceBetweenNumbers(text);
|
||||
}
|
||||
text = RemoveSpaceBetweenNumbers(text);
|
||||
}
|
||||
return text;
|
||||
}
|
||||
@ -340,7 +340,6 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
return text;
|
||||
}
|
||||
|
||||
|
||||
private string FixCommonWordErrors(string word, string lastWord)
|
||||
{
|
||||
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
|
||||
@ -1026,7 +1025,14 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
if (word != "Lt'S" && word != "Sox's") //TODO: get fixed nhunspell
|
||||
suggestions = DoSuggest(word); // 0.9.6 fails on "Lt'S"
|
||||
|
||||
if (word.StartsWith("<i>"))
|
||||
word = word.Remove(0, 3);
|
||||
|
||||
if (word.EndsWith("</i>"))
|
||||
word = word.Remove(word.Length-4, 4);
|
||||
|
||||
SpellcheckOcrTextResult res = SpellcheckOcrText(line, bitmap, words, i, word, suggestions);
|
||||
|
||||
if (res.FixedWholeLine)
|
||||
{
|
||||
return res.Line;
|
||||
|
Loading…
Reference in New Issue
Block a user