OCR via image compare: spell check dialog now removes italic tags

git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@515 99eadd0c-20b8-1223-b5c4-2a2b2df33de2
This commit is contained in:
niksedk 2011-06-22 19:15:34 +00:00
parent 136e299432
commit 03e7b3cfdf
5 changed files with 30 additions and 5 deletions

View File

@ -20,6 +20,8 @@ Subtitle Edit Changelog
* New customizable shortcut for Adjust mode: Set current subtitle end time at video pos, keep duration, and go to next sub (Shift+End, thx Bavo)
* IMPROVED:
* OCR unknown words highlighting now works better (thx sialivi)
* OCR image compare UI improved with "Edit last char" and more (thx Zoltán)
* OCR image compare: comparison is now more effective and word breaking is improved
* Merge short lines can now also merge lines ending with ".", "?", or "!" (optional)
* Can now read Adobe Encore files starting with line numbers (thx Pier)
* Advanced Sub Station Alpha (and SSA) now keep original styles (thx Rebecca)

View File

@ -43,7 +43,8 @@ namespace Nikse.SubtitleEdit.Forms
SourceVideoFileName = labelVideoFileName.Text;
string targetFile = Path.GetTempFileName() + ".wav";
string parameters = "-I dummy -vvv \"" + SourceVideoFileName + "\" --sout=#transcode{vcodec=none,acodec=s16l}:file{dst=\"" + targetFile + "\"} vlc://quit";
// string parameters = "-I dummy -vvv \"" + SourceVideoFileName + "\" --sout=#transcode{vcodec=none,acodec=s16l}:file{dst=\"" + targetFile + "\"} vlc://quit";
string parameters = "-I dummy -vvv --no-sout-video --sout #transcode{acodec=s16l}:std{mux=wav,access=file,dst=\"" + targetFile + "\"} \"" + SourceVideoFileName + "\" vlc://quit";
string vlcPath;
if (Utilities.IsRunningOnLinux() || Utilities.IsRunningOnMac())
@ -115,6 +116,21 @@ namespace Nikse.SubtitleEdit.Forms
buttonRipWave.Enabled = true;
return;
}
FileInfo fi = new FileInfo(targetFile);
if (fi.Length <= 200)
{
MessageBox.Show("Sorry! VLC was unable to extract audio to wave file via this command line:" + Environment.NewLine
+ Environment.NewLine +
"Command line: " + vlcPath + " " + parameters);
labelPleaseWait.Visible = false;
labelProgress.Text = string.Empty;
buttonRipWave.Enabled = true;
return;
}
ReadWaveFile(targetFile);
labelProgress.Text = string.Empty;
File.Delete(targetFile);

View File

@ -684,7 +684,7 @@
AAEAAAD/////AQAAAAAAAAAMAgAAAFdTeXN0ZW0uV2luZG93cy5Gb3JtcywgVmVyc2lvbj00LjAuMC4w
LCBDdWx0dXJlPW5ldXRyYWwsIFB1YmxpY0tleVRva2VuPWI3N2E1YzU2MTkzNGUwODkFAQAAACZTeXN0
ZW0uV2luZG93cy5Gb3Jtcy5JbWFnZUxpc3RTdHJlYW1lcgEAAAAERGF0YQcCAgAAAAkDAAAADwMAAAD2
CAAAAk1TRnQBSQFMAgEBAgEAAbABCAGwAQgBEAEAARABAAT/AQkBAAj/AUIBTQE2AQQGAAE2AQQCAAEo
CAAAAk1TRnQBSQFMAgEBAgEAAbgBCAG4AQgBEAEAARABAAT/AQkBAAj/AUIBTQE2AQQGAAE2AQQCAAEo
AwABQAMAARADAAEBAQABCAYAAQQYAAGAAgABgAMAAoABAAGAAwABgAEAAYABAAKAAgADwAEAAcAB3AHA
AQAB8AHKAaYBAAEzBQABMwEAATMBAAEzAQACMwIAAxYBAAMcAQADIgEAAykBAANVAQADTQEAA0IBAAM5
AQABgAF8Af8BAAJQAf8BAAGTAQAB1gEAAf8B7AHMAQABxgHWAe8BAAHWAucBAAGQAakBrQIAAf8BMwMA

View File

@ -2173,7 +2173,8 @@ namespace Nikse.SubtitleEdit.Forms
private void LoadOcrFixEngine()
{
_languageId = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id;
if (comboBoxTesseractLanguages.SelectedItem != null)
_languageId = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id;
_ocrFixEngine = new OcrFixEngine(_languageId, this);
if (_ocrFixEngine.IsDictionaryLoaded)
{

View File

@ -323,8 +323,8 @@ namespace Nikse.SubtitleEdit.Logic.OCR
string oldText = text;
text = FixCommonErrors.FixAloneLowercaseIToUppercaseLine(regexAloneI, oldText, text, 'i');
text = FixCommonErrors.FixAloneLowercaseIToUppercaseLine(regexAloneIAsL, oldText, text, 'l');
text = RemoveSpaceBetweenNumbers(text);
}
text = RemoveSpaceBetweenNumbers(text);
}
return text;
}
@ -340,7 +340,6 @@ namespace Nikse.SubtitleEdit.Logic.OCR
return text;
}
private string FixCommonWordErrors(string word, string lastWord)
{
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
@ -1026,7 +1025,14 @@ namespace Nikse.SubtitleEdit.Logic.OCR
if (word != "Lt'S" && word != "Sox's") //TODO: get fixed nhunspell
suggestions = DoSuggest(word); // 0.9.6 fails on "Lt'S"
if (word.StartsWith("<i>"))
word = word.Remove(0, 3);
if (word.EndsWith("</i>"))
word = word.Remove(word.Length-4, 4);
SpellcheckOcrTextResult res = SpellcheckOcrText(line, bitmap, words, i, word, suggestions);
if (res.FixedWholeLine)
{
return res.Line;