Added an option that allows not using the hard-coded rules in "Fix common OCR errors"

git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@291 99eadd0c-20b8-1223-b5c4-2a2b2df33de2
This commit is contained in:
niksedk 2011-01-30 15:04:03 +00:00
parent 33d6eccedc
commit 2ac40da6c6
7 changed files with 119 additions and 96 deletions

View File

@ -291,7 +291,6 @@ namespace Nikse.SubtitleEdit.Forms
toolStripComboBoxWaveForm.SelectedIndexChanged += toolStripComboBoxWaveForm_SelectedIndexChanged;
FixLargeFonts();
//this.Width = (int)(5304958 / (8 - 8 + (this.Width) - this.Width));
_timerAddHistoryWhenDone.Interval = 500;
_timerAddHistoryWhenDone.Tick += new EventHandler(timerAddHistoryWhenDone_Tick);
}
@ -1075,6 +1074,8 @@ namespace Nikse.SubtitleEdit.Forms
AudioWaveForm.WavePeaks = null;
AudioWaveForm.Invalidate();
if (Configuration.Settings.General.ShowVideoPlayer || Configuration.Settings.General.ShowWaveForm)
{
if (!string.IsNullOrEmpty(videoFileName) && File.Exists(videoFileName))
{
OpenVideo(videoFileName);
@ -1083,6 +1084,7 @@ namespace Nikse.SubtitleEdit.Forms
{
TryToFindAndOpenVideoFile(Path.Combine(Path.GetDirectoryName(fileName), Path.GetFileNameWithoutExtension(fileName)));
}
}
videoFileLoaded = _videoFileName != null;
@ -6358,6 +6360,7 @@ namespace Nikse.SubtitleEdit.Forms
if (fi.Length < 1000)
return;
Cursor = Cursors.WaitCursor;
VideoFileName = fileName;
if (mediaPlayer.VideoPlayer != null)
{
@ -6368,7 +6371,6 @@ namespace Nikse.SubtitleEdit.Forms
VideoInfo videoInfo = ShowVideoInfo(fileName);
toolStripComboBoxFrameRate.Text = videoInfo.FramesPerSecond.ToString();
Utilities.InitializeVideoPlayerAndContainer(fileName, videoInfo, mediaPlayer, VideoLoaded, VideoEnded);
mediaPlayer.Volume = 0;
labelVideoInfo.Text = Path.GetFileName(fileName) + " " + videoInfo.Width + "x" + videoInfo.Height + " " + videoInfo.VideoCodec;
@ -6387,6 +6389,7 @@ namespace Nikse.SubtitleEdit.Forms
AudioWaveForm.SetPosition(0, _subtitle, 0, 0);
timerWaveForm.Start();
}
Cursor = Cursors.Default;
}
}
@ -6409,19 +6412,6 @@ namespace Nikse.SubtitleEdit.Forms
_videoInfo = Utilities.GetVideoInfo(fileName, delegate { Application.DoEvents(); });
var info = new FileInfo(fileName);
long fileSizeInBytes = info.Length;
//labelVideoInfo.Text = string.Format(_languageGeneral.FileNameXAndSize, fileName, Utilities.FormatBytesToDisplayFileSize(fileSizeInBytes)) + Environment.NewLine +
// string.Format(_languageGeneral.ResolutionX, +_videoInfo.Width + "x" + _videoInfo.Height) + " ";
//if (_videoInfo.FramesPerSecond > 5 && _videoInfo.FramesPerSecond < 200)
// labelVideoInfo.Text += string.Format(_languageGeneral.FrameRateX + " ", _videoInfo.FramesPerSecond);
//if (_videoInfo.TotalFrames > 10)
// labelVideoInfo.Text += string.Format(_languageGeneral.TotalFramesX + " ", (int)_videoInfo.TotalFrames);
//if (!string.IsNullOrEmpty(_videoInfo.VideoCodec))
// labelVideoInfo.Text += string.Format(_languageGeneral.VideoEncodingX, _videoInfo.VideoCodec) + " ";
//TimeSpan span = TimeSpan.FromMilliseconds(_videoInfo.TotalMilliseconds);
//_totalPositionString = " / " + string.Format("{0:00}:{1:00}:{2:00},{3:000}", span.Hours, span.Minutes, span.Seconds, span.Milliseconds);
return _videoInfo;
}
@ -6456,7 +6446,6 @@ namespace Nikse.SubtitleEdit.Forms
else
mediaPlayer.CurrentPosition = 0;
Utilities.ShowSubtitle(_subtitle.Paragraphs, videoPlayerContainer);
// ShowPosition(labelPosition, mediaPlayer);
}
}

View File

@ -33,6 +33,7 @@
this.tabControlSettings = new System.Windows.Forms.TabControl();
this.tabPageGenerel = new System.Windows.Forms.TabPage();
this.groupBoxMiscellaneous = new System.Windows.Forms.GroupBox();
this.checkBoxAutoWrapWhileTyping = new System.Windows.Forms.CheckBox();
this.labelMergeShortLines = new System.Windows.Forms.Label();
this.comboBoxMergeShortLineLength = new System.Windows.Forms.ComboBox();
this.checkBoxAllowEditOfOriginalSubtitle = new System.Windows.Forms.CheckBox();
@ -195,7 +196,7 @@
this.colorDialogSSAStyle = new System.Windows.Forms.ColorDialog();
this.fontDialogSSAStyle = new System.Windows.Forms.FontDialog();
this.labelStatus = new System.Windows.Forms.Label();
this.checkBoxAutoWrapWhileTyping = new System.Windows.Forms.CheckBox();
this.checkBoxFixCommonOcrErrorsUsingHardcodedRules = new System.Windows.Forms.CheckBox();
this.tabControlSettings.SuspendLayout();
this.tabPageGenerel.SuspendLayout();
this.groupBoxMiscellaneous.SuspendLayout();
@ -329,6 +330,16 @@
this.groupBoxMiscellaneous.TabStop = false;
this.groupBoxMiscellaneous.Text = "Miscellaneous";
//
// checkBoxAutoWrapWhileTyping
//
this.checkBoxAutoWrapWhileTyping.AutoSize = true;
this.checkBoxAutoWrapWhileTyping.Location = new System.Drawing.Point(193, 146);
this.checkBoxAutoWrapWhileTyping.Name = "checkBoxAutoWrapWhileTyping";
this.checkBoxAutoWrapWhileTyping.Size = new System.Drawing.Size(132, 17);
this.checkBoxAutoWrapWhileTyping.TabIndex = 4;
this.checkBoxAutoWrapWhileTyping.Text = "Auto-wrap while typing";
this.checkBoxAutoWrapWhileTyping.UseVisualStyleBackColor = true;
//
// labelMergeShortLines
//
this.labelMergeShortLines.AutoSize = true;
@ -352,7 +363,7 @@
this.checkBoxAllowEditOfOriginalSubtitle.AutoSize = true;
this.checkBoxAllowEditOfOriginalSubtitle.Location = new System.Drawing.Point(436, 296);
this.checkBoxAllowEditOfOriginalSubtitle.Name = "checkBoxAllowEditOfOriginalSubtitle";
this.checkBoxAllowEditOfOriginalSubtitle.Size = new System.Drawing.Size(160, 17);
this.checkBoxAllowEditOfOriginalSubtitle.Size = new System.Drawing.Size(155, 17);
this.checkBoxAllowEditOfOriginalSubtitle.TabIndex = 24;
this.checkBoxAllowEditOfOriginalSubtitle.Text = "Allow edit of original subtitle";
this.checkBoxAllowEditOfOriginalSubtitle.UseVisualStyleBackColor = true;
@ -520,7 +531,7 @@
this.checkBoxRememberWindowPosition.AutoSize = true;
this.checkBoxRememberWindowPosition.Location = new System.Drawing.Point(436, 101);
this.checkBoxRememberWindowPosition.Name = "checkBoxRememberWindowPosition";
this.checkBoxRememberWindowPosition.Size = new System.Drawing.Size(223, 17);
this.checkBoxRememberWindowPosition.Size = new System.Drawing.Size(222, 17);
this.checkBoxRememberWindowPosition.TabIndex = 15;
this.checkBoxRememberWindowPosition.Text = "Remember main window position and size";
this.checkBoxRememberWindowPosition.UseVisualStyleBackColor = true;
@ -567,7 +578,7 @@
this.checkBoxStartInSourceView.AutoSize = true;
this.checkBoxStartInSourceView.Location = new System.Drawing.Point(436, 124);
this.checkBoxStartInSourceView.Name = "checkBoxStartInSourceView";
this.checkBoxStartInSourceView.Size = new System.Drawing.Size(121, 17);
this.checkBoxStartInSourceView.Size = new System.Drawing.Size(119, 17);
this.checkBoxStartInSourceView.TabIndex = 16;
this.checkBoxStartInSourceView.Text = "Start in source view";
this.checkBoxStartInSourceView.UseVisualStyleBackColor = true;
@ -577,7 +588,7 @@
this.checkBoxReopenLastOpened.AutoSize = true;
this.checkBoxReopenLastOpened.Location = new System.Drawing.Point(444, 52);
this.checkBoxReopenLastOpened.Name = "checkBoxReopenLastOpened";
this.checkBoxReopenLastOpened.Size = new System.Drawing.Size(145, 17);
this.checkBoxReopenLastOpened.Size = new System.Drawing.Size(140, 17);
this.checkBoxReopenLastOpened.TabIndex = 13;
this.checkBoxReopenLastOpened.Text = "Start with last file loaded";
this.checkBoxReopenLastOpened.UseVisualStyleBackColor = true;
@ -587,7 +598,7 @@
this.checkBoxRememberRecentFiles.AutoSize = true;
this.checkBoxRememberRecentFiles.Location = new System.Drawing.Point(436, 28);
this.checkBoxRememberRecentFiles.Name = "checkBoxRememberRecentFiles";
this.checkBoxRememberRecentFiles.Size = new System.Drawing.Size(195, 17);
this.checkBoxRememberRecentFiles.Size = new System.Drawing.Size(188, 17);
this.checkBoxRememberRecentFiles.TabIndex = 12;
this.checkBoxRememberRecentFiles.Text = "Remember recent files (for reopen)";
this.checkBoxRememberRecentFiles.UseVisualStyleBackColor = true;
@ -598,7 +609,7 @@
this.checkBoxSubtitleFontBold.AutoSize = true;
this.checkBoxSubtitleFontBold.Location = new System.Drawing.Point(193, 269);
this.checkBoxSubtitleFontBold.Name = "checkBoxSubtitleFontBold";
this.checkBoxSubtitleFontBold.Size = new System.Drawing.Size(46, 17);
this.checkBoxSubtitleFontBold.Size = new System.Drawing.Size(47, 17);
this.checkBoxSubtitleFontBold.TabIndex = 8;
this.checkBoxSubtitleFontBold.Text = "Bold";
this.checkBoxSubtitleFontBold.UseVisualStyleBackColor = true;
@ -1410,9 +1421,9 @@
// groupBoxSpellCheck
//
this.groupBoxSpellCheck.Controls.Add(this.checkBoxSpellCheckAutoChangeNames);
this.groupBoxSpellCheck.Location = new System.Drawing.Point(7, 257);
this.groupBoxSpellCheck.Location = new System.Drawing.Point(6, 283);
this.groupBoxSpellCheck.Name = "groupBoxSpellCheck";
this.groupBoxSpellCheck.Size = new System.Drawing.Size(785, 148);
this.groupBoxSpellCheck.Size = new System.Drawing.Size(785, 122);
this.groupBoxSpellCheck.TabIndex = 4;
this.groupBoxSpellCheck.TabStop = false;
this.groupBoxSpellCheck.Text = "Spell check";
@ -1422,20 +1433,21 @@
this.checkBoxSpellCheckAutoChangeNames.AutoSize = true;
this.checkBoxSpellCheckAutoChangeNames.Location = new System.Drawing.Point(15, 20);
this.checkBoxSpellCheckAutoChangeNames.Name = "checkBoxSpellCheckAutoChangeNames";
this.checkBoxSpellCheckAutoChangeNames.Size = new System.Drawing.Size(209, 17);
this.checkBoxSpellCheckAutoChangeNames.Size = new System.Drawing.Size(216, 17);
this.checkBoxSpellCheckAutoChangeNames.TabIndex = 0;
this.checkBoxSpellCheckAutoChangeNames.Text = "Auto fix names where only casing differ";
this.checkBoxSpellCheckAutoChangeNames.UseVisualStyleBackColor = true;
//
// groupBoxFixCommonErrors
//
this.groupBoxFixCommonErrors.Controls.Add(this.checkBoxFixCommonOcrErrorsUsingHardcodedRules);
this.groupBoxFixCommonErrors.Controls.Add(this.comboBoxToolsMusicSymbol);
this.groupBoxFixCommonErrors.Controls.Add(this.textBoxMusicSymbolsToReplace);
this.groupBoxFixCommonErrors.Controls.Add(this.labelToolsMusicSymbolsToReplace);
this.groupBoxFixCommonErrors.Controls.Add(this.labelToolsMusicSymbol);
this.groupBoxFixCommonErrors.Location = new System.Drawing.Point(7, 129);
this.groupBoxFixCommonErrors.Name = "groupBoxFixCommonErrors";
this.groupBoxFixCommonErrors.Size = new System.Drawing.Size(785, 121);
this.groupBoxFixCommonErrors.Size = new System.Drawing.Size(785, 148);
this.groupBoxFixCommonErrors.TabIndex = 3;
this.groupBoxFixCommonErrors.TabStop = false;
this.groupBoxFixCommonErrors.Text = "Fix common errors";
@ -1452,7 +1464,7 @@
this.comboBoxToolsMusicSymbol.Location = new System.Drawing.Point(199, 71);
this.comboBoxToolsMusicSymbol.Name = "comboBoxToolsMusicSymbol";
this.comboBoxToolsMusicSymbol.Size = new System.Drawing.Size(86, 21);
this.comboBoxToolsMusicSymbol.TabIndex = 36;
this.comboBoxToolsMusicSymbol.TabIndex = 1;
//
// textBoxMusicSymbolsToReplace
//
@ -1460,7 +1472,7 @@
this.textBoxMusicSymbolsToReplace.MaxLength = 100;
this.textBoxMusicSymbolsToReplace.Name = "textBoxMusicSymbolsToReplace";
this.textBoxMusicSymbolsToReplace.Size = new System.Drawing.Size(274, 21);
this.textBoxMusicSymbolsToReplace.TabIndex = 35;
this.textBoxMusicSymbolsToReplace.TabIndex = 0;
//
// labelToolsMusicSymbolsToReplace
//
@ -1516,7 +1528,7 @@
this.comboBoxToolsEndSceneIndex.Location = new System.Drawing.Point(200, 76);
this.comboBoxToolsEndSceneIndex.Name = "comboBoxToolsEndSceneIndex";
this.comboBoxToolsEndSceneIndex.Size = new System.Drawing.Size(73, 21);
this.comboBoxToolsEndSceneIndex.TabIndex = 28;
this.comboBoxToolsEndSceneIndex.TabIndex = 2;
//
// labelToolsStartScene
//
@ -1539,7 +1551,7 @@
this.comboBoxToolsStartSceneIndex.Location = new System.Drawing.Point(200, 49);
this.comboBoxToolsStartSceneIndex.Name = "comboBoxToolsStartSceneIndex";
this.comboBoxToolsStartSceneIndex.Size = new System.Drawing.Size(73, 21);
this.comboBoxToolsStartSceneIndex.TabIndex = 26;
this.comboBoxToolsStartSceneIndex.TabIndex = 1;
//
// comboBoxToolsVerifySeconds
//
@ -1553,7 +1565,7 @@
this.comboBoxToolsVerifySeconds.Location = new System.Drawing.Point(200, 22);
this.comboBoxToolsVerifySeconds.Name = "comboBoxToolsVerifySeconds";
this.comboBoxToolsVerifySeconds.Size = new System.Drawing.Size(73, 21);
this.comboBoxToolsVerifySeconds.TabIndex = 21;
this.comboBoxToolsVerifySeconds.TabIndex = 0;
//
// labelVerifyButton
//
@ -1988,15 +2000,15 @@
this.labelStatus.TabIndex = 3;
this.labelStatus.Text = "labelStatus";
//
// checkBoxAutoWrapWhileTyping
// checkBoxFixCommonOcrErrorsUsingHardcodedRules
//
this.checkBoxAutoWrapWhileTyping.AutoSize = true;
this.checkBoxAutoWrapWhileTyping.Location = new System.Drawing.Point(193, 146);
this.checkBoxAutoWrapWhileTyping.Name = "checkBoxAutoWrapWhileTyping";
this.checkBoxAutoWrapWhileTyping.Size = new System.Drawing.Size(137, 17);
this.checkBoxAutoWrapWhileTyping.TabIndex = 4;
this.checkBoxAutoWrapWhileTyping.Text = "Auto-wrap while typing";
this.checkBoxAutoWrapWhileTyping.UseVisualStyleBackColor = true;
this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.AutoSize = true;
this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.Location = new System.Drawing.Point(15, 115);
this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.Name = "checkBoxFixCommonOcrErrorsUsingHardcodedRules";
this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.Size = new System.Drawing.Size(268, 17);
this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.TabIndex = 2;
this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.Text = "Fix common OCR errors - also use hardcoded rules";
this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.UseVisualStyleBackColor = true;
//
// Settings
//
@ -2247,5 +2259,6 @@
private System.Windows.Forms.Label labelMergeShortLines;
private System.Windows.Forms.ComboBox comboBoxMergeShortLineLength;
private System.Windows.Forms.CheckBox checkBoxAutoWrapWhileTyping;
private System.Windows.Forms.CheckBox checkBoxFixCommonOcrErrorsUsingHardcodedRules;
}
}

View File

@ -378,7 +378,7 @@ namespace Nikse.SubtitleEdit.Forms
}
textBoxMusicSymbolsToReplace.Text = toolsSettings.MusicSymbolToReplace;
checkBoxFixCommonOcrErrorsUsingHardcodedRules.Checked = toolsSettings.OcrFixUseHardcodedRules;
checkBoxSpellCheckAutoChangeNames.Checked = toolsSettings.SpellCheckAutoChangeNames;
@ -617,6 +617,7 @@ namespace Nikse.SubtitleEdit.Forms
toolsSettings.MusicSymbol = comboBoxToolsMusicSymbol.SelectedItem.ToString();
toolsSettings.MusicSymbolToReplace = textBoxMusicSymbolsToReplace.Text;
toolsSettings.SpellCheckAutoChangeNames = checkBoxSpellCheckAutoChangeNames.Checked;
toolsSettings.OcrFixUseHardcodedRules = checkBoxFixCommonOcrErrorsUsingHardcodedRules.Checked;
WordListSettings wordListSettings = Configuration.Settings.WordLists;
wordListSettings.UseOnlineNamesEtc = checkBoxNamesEtcOnline.Checked;

View File

@ -1142,6 +1142,7 @@ can edit in same subtitle file (collaboration)",
MergeLinesShorterThan = "Merge lines shorter than",
MusicSymbol = "Music symbol",
MusicSymbolsToReplace = "Music symbols to replace (separate by space)",
FixCommonOcrErrorsUseHardcodedRules = "Fix common OCR errors - also use hardcoded rules",
};
ShowEarlierLater = new LanguageStructure.ShowEarlierLater

View File

@ -1076,6 +1076,7 @@
public string MergeLinesShorterThan { get; set; }
public string MusicSymbol { get; set; }
public string MusicSymbolsToReplace { get; set; }
public string FixCommonOcrErrorsUseHardcodedRules { get; set; }
}
public class ShowEarlierLater

View File

@ -247,6 +247,8 @@ namespace Nikse.SubtitleEdit.Logic.OCR
private string FixCommonWordErrors(string word, string lastWord)
{
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
{
while (word.Contains("--"))
word = word.Replace("--", "-");
@ -275,6 +277,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
if (regex.IsMatch(word))
word = word.Replace("¤", "o");
}
}
string pre = string.Empty;
string post = string.Empty;
@ -359,6 +362,8 @@ namespace Nikse.SubtitleEdit.Logic.OCR
return _wordReplaceList[from];
}
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
{
// uppercase I or 1 inside lowercase word (will be replaced by lowercase L)
word = FixIor1InsideLowerCaseWord(word);
@ -369,6 +374,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
word = FixIor1InsideLowerCaseWord(word);
word = FixLowerCaseLInsideUpperCaseWord(word); // eg. SCARLETTl => SCARLETTI
}
// Retry word replace list
foreach (string from in _wordReplaceList.Keys)
@ -506,6 +512,8 @@ namespace Nikse.SubtitleEdit.Logic.OCR
input = FixOcrErrorsViaHardcodedRules(input, lastLine, _abbreviationList);
input = FixOcrErrorViaLineReplaceList(input);
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
{
// e.g. "selectionsu." -> "selections..."
if (input.EndsWith("u.") && _hunspell != null)
{
@ -525,6 +533,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
{
input = input.Replace(".'", Configuration.Settings.Tools.MusicSymbol);
}
}
return input;
}
@ -547,6 +556,9 @@ namespace Nikse.SubtitleEdit.Logic.OCR
public static string FixOcrErrorsViaHardcodedRules(string input, string lastLine, List<string> abbreviationList)
{
if (!Configuration.Settings.Tools.OcrFixUseHardcodedRules)
return input;
if (lastLine == null ||
lastLine.EndsWith(".") ||
lastLine.EndsWith("!") ||

View File

@ -77,6 +77,7 @@ namespace Nikse.SubtitleEdit.Logic
public string MusicSymbol { get; set; }
public string MusicSymbolToReplace { get; set; }
public bool SpellCheckAutoChangeNames { get; set; }
public bool OcrFixUseHardcodedRules { get; set; }
public string Interjections { get; set; }
public ToolsSettings()
@ -88,6 +89,7 @@ namespace Nikse.SubtitleEdit.Logic
MusicSymbol = "♪";
MusicSymbolToReplace = "♪ ⶠ♪ âTª ã¢â™âª ?t×3 ?t¤3";
SpellCheckAutoChangeNames = true;
OcrFixUseHardcodedRules = true;
Interjections = "Ah;Ahh;Ahhh;Eh;Ehh;Ehhh;Hm;Hmm;Hmmm;Phew;Gah;Oh;Ohh;Ohhh;Ow;Oww;Owww;Ugh;Ughh;Uh;Uhh;Uhhh;Whew";
}
}
@ -679,6 +681,9 @@ namespace Nikse.SubtitleEdit.Logic
subNode = node.SelectSingleNode("SpellCheckAutoChangeNames");
if (subNode != null)
settings.Tools.SpellCheckAutoChangeNames = Convert.ToBoolean(subNode.InnerText);
subNode = node.SelectSingleNode("OcrFixUseHardcodedRules");
if (subNode != null)
settings.Tools.OcrFixUseHardcodedRules = Convert.ToBoolean(subNode.InnerText);
subNode = node.SelectSingleNode("Interjections");
if (subNode != null)
settings.Tools.Interjections = subNode.InnerText;
@ -979,6 +984,7 @@ namespace Nikse.SubtitleEdit.Logic
textWriter.WriteElementString("MusicSymbol", settings.Tools.MusicSymbol);
textWriter.WriteElementString("MusicSymbolToReplace", settings.Tools.MusicSymbolToReplace);
textWriter.WriteElementString("SpellCheckAutoChangeNames", settings.Tools.SpellCheckAutoChangeNames.ToString());
textWriter.WriteElementString("OcrFixUseHardcodedRules", settings.Tools.OcrFixUseHardcodedRules.ToString());
textWriter.WriteElementString("Interjections", settings.Tools.Interjections);
textWriter.WriteEndElement();