diff --git a/src/Forms/FixCommonErrors.cs b/src/Forms/FixCommonErrors.cs
index b3c7331b0..26c22de9f 100644
--- a/src/Forms/FixCommonErrors.cs
+++ b/src/Forms/FixCommonErrors.cs
@@ -2737,14 +2737,14 @@ namespace Nikse.SubtitleEdit.Forms
public void FixOcrErrorsViaReplaceList(string threeLetterISOLanguageName)
{
- OcrFixEngine ocrFixEngine = new OcrFixEngine(threeLetterISOLanguageName, null, this);
+ var ocrFixEngine = new OcrFixEngine(threeLetterISOLanguageName, null, this);
string fixAction = _language.FixCommonOcrErrors;
int noOfFixes = 0;
string lastLine = string.Empty;
for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
{
- Paragraph p = _subtitle.Paragraphs[i];
- string text = ocrFixEngine.FixOcrErrors(p.Text, i, lastLine, false, false);
+ var p = _subtitle.Paragraphs[i];
+ string text = ocrFixEngine.FixOcrErrors(p.Text, i, lastLine, false, OcrFixEngine.AutoGuessLevel.Cautious);
lastLine = text;
if (p.Text != text)
{
diff --git a/src/Forms/VobSubOcr.cs b/src/Forms/VobSubOcr.cs
index bce3e726d..49a6ce63e 100644
--- a/src/Forms/VobSubOcr.cs
+++ b/src/Forms/VobSubOcr.cs
@@ -3915,7 +3915,8 @@ namespace Nikse.SubtitleEdit.Forms
if (_ocrFixEngine != null && _ocrFixEngine.IsDictionaryLoaded)
{
if (checkBoxAutoFixCommonErrors.Checked)
- line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, GetAutoGuessLevel());
+
int correctWords;
int wordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(line, out correctWords);
@@ -3923,7 +3924,7 @@ namespace Nikse.SubtitleEdit.Forms
{
_ocrFixEngine.AutoGuessesUsed.Clear();
_ocrFixEngine.UnknownWordsFound.Clear();
- line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
}
if (_ocrFixEngine.Abort)
@@ -4120,8 +4121,12 @@ namespace Nikse.SubtitleEdit.Forms
string textWithOutFixes = line;
if (_ocrFixEngine.IsDictionaryLoaded)
{
+ // Resolve the guess level once through the shared helper so this OCR path cannot drift from the others.
+ // GetAutoGuessLevel() yields Aggressive when checkBoxGuessUnknownWords is checked, otherwise None.
+ var autoGuessLevel = GetAutoGuessLevel();
+
if (checkBoxAutoFixCommonErrors.Checked)
- line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, autoGuessLevel);
int correctWords;
int wordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(line, out correctWords);
@@ -4129,7 +4134,7 @@ namespace Nikse.SubtitleEdit.Forms
{
_ocrFixEngine.AutoGuessesUsed.Clear();
_ocrFixEngine.UnknownWordsFound.Clear();
- line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, autoGuessLevel);
}
if (_ocrFixEngine.Abort)
@@ -4427,7 +4432,7 @@ namespace Nikse.SubtitleEdit.Forms
if (_ocrFixEngine.IsDictionaryLoaded)
{
if (checkBoxAutoFixCommonErrors.Checked)
- line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, GetAutoGuessLevel());
int correctWords;
int wordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(line, out correctWords);
@@ -4435,7 +4440,7 @@ namespace Nikse.SubtitleEdit.Forms
{
_ocrFixEngine.AutoGuessesUsed.Clear();
_ocrFixEngine.UnknownWordsFound.Clear();
- line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
}
if (_ocrFixEngine.Abort)
@@ -5930,7 +5935,7 @@ namespace Nikse.SubtitleEdit.Forms
if (_ocrFixEngine.IsDictionaryLoaded)
{
if (checkBoxAutoFixCommonErrors.Checked)
- line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, GetAutoGuessLevel());
int correctWords;
int wordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(line, out correctWords);
int oldCorrectWords = correctWords;
@@ -5942,7 +5947,7 @@ namespace Nikse.SubtitleEdit.Forms
_ocrFixEngine.UnknownWordsFound.Clear();
string newUnfixedText = TesseractResizeAndRetry(bitmap);
- string newText = _ocrFixEngine.FixOcrErrors(newUnfixedText, index, _lastLine, true, checkBoxGuessUnknownWords.Checked);
+ string newText = _ocrFixEngine.FixOcrErrors(newUnfixedText, index, _lastLine, true, GetAutoGuessLevel());
int newWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(newText, out correctWords);
if (wordsNotFound == 1 && newWordsNotFound == 1 && newUnfixedText.EndsWith("!!") && textWithOutFixes.EndsWith("u") && newText.Length > 1)
@@ -6015,7 +6020,7 @@ namespace Nikse.SubtitleEdit.Forms
int modiWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(oneColorText, out modiCorrectWords);
string modiTextOcrFixed = oneColorText;
if (checkBoxAutoFixCommonErrors.Checked)
- modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(oneColorText, index, _lastLine, false, checkBoxGuessUnknownWords.Checked);
+ modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(oneColorText, index, _lastLine, false, GetAutoGuessLevel());
int modiOcrCorrectedCorrectWords;
int modiOcrCorrectedWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiTextOcrFixed, out modiOcrCorrectedCorrectWords);
if (modiOcrCorrectedWordsNotFound <= modiWordsNotFound)
@@ -6031,7 +6036,7 @@ namespace Nikse.SubtitleEdit.Forms
wordsNotFound = modiWordsNotFound;
correctWords = modiCorrectWords;
if (checkBoxAutoFixCommonErrors.Checked)
- line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, GetAutoGuessLevel());
}
else if (wordsNotFound == modiWordsNotFound && oneColorText.EndsWith("!") && (line.EndsWith("l") || line.EndsWith("fl")))
{
@@ -6039,7 +6044,7 @@ namespace Nikse.SubtitleEdit.Forms
wordsNotFound = modiWordsNotFound;
correctWords = modiCorrectWords;
if (checkBoxAutoFixCommonErrors.Checked)
- line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, GetAutoGuessLevel());
}
}
}
@@ -6063,7 +6068,7 @@ namespace Nikse.SubtitleEdit.Forms
int modiWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(unItalicText, out modiCorrectWords);
string modiTextOcrFixed = unItalicText;
if (checkBoxAutoFixCommonErrors.Checked)
- modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(unItalicText, index, _lastLine, false, checkBoxGuessUnknownWords.Checked);
+ modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(unItalicText, index, _lastLine, false, GetAutoGuessLevel());
int modiOcrCorrectedCorrectWords;
int modiOcrCorrectedWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiTextOcrFixed, out modiOcrCorrectedCorrectWords);
if (modiOcrCorrectedWordsNotFound <= modiWordsNotFound)
@@ -6295,7 +6300,7 @@ namespace Nikse.SubtitleEdit.Forms
{
line = line.Replace("'.", ":");
}
- line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, GetAutoGuessLevel());
}
line = "" + line + "";
}
@@ -6392,7 +6397,7 @@ namespace Nikse.SubtitleEdit.Forms
{
string modiTextOcrFixed = modiText;
if (checkBoxAutoFixCommonErrors.Checked)
- modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(modiText, index, _lastLine, false, checkBoxGuessUnknownWords.Checked);
+ modiTextOcrFixed = _ocrFixEngine.FixOcrErrors(modiText, index, _lastLine, false, GetAutoGuessLevel());
int modiOcrCorrectedWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(modiTextOcrFixed, out correctWords);
if (modiOcrCorrectedWordsNotFound <= modiWordsNotFound)
modiText = modiTextOcrFixed;
@@ -6405,11 +6410,11 @@ namespace Nikse.SubtitleEdit.Forms
}
// take the best option - before ocr fixing, which we do again to save suggestions and prompt for user input
- line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
}
else
{ // fix some error manually (modi not available)
- line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, index, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
}
}
@@ -6475,7 +6480,7 @@ namespace Nikse.SubtitleEdit.Forms
else
{ // no dictionary :(
if (checkBoxAutoFixCommonErrors.Checked)
- line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, checkBoxGuessUnknownWords.Checked);
+ line = _ocrFixEngine.FixOcrErrors(line, index, _lastLine, true, GetAutoGuessLevel());
if (badWords >= numberOfWords)
subtitleListView1.SetBackgroundColor(index, Color.Red);
@@ -8417,5 +8422,13 @@ namespace Nikse.SubtitleEdit.Forms
form.Show(this);
}
+ private OcrFixEngine.AutoGuessLevel GetAutoGuessLevel() // maps checkBoxGuessUnknownWords to an OcrFixEngine guess level
+ {
+ // Checked => Aggressive, unchecked => None; this helper never returns Cautious.
+ return checkBoxGuessUnknownWords.Checked
+ ? OcrFixEngine.AutoGuessLevel.Aggressive
+ : OcrFixEngine.AutoGuessLevel.None;
+ }
+
}
}
diff --git a/src/Logic/OCR/OcrFixEngine.cs b/src/Logic/OCR/OcrFixEngine.cs
index 91b6b70fa..efd60b168 100644
--- a/src/Logic/OCR/OcrFixEngine.cs
+++ b/src/Logic/OCR/OcrFixEngine.cs
@@ -14,6 +14,13 @@ namespace Nikse.SubtitleEdit.Logic.OCR
{
public class OcrFixEngine
{
+ public enum AutoGuessLevel // how far FixUnknownWordsViaGuessOrPrompt may go when guessing replacements for unknown words
+ {
+ None, // no guessing: the auto-guess branch is skipped
+ Cautious, // only the hard-coded character-substitution guesses, and only when Configuration.Settings.Tools.OcrFixUseHardcodedRules is set
+ Aggressive // like Cautious, except words longer than 5 characters get guesses from CreateGuessesFromLetters instead
+ }
+
// Dictionaries/spellchecking/fixing
Dictionary _wordReplaceList;
Dictionary _partialLineWordBoundaryReplaceList;
@@ -386,7 +393,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
return list;
}
- public string FixOcrErrors(string text, int index, string lastLine, bool logSuggestions, bool useAutoGuess)
+ public string FixOcrErrors(string text, int index, string lastLine, bool logSuggestions, AutoGuessLevel autoGuess)
{
var sb = new StringBuilder();
var word = new StringBuilder();
@@ -452,7 +459,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
text = FixCommenOcrLineErrors(sb.ToString(), lastLine);
int wordsNotFound;
- text = FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, text, index, null, true, false, logSuggestions, useAutoGuess);
+ text = FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, text, index, null, true, false, logSuggestions, autoGuess);
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
{
text = FixLowercaseIToUppercaseI(text, lastLine);
@@ -1424,7 +1431,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
return newText;
}
- public string FixUnknownWordsViaGuessOrPrompt(out int wordsNotFound, string line, int index, Bitmap bitmap, bool autoFix, bool promptForFixingErrors, bool log, bool useAutoGuess)
+ public string FixUnknownWordsViaGuessOrPrompt(out int wordsNotFound, string line, int index, Bitmap bitmap, bool autoFix, bool promptForFixingErrors, bool log, AutoGuessLevel autoGuess)
{
var localIgnoreWords = new List();
wordsNotFound = 0;
@@ -1549,10 +1556,10 @@ namespace Nikse.SubtitleEdit.Logic.OCR
UnknownWordsFound.Add(string.Format("#{0}: {1}", index + 1, nf));
}
- if (autoFix && useAutoGuess)
+ if (autoFix && autoGuess != AutoGuessLevel.None)
{
var guesses = new List();
- if (word.Length > 5)
+ if (word.Length > 5 && autoGuess == AutoGuessLevel.Aggressive)
{
guesses = (List)CreateGuessesFromLetters(word);
@@ -1566,7 +1573,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
if (DoSpell(word.ToLower()))
guesses.Insert(0, wordWithCasingChanged);
}
- else
+ else if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
{
if (word[0] == 'L')
guesses.Add("I" + word.Substring(1));
@@ -1585,6 +1592,8 @@ namespace Nikse.SubtitleEdit.Logic.OCR
guesses.Add(word.Replace("$", "s"));
if (!word.EndsWith("€") && !word.StartsWith("€"))
guesses.Add(word.Replace("€", "e"));
+ guesses.Add(word.Replace("/", "l"));
+ guesses.Add(word.Replace(")/", "y"));
}
foreach (string guess in guesses)
{
diff --git a/src/Test/FixCommonErrorsTest.cs b/src/Test/FixCommonErrorsTest.cs
index 748ac8295..5c6f86015 100644
--- a/src/Test/FixCommonErrorsTest.cs
+++ b/src/Test/FixCommonErrorsTest.cs
@@ -368,16 +368,15 @@ namespace Test
Assert.AreEqual(target._subtitle.Paragraphs[0].Text, "(laughing/clapping)");
}
- //Auto-guess unknown words in "Fix common errors" is now disabled
- //[TestMethod]
- //[DeploymentItem("SubtitleEdit.exe")]
- //public void FixCommonOcrErrorsSlashIsL()
- //{
- // var target = GetFixCommonErrorsLib();
- // InitializeFixCommonErrorsLine(target, "The font is ita/ic!");
- // target.FixOcrErrorsViaReplaceList("eng");
- // Assert.AreEqual(target._subtitle.Paragraphs[0].Text, "The font is italic!"); // will fail if English dictionary is not found
- //}
+ [TestMethod]
+ [DeploymentItem("SubtitleEdit.exe")]
+ public void FixCommonOcrErrorsSlashIsL() // checks that a "/" misread inside a word is corrected to "l"; requires hardcoded rules enabled
+ {
+ var target = GetFixCommonErrorsLib();
+ InitializeFixCommonErrorsLine(target, "The font is ita/ic!");
+ target.FixOcrErrorsViaReplaceList("eng");
+ Assert.AreEqual("The font is italic!", target._subtitle.Paragraphs[0].Text); // expected first, actual second, so failure messages read correctly; will fail if English dictionary is not found
+ }
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]