From c2bd52ce43fdecf1caf24367d55b1cd18ba4ef19 Mon Sep 17 00:00:00 2001 From: Waldi Ravens Date: Sat, 12 Dec 2015 18:56:18 +0100 Subject: [PATCH 1/2] Minor refact (OCR/OcrFixEngine) --- src/Logic/OCR/OcrFixEngine.cs | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/Logic/OCR/OcrFixEngine.cs b/src/Logic/OCR/OcrFixEngine.cs index feb939c6a..f3965221e 100644 --- a/src/Logic/OCR/OcrFixEngine.cs +++ b/src/Logic/OCR/OcrFixEngine.cs @@ -46,7 +46,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr private static readonly Regex RegexAloneIasL = new Regex(@"\bl\b", RegexOptions.Compiled); private static readonly Regex RegexLowercaseL = new Regex("[A-ZÆØÅÄÖÉÈÀÙÂÊÎÔÛËÏ]l[A-ZÆØÅÄÖÉÈÀÙÂÊÎÔÛËÏ]", RegexOptions.Compiled); private static readonly Regex RegexUppercaseI = new Regex("[a-zæøåöääöéèàùâêîôûëï]I.", RegexOptions.Compiled); - private static readonly Regex RegexNumber1 = new Regex(@"\d\ 1", RegexOptions.Compiled); + private static readonly Regex RegexNumber1 = new Regex(@"(?<=\d) 1(?!/\d)", RegexOptions.Compiled); public bool Abort { get; set; } public List AutoGuessesUsed { get; set; } @@ -445,7 +445,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr private static string FixFrenchLApostrophe(string text, string tag, string lastLine) { bool endingBeforeThis = string.IsNullOrEmpty(lastLine) || lastLine.EndsWith('.') || lastLine.EndsWith('!') || lastLine.EndsWith('?') || - lastLine.EndsWith(".") || lastLine.EndsWith("!", StringComparison.Ordinal) || lastLine.EndsWith("?", StringComparison.Ordinal) || + lastLine.EndsWith(".", StringComparison.Ordinal) || lastLine.EndsWith("!", StringComparison.Ordinal) || lastLine.EndsWith("?", StringComparison.Ordinal) || lastLine.EndsWith(".", StringComparison.Ordinal) || lastLine.EndsWith("!", StringComparison.Ordinal) || lastLine.EndsWith("?", StringComparison.Ordinal); if (text.StartsWith(tag.TrimStart(), StringComparison.Ordinal) && text.Length > 3) { @@ -855,23 +855,11 @@ namespace Nikse.SubtitleEdit.Logic.Ocr // change '1' to '1' if (input.Contains('1')) { - Match match = RegexNumber1.Match(input); + var match = RegexNumber1.Match(input); while (match.Success) { - bool doFix = true; - - if (match.Index + 4 < input.Length && input[match.Index + 3] == '/' && char.IsDigit(input[match.Index + 4])) - doFix = false; - - if (doFix) - { - input = input.Substring(0, match.Index + 1) + input.Substring(match.Index + 2); - match = RegexNumber1.Match(input); - } - else - { - match = RegexNumber1.Match(input, match.Index + 1); - } + input = input.Remove(match.Index, 1); + match = RegexNumber1.Match(input, match.Index); } } From 12ed11a4508ca54198283c1560537b07243ef95e Mon Sep 17 00:00:00 2001 From: Waldi Ravens Date: Wed, 16 Dec 2015 16:22:49 +0100 Subject: [PATCH 2/2] Fixed typo (OCR/OcrFixEngine) --- src/Logic/OCR/OcrFixEngine.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Logic/OCR/OcrFixEngine.cs b/src/Logic/OCR/OcrFixEngine.cs index f3965221e..4eda5cc89 100644 --- a/src/Logic/OCR/OcrFixEngine.cs +++ b/src/Logic/OCR/OcrFixEngine.cs @@ -366,7 +366,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr text = ReplaceWordsBeforeLineFixes(text); - text = FixCommenOcrLineErrors(text, lastLine); + text = FixCommonOcrLineErrors(text, lastLine); string lastWord = null; for (int i = 0; i < text.Length; i++) @@ -415,7 +415,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr sb.Append(fixedWord); } - text = FixCommenOcrLineErrors(sb.ToString(), lastLine); + text = FixCommonOcrLineErrors(sb.ToString(), lastLine); int wordsNotFound; text = FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, text, index, null, true, false, logSuggestions, autoGuess); if (Configuration.Settings.Tools.OcrFixUseHardcodedRules) @@ -581,7 +581,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr return word; } - private string FixCommenOcrLineErrors(string input, string lastLine) + private string FixCommonOcrLineErrors(string input, string lastLine) { input = FixOcrErrorViaLineReplaceList(input); input = FixOcrErrorsViaHardcodedRules(input, lastLine, _abbreviationList);