mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-28 06:52:35 +01:00
Merge pull request #1453 from xylographe/xrfct
Minor refact (OCR/OcrFixEngine)
This commit is contained in:
commit
22c8b8f920
@ -46,7 +46,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
private static readonly Regex RegexAloneIasL = new Regex(@"\bl\b", RegexOptions.Compiled);
|
||||
private static readonly Regex RegexLowercaseL = new Regex("[A-ZÆØÅÄÖÉÈÀÙÂÊÎÔÛËÏ]l[A-ZÆØÅÄÖÉÈÀÙÂÊÎÔÛËÏ]", RegexOptions.Compiled);
|
||||
private static readonly Regex RegexUppercaseI = new Regex("[a-zæøåöääöéèàùâêîôûëï]I.", RegexOptions.Compiled);
|
||||
private static readonly Regex RegexNumber1 = new Regex(@"\d\ 1", RegexOptions.Compiled);
|
||||
private static readonly Regex RegexNumber1 = new Regex(@"(?<=\d) 1(?!/\d)", RegexOptions.Compiled);
|
||||
|
||||
public bool Abort { get; set; }
|
||||
public List<string> AutoGuessesUsed { get; set; }
|
||||
@ -366,7 +366,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
|
||||
text = ReplaceWordsBeforeLineFixes(text);
|
||||
|
||||
text = FixCommenOcrLineErrors(text, lastLine);
|
||||
text = FixCommonOcrLineErrors(text, lastLine);
|
||||
|
||||
string lastWord = null;
|
||||
for (int i = 0; i < text.Length; i++)
|
||||
@ -415,7 +415,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
sb.Append(fixedWord);
|
||||
}
|
||||
|
||||
text = FixCommenOcrLineErrors(sb.ToString(), lastLine);
|
||||
text = FixCommonOcrLineErrors(sb.ToString(), lastLine);
|
||||
int wordsNotFound;
|
||||
text = FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, text, index, null, true, false, logSuggestions, autoGuess);
|
||||
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
|
||||
@ -445,7 +445,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
private static string FixFrenchLApostrophe(string text, string tag, string lastLine)
|
||||
{
|
||||
bool endingBeforeThis = string.IsNullOrEmpty(lastLine) || lastLine.EndsWith('.') || lastLine.EndsWith('!') || lastLine.EndsWith('?') ||
|
||||
lastLine.EndsWith(".</i>") || lastLine.EndsWith("!</i>", StringComparison.Ordinal) || lastLine.EndsWith("?</i>", StringComparison.Ordinal) ||
|
||||
lastLine.EndsWith(".</i>", StringComparison.Ordinal) || lastLine.EndsWith("!</i>", StringComparison.Ordinal) || lastLine.EndsWith("?</i>", StringComparison.Ordinal) ||
|
||||
lastLine.EndsWith(".</font>", StringComparison.Ordinal) || lastLine.EndsWith("!</font>", StringComparison.Ordinal) || lastLine.EndsWith("?</font>", StringComparison.Ordinal);
|
||||
if (text.StartsWith(tag.TrimStart(), StringComparison.Ordinal) && text.Length > 3)
|
||||
{
|
||||
@ -581,7 +581,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
return word;
|
||||
}
|
||||
|
||||
private string FixCommenOcrLineErrors(string input, string lastLine)
|
||||
private string FixCommonOcrLineErrors(string input, string lastLine)
|
||||
{
|
||||
input = FixOcrErrorViaLineReplaceList(input);
|
||||
input = FixOcrErrorsViaHardcodedRules(input, lastLine, _abbreviationList);
|
||||
@ -855,23 +855,11 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
|
||||
// change '<number><space>1' to '<number>1'
|
||||
if (input.Contains('1'))
|
||||
{
|
||||
Match match = RegexNumber1.Match(input);
|
||||
var match = RegexNumber1.Match(input);
|
||||
while (match.Success)
|
||||
{
|
||||
bool doFix = true;
|
||||
|
||||
if (match.Index + 4 < input.Length && input[match.Index + 3] == '/' && char.IsDigit(input[match.Index + 4]))
|
||||
doFix = false;
|
||||
|
||||
if (doFix)
|
||||
{
|
||||
input = input.Substring(0, match.Index + 1) + input.Substring(match.Index + 2);
|
||||
match = RegexNumber1.Match(input);
|
||||
}
|
||||
else
|
||||
{
|
||||
match = RegexNumber1.Match(input, match.Index + 1);
|
||||
}
|
||||
input = input.Remove(match.Index, 1);
|
||||
match = RegexNumber1.Match(input, match.Index);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user