mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-10-27 22:42:38 +01:00
Fix bug in OCR auto guesses +fix new italic space detect for nOcr
This commit is contained in:
parent
728597ef45
commit
32b8d875dc
@ -338,55 +338,62 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
|
|||||||
return newText;
|
return newText;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static string AddToGuessList(List<string> list, string word, int index, string letter, string replaceLetters)
|
private static void AddToGuessList(List<string> list, string guess)
|
||||||
{
|
{
|
||||||
if (string.IsNullOrEmpty(word) || index < 0 || index + letter.Length - 1 >= word.Length)
|
if (string.IsNullOrEmpty(guess))
|
||||||
{
|
{
|
||||||
return word;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
string s = word.Remove(index, letter.Length);
|
if (!list.Contains(guess))
|
||||||
if (index >= s.Length)
|
|
||||||
{
|
{
|
||||||
s += replaceLetters;
|
list.Add(guess);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
s = s.Insert(index, replaceLetters);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!list.Contains(s))
|
|
||||||
{
|
|
||||||
list.Add(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
return s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public IEnumerable<string> CreateGuessesFromLetters(string word)
|
public IEnumerable<string> CreateGuessesFromLetters(string word)
|
||||||
{
|
{
|
||||||
var list = new List<string>();
|
var list = new List<string>();
|
||||||
|
var previousGuesses = new List<string>();
|
||||||
foreach (string letter in _partialWordReplaceList.Keys)
|
foreach (string letter in _partialWordReplaceList.Keys)
|
||||||
{
|
{
|
||||||
string s = word;
|
var indexes = new List<int>();
|
||||||
int i = 0;
|
for (int i = 1; i < word.Length - letter.Length; i++)
|
||||||
while (s.Contains(letter) && i < 10)
|
|
||||||
{
|
{
|
||||||
int index = s.FastIndexOf(letter);
|
if (word.Substring(i).StartsWith(letter, StringComparison.Ordinal))
|
||||||
s = AddToGuessList(list, s, index, letter, _partialWordReplaceList[letter]);
|
{
|
||||||
AddToGuessList(list, word, index, letter, _partialWordReplaceList[letter]);
|
indexes.Add(i);
|
||||||
i++;
|
var guess = word.Remove(i, letter.Length).Insert(i, _partialWordReplaceList[letter]);
|
||||||
|
AddToGuessList(list, guess);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
s = word;
|
|
||||||
i = 0;
|
if (indexes.Count > 1)
|
||||||
while (s.Contains(letter) && i < 10)
|
|
||||||
{
|
{
|
||||||
int index = s.LastIndexOf(letter, StringComparison.Ordinal);
|
var multiGuess = word;
|
||||||
s = AddToGuessList(list, s, index, letter, _partialWordReplaceList[letter]);
|
for (int i = indexes.Count-1; i >= 0; i--)
|
||||||
AddToGuessList(list, word, index, letter, _partialWordReplaceList[letter]);
|
{
|
||||||
i++;
|
var idx = indexes[i];
|
||||||
|
multiGuess = multiGuess.Remove(idx, letter.Length).Insert(idx, _partialWordReplaceList[letter]);
|
||||||
|
AddToGuessList(list, multiGuess);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach (var previousGuess in previousGuesses)
|
||||||
|
{
|
||||||
|
for (int i = 1; i < previousGuess.Length - letter.Length; i++)
|
||||||
|
{
|
||||||
|
if (previousGuess.Substring(i).StartsWith(letter, StringComparison.Ordinal))
|
||||||
|
{
|
||||||
|
var guess = previousGuess.Remove(i, letter.Length).Insert(i, _partialWordReplaceList[letter]);
|
||||||
|
AddToGuessList(list, guess);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
previousGuesses = new List<string>(list);
|
||||||
}
|
}
|
||||||
|
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,9 +251,11 @@
|
|||||||
this.Controls.Add(this.buttonOK);
|
this.Controls.Add(this.buttonOK);
|
||||||
this.Controls.Add(this.buttonCancel);
|
this.Controls.Add(this.buttonCancel);
|
||||||
this.KeyPreview = true;
|
this.KeyPreview = true;
|
||||||
|
this.MinimumSize = new System.Drawing.Size(840, 460);
|
||||||
this.Name = "VobSubNOcrCharacterInspect";
|
this.Name = "VobSubNOcrCharacterInspect";
|
||||||
this.ShowIcon = false;
|
this.ShowIcon = false;
|
||||||
this.ShowInTaskbar = false;
|
this.ShowInTaskbar = false;
|
||||||
|
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
|
||||||
this.Text = "VobSubNOcrCharacterInspect";
|
this.Text = "VobSubNOcrCharacterInspect";
|
||||||
this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.VobSubNOcrCharacterInspect_KeyDown);
|
this.KeyDown += new System.Windows.Forms.KeyEventHandler(this.VobSubNOcrCharacterInspect_KeyDown);
|
||||||
this.groupBoxInspectItems.ResumeLayout(false);
|
this.groupBoxInspectItems.ResumeLayout(false);
|
||||||
|
@ -3122,7 +3122,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
|||||||
var expandedResult = NOcrFindExpandedMatch(parentBitmap, targetItem, nOcrDb.OcrCharactersExpanded);
|
var expandedResult = NOcrFindExpandedMatch(parentBitmap, targetItem, nOcrDb.OcrCharactersExpanded);
|
||||||
if (expandedResult != null)
|
if (expandedResult != null)
|
||||||
{
|
{
|
||||||
return new CompareMatch(expandedResult.Text, expandedResult.Italic, expandedResult.ExpandCount, null, expandedResult);
|
return new CompareMatch(expandedResult.Text, expandedResult.Italic, expandedResult.ExpandCount, null, expandedResult) { ImageSplitterItem = targetItem };
|
||||||
}
|
}
|
||||||
|
|
||||||
var result = NOcrFindBestMatchNew(targetItem, targetItem.Y - targetItem.ParentY, out var italic, nOcrDb, deepSeek);
|
var result = NOcrFindBestMatchNew(targetItem, targetItem.Y - targetItem.ParentY, out var italic, nOcrDb, deepSeek);
|
||||||
@ -4399,11 +4399,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
|||||||
line = _nocrThreadResults[listViewIndex];
|
line = _nocrThreadResults[listViewIndex];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var matches = new List<CompareMatch>();
|
||||||
|
var nbmpInput = new NikseBitmap(bitmap);
|
||||||
|
|
||||||
if (string.IsNullOrEmpty(line))
|
if (string.IsNullOrEmpty(line))
|
||||||
{
|
{
|
||||||
var nbmpInput = new NikseBitmap(bitmap);
|
|
||||||
|
|
||||||
var matches = new List<CompareMatch>();
|
|
||||||
|
|
||||||
int minLineHeight = GetLastBinOcrLowercaseHeight() - 3;
|
int minLineHeight = GetLastBinOcrLowercaseHeight() - 3;
|
||||||
if (minLineHeight < 5)
|
if (minLineHeight < 5)
|
||||||
@ -4512,7 +4512,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
|||||||
//string name = SaveCompareItem(item.NikseBitmap, text, _vobSubOcrNOcrCharacter.IsItalic, 0);
|
//string name = SaveCompareItem(item.NikseBitmap, text, _vobSubOcrNOcrCharacter.IsItalic, 0);
|
||||||
//var addition = new ImageCompareAddition(name, text, item.NikseBitmap, _vobSubOcrNOcrCharacter.IsItalic, listViewIndex);
|
//var addition = new ImageCompareAddition(name, text, item.NikseBitmap, _vobSubOcrNOcrCharacter.IsItalic, listViewIndex);
|
||||||
//_lastAdditions.Add(addition);
|
//_lastAdditions.Add(addition);
|
||||||
matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, 0, null));
|
matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, 0, null) { ImageSplitterItem = item });
|
||||||
}
|
}
|
||||||
else if (result == DialogResult.Abort)
|
else if (result == DialogResult.Abort)
|
||||||
{
|
{
|
||||||
@ -4527,7 +4527,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
|||||||
}
|
}
|
||||||
else // found image match
|
else // found image match
|
||||||
{
|
{
|
||||||
matches.Add(new CompareMatch(match.Text, match.Italic, 0, null));
|
matches.Add(new CompareMatch(match.Text, match.Italic, 0, null) { ImageSplitterItem = item });
|
||||||
if (match.ExpandCount > 0)
|
if (match.ExpandCount > 0)
|
||||||
{
|
{
|
||||||
index += match.ExpandCount - 1;
|
index += match.ExpandCount - 1;
|
||||||
@ -4553,6 +4553,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
|||||||
}
|
}
|
||||||
|
|
||||||
line = MatchesToItalicStringConverter.GetStringWithItalicTags(matches);
|
line = MatchesToItalicStringConverter.GetStringWithItalicTags(matches);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
line = FixNocrHardcodedStuff(line);
|
line = FixNocrHardcodedStuff(line);
|
||||||
@ -4575,6 +4577,54 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
|||||||
line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
|
line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// smaller space pixels for italic
|
||||||
|
if (wordsNotFound > 0 && line.Contains("<i>", StringComparison.Ordinal))
|
||||||
|
{
|
||||||
|
AddItalicCouldBeSpace(matches, nbmpInput, _unItalicFactor, _numericUpDownPixelsIsSpace);
|
||||||
|
}
|
||||||
|
if (wordsNotFound > 0 && line.Contains("<i>", StringComparison.Ordinal) && matches.Any(p => p?.ImageSplitterItem?.CouldBeSpaceBefore == true))
|
||||||
|
{
|
||||||
|
int j = 0;
|
||||||
|
while (j < matches.Count)
|
||||||
|
{
|
||||||
|
var match = matches[j];
|
||||||
|
if (match.ImageSplitterItem?.CouldBeSpaceBefore == true)
|
||||||
|
{
|
||||||
|
match.ImageSplitterItem.CouldBeSpaceBefore = false;
|
||||||
|
if (match.Italic)
|
||||||
|
{
|
||||||
|
matches.Insert(j, new CompareMatch(" ", false, 0, string.Empty, new ImageSplitterItem(" ")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
var tempLine = MatchesToItalicStringConverter.GetStringWithItalicTags(matches);
|
||||||
|
var oldAutoGuessesUsed = new List<LogItem>(_ocrFixEngine.AutoGuessesUsed);
|
||||||
|
var oldUnknownWordsFound = new List<LogItem>(_ocrFixEngine.UnknownWordsFound);
|
||||||
|
_ocrFixEngine.AutoGuessesUsed.Clear();
|
||||||
|
_ocrFixEngine.UnknownWordsFound.Clear();
|
||||||
|
if (checkBoxAutoFixCommonErrors.Checked)
|
||||||
|
{
|
||||||
|
tempLine = _ocrFixEngine.FixOcrErrors(tempLine, listViewIndex, _lastLine, true, GetAutoGuessLevel());
|
||||||
|
}
|
||||||
|
|
||||||
|
int tempWordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(tempLine, out var tempCorrectWords);
|
||||||
|
if (tempWordsNotFound <= wordsNotFound && tempCorrectWords > correctWords)
|
||||||
|
{
|
||||||
|
wordsNotFound = tempWordsNotFound;
|
||||||
|
correctWords = tempCorrectWords;
|
||||||
|
line = tempLine;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_ocrFixEngine.AutoGuessesUsed = oldAutoGuessesUsed;
|
||||||
|
_ocrFixEngine.UnknownWordsFound = oldUnknownWordsFound;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (_ocrFixEngine.Abort)
|
if (_ocrFixEngine.Abort)
|
||||||
{
|
{
|
||||||
ButtonStopClick(null, null);
|
ButtonStopClick(null, null);
|
||||||
@ -4590,7 +4640,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
|||||||
|
|
||||||
_ocrFixEngine.AutoGuessesUsed.Clear();
|
_ocrFixEngine.AutoGuessesUsed.Clear();
|
||||||
|
|
||||||
// Log unkown words guess (found via spelling dictionaries)
|
// Log unknown words guess (found via spelling dictionaries)
|
||||||
LogUnknownWords();
|
LogUnknownWords();
|
||||||
|
|
||||||
ColorLineByNumberOfUnknownWords(listViewIndex, wordsNotFound, line);
|
ColorLineByNumberOfUnknownWords(listViewIndex, wordsNotFound, line);
|
||||||
@ -4608,6 +4658,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
|||||||
|
|
||||||
private string FixNocrHardcodedStuff(string input)
|
private string FixNocrHardcodedStuff(string input)
|
||||||
{
|
{
|
||||||
|
if (!Configuration.Settings.Tools.OcrFixUseHardcodedRules)
|
||||||
|
{
|
||||||
|
return input;
|
||||||
|
}
|
||||||
|
|
||||||
var line = input;
|
var line = input;
|
||||||
|
|
||||||
if (LanguageString.StartsWith("en", StringComparison.OrdinalIgnoreCase))
|
if (LanguageString.StartsWith("en", StringComparison.OrdinalIgnoreCase))
|
||||||
@ -9177,6 +9232,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
|||||||
{
|
{
|
||||||
InspectImageCompareMatchesForCurrentImageToolStripMenuItem_Click(null, null);
|
InspectImageCompareMatchesForCurrentImageToolStripMenuItem_Click(null, null);
|
||||||
}
|
}
|
||||||
|
else if (subtitleListView1.SelectedItems.Count > 0 && _ocrMethodIndex == _ocrMethodNocr)
|
||||||
|
{
|
||||||
|
toolStripMenuItemInspectNOcrMatches_Click(null, null);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void comboBoxTesseractEngineMode_SelectedIndexChanged(object sender, EventArgs e)
|
private void comboBoxTesseractEngineMode_SelectedIndexChanged(object sender, EventArgs e)
|
||||||
|
Loading…
Reference in New Issue
Block a user