mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-22 19:22:53 +01:00
Some OCR improvements + hack for buggy nhunspell
git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@378 99eadd0c-20b8-1223-b5c4-2a2b2df33de2
This commit is contained in:
parent
b5360a5b97
commit
fa15215da9
@ -92,49 +92,42 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
private static void HighLightWord(RichTextBox richTextBoxParagraph, string word)
|
||||
{
|
||||
bool startApos = false;
|
||||
if (word.StartsWith("'") && word.Length > 1)
|
||||
if (word != null && richTextBoxParagraph.Text.Contains(word))
|
||||
{
|
||||
startApos = true;
|
||||
word = word.Substring(1);
|
||||
}
|
||||
|
||||
Regex regex = Utilities.MakeWordSearchRegex(word);
|
||||
Match match = regex.Match(richTextBoxParagraph.Text);
|
||||
if (!match.Success)
|
||||
for (int i = 0; i < richTextBoxParagraph.Text.Length; i++)
|
||||
{
|
||||
regex = Utilities.MakeWordSearchRegexWithNumbers(word);
|
||||
match = regex.Match(richTextBoxParagraph.Text);
|
||||
}
|
||||
|
||||
while (match.Success)
|
||||
if (richTextBoxParagraph.Text.Substring(i).StartsWith(word))
|
||||
{
|
||||
if (startApos)
|
||||
bool startOk = i == 0;
|
||||
if (!startOk)
|
||||
startOk = (" <>-\"”“[]'‘`´¶()♪¿¡.…—!?,:;/" + Environment.NewLine).Contains(richTextBoxParagraph.Text.Substring(i - 1, 1));
|
||||
if (startOk)
|
||||
{
|
||||
richTextBoxParagraph.SelectionStart = match.Index-1;
|
||||
richTextBoxParagraph.SelectionLength = match.Length+1;
|
||||
while (richTextBoxParagraph.SelectedText != "'" + match.Value && richTextBoxParagraph.SelectionStart > 0)
|
||||
bool endOK = (i + word.Length == richTextBoxParagraph.Text.Length);
|
||||
if (!endOK)
|
||||
endOK = (" <>-\"”“[]'‘`´¶()♪¿¡.…—!?,:;/" + Environment.NewLine).Contains(richTextBoxParagraph.Text.Substring(i + word.Length, 1));
|
||||
if (endOK)
|
||||
{
|
||||
richTextBoxParagraph.SelectionStart = i+ 1;
|
||||
richTextBoxParagraph.SelectionLength = word.Length;
|
||||
while (richTextBoxParagraph.SelectedText != word && richTextBoxParagraph.SelectionStart > 0)
|
||||
{
|
||||
richTextBoxParagraph.SelectionStart = richTextBoxParagraph.SelectionStart - 1;
|
||||
richTextBoxParagraph.SelectionLength = match.Length+1;
|
||||
richTextBoxParagraph.SelectionLength = word.Length;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (richTextBoxParagraph.SelectedText == word)
|
||||
{
|
||||
richTextBoxParagraph.SelectionStart = match.Index + 1;
|
||||
richTextBoxParagraph.SelectionLength = match.Length;
|
||||
while (richTextBoxParagraph.SelectedText != match.Value && richTextBoxParagraph.SelectionStart > 0)
|
||||
{
|
||||
richTextBoxParagraph.SelectionStart = richTextBoxParagraph.SelectionStart - 1;
|
||||
richTextBoxParagraph.SelectionLength = match.Length;
|
||||
}
|
||||
}
|
||||
richTextBoxParagraph.SelectionColor = Color.Red;
|
||||
match = match.NextMatch();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
richTextBoxParagraph.SelectionLength = 0;
|
||||
richTextBoxParagraph.SelectionStart = 0;
|
||||
}
|
||||
}
|
||||
|
||||
private void ButtonEditWholeTextClick(object sender, EventArgs e)
|
||||
{
|
||||
|
@ -550,7 +550,11 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
_mainWindow.FocusParagraph(_currentIndex);
|
||||
|
||||
List<string> suggestions = _hunspell.Suggest(_currentWord);
|
||||
List<string> suggestions = new List<string>();
|
||||
|
||||
if (_currentWord.Length > 4 || !_currentWord.Contains("'")) //TODO: get fixed nhunspell
|
||||
suggestions = _hunspell.Suggest(_currentWord); //TODO: 0.9.6 fails on "Lt'S"
|
||||
|
||||
if (AutoFixNames && _currentWord.Length > 1 && suggestions.Contains(_currentWord.Substring(0, 1).ToUpper() + _currentWord.Substring(1)))
|
||||
{
|
||||
ChangeWord = _currentWord.Substring(0, 1).ToUpper() + _currentWord.Substring(1);
|
||||
|
45
src/Forms/VobSubOcr.Designer.cs
generated
45
src/Forms/VobSubOcr.Designer.cs
generated
@ -67,6 +67,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.buttonStop = new System.Windows.Forms.Button();
|
||||
this.buttonStartOcr = new System.Windows.Forms.Button();
|
||||
this.groupBoxOcrAutoFix = new System.Windows.Forms.GroupBox();
|
||||
this.comboBoxDictionaries = new System.Windows.Forms.ComboBox();
|
||||
this.checkBoxGuessUnknownWords = new System.Windows.Forms.CheckBox();
|
||||
this.tabControlLogs = new System.Windows.Forms.TabControl();
|
||||
this.tabPageAllFixes = new System.Windows.Forms.TabPage();
|
||||
@ -94,7 +95,6 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxShowOnlyForced = new System.Windows.Forms.CheckBox();
|
||||
this.checkBoxUseTimeCodesFromIdx = new System.Windows.Forms.CheckBox();
|
||||
this.folderBrowserDialog1 = new System.Windows.Forms.FolderBrowserDialog();
|
||||
this.comboBoxDictionaries = new System.Windows.Forms.ComboBox();
|
||||
this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView();
|
||||
((System.ComponentModel.ISupportInitialize)(this.pictureBoxSubtitleImage)).BeginInit();
|
||||
this.contextMenuStripListview.SuspendLayout();
|
||||
@ -271,7 +271,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Enabled = false;
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Location = new System.Drawing.Point(22, 74);
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Name = "checkBoxUseModiInTesseractForUnknownWords";
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(167, 17);
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Size = new System.Drawing.Size(165, 17);
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.TabIndex = 39;
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.Text = "Try MODI for unknown words";
|
||||
this.checkBoxUseModiInTesseractForUnknownWords.UseVisualStyleBackColor = true;
|
||||
@ -346,7 +346,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxRightToLeft.AutoSize = true;
|
||||
this.checkBoxRightToLeft.Location = new System.Drawing.Point(128, 112);
|
||||
this.checkBoxRightToLeft.Name = "checkBoxRightToLeft";
|
||||
this.checkBoxRightToLeft.Size = new System.Drawing.Size(83, 17);
|
||||
this.checkBoxRightToLeft.Size = new System.Drawing.Size(80, 17);
|
||||
this.checkBoxRightToLeft.TabIndex = 40;
|
||||
this.checkBoxRightToLeft.Text = "Right to left";
|
||||
this.checkBoxRightToLeft.UseVisualStyleBackColor = true;
|
||||
@ -520,6 +520,18 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.groupBoxOcrAutoFix.TabStop = false;
|
||||
this.groupBoxOcrAutoFix.Text = "OCR auto correction / spellchecking";
|
||||
//
|
||||
// comboBoxDictionaries
|
||||
//
|
||||
this.comboBoxDictionaries.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
|
||||
| System.Windows.Forms.AnchorStyles.Right)));
|
||||
this.comboBoxDictionaries.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||
this.comboBoxDictionaries.FormattingEnabled = true;
|
||||
this.comboBoxDictionaries.Location = new System.Drawing.Point(127, 15);
|
||||
this.comboBoxDictionaries.Name = "comboBoxDictionaries";
|
||||
this.comboBoxDictionaries.Size = new System.Drawing.Size(171, 21);
|
||||
this.comboBoxDictionaries.TabIndex = 41;
|
||||
this.comboBoxDictionaries.SelectedIndexChanged += new System.EventHandler(this.comboBoxDictionaries_SelectedIndexChanged);
|
||||
//
|
||||
// checkBoxGuessUnknownWords
|
||||
//
|
||||
this.checkBoxGuessUnknownWords.AutoSize = true;
|
||||
@ -527,7 +539,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxGuessUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked;
|
||||
this.checkBoxGuessUnknownWords.Location = new System.Drawing.Point(11, 83);
|
||||
this.checkBoxGuessUnknownWords.Name = "checkBoxGuessUnknownWords";
|
||||
this.checkBoxGuessUnknownWords.Size = new System.Drawing.Size(164, 17);
|
||||
this.checkBoxGuessUnknownWords.Size = new System.Drawing.Size(162, 17);
|
||||
this.checkBoxGuessUnknownWords.TabIndex = 39;
|
||||
this.checkBoxGuessUnknownWords.Text = "Try to guess unknown words";
|
||||
this.checkBoxGuessUnknownWords.UseVisualStyleBackColor = true;
|
||||
@ -633,7 +645,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxPromptForUnknownWords.CheckState = System.Windows.Forms.CheckState.Checked;
|
||||
this.checkBoxPromptForUnknownWords.Location = new System.Drawing.Point(11, 61);
|
||||
this.checkBoxPromptForUnknownWords.Name = "checkBoxPromptForUnknownWords";
|
||||
this.checkBoxPromptForUnknownWords.Size = new System.Drawing.Size(255, 17);
|
||||
this.checkBoxPromptForUnknownWords.Size = new System.Drawing.Size(246, 17);
|
||||
this.checkBoxPromptForUnknownWords.TabIndex = 38;
|
||||
this.checkBoxPromptForUnknownWords.Text = "Prompt for unknown words (requires dictionary)";
|
||||
this.checkBoxPromptForUnknownWords.UseVisualStyleBackColor = true;
|
||||
@ -645,7 +657,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxAutoBreakLines.CheckState = System.Windows.Forms.CheckState.Checked;
|
||||
this.checkBoxAutoBreakLines.Location = new System.Drawing.Point(11, 105);
|
||||
this.checkBoxAutoBreakLines.Name = "checkBoxAutoBreakLines";
|
||||
this.checkBoxAutoBreakLines.Size = new System.Drawing.Size(208, 17);
|
||||
this.checkBoxAutoBreakLines.Size = new System.Drawing.Size(200, 17);
|
||||
this.checkBoxAutoBreakLines.TabIndex = 37;
|
||||
this.checkBoxAutoBreakLines.Text = "Auto break subtitle, if line number > 2";
|
||||
this.checkBoxAutoBreakLines.UseVisualStyleBackColor = true;
|
||||
@ -666,7 +678,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxAutoFixCommonErrors.CheckState = System.Windows.Forms.CheckState.Checked;
|
||||
this.checkBoxAutoFixCommonErrors.Location = new System.Drawing.Point(11, 39);
|
||||
this.checkBoxAutoFixCommonErrors.Name = "checkBoxAutoFixCommonErrors";
|
||||
this.checkBoxAutoFixCommonErrors.Size = new System.Drawing.Size(139, 17);
|
||||
this.checkBoxAutoFixCommonErrors.Size = new System.Drawing.Size(137, 17);
|
||||
this.checkBoxAutoFixCommonErrors.TabIndex = 34;
|
||||
this.checkBoxAutoFixCommonErrors.Text = "Fix common OCR errors";
|
||||
this.checkBoxAutoFixCommonErrors.UseVisualStyleBackColor = true;
|
||||
@ -692,7 +704,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxEmphasis2Transparent.AutoSize = true;
|
||||
this.checkBoxEmphasis2Transparent.Location = new System.Drawing.Point(437, 19);
|
||||
this.checkBoxEmphasis2Transparent.Name = "checkBoxEmphasis2Transparent";
|
||||
this.checkBoxEmphasis2Transparent.Size = new System.Drawing.Size(85, 17);
|
||||
this.checkBoxEmphasis2Transparent.Size = new System.Drawing.Size(83, 17);
|
||||
this.checkBoxEmphasis2Transparent.TabIndex = 6;
|
||||
this.checkBoxEmphasis2Transparent.Text = "Transparent";
|
||||
this.checkBoxEmphasis2Transparent.UseVisualStyleBackColor = true;
|
||||
@ -703,7 +715,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxEmphasis1Transparent.AutoSize = true;
|
||||
this.checkBoxEmphasis1Transparent.Location = new System.Drawing.Point(304, 19);
|
||||
this.checkBoxEmphasis1Transparent.Name = "checkBoxEmphasis1Transparent";
|
||||
this.checkBoxEmphasis1Transparent.Size = new System.Drawing.Size(85, 17);
|
||||
this.checkBoxEmphasis1Transparent.Size = new System.Drawing.Size(83, 17);
|
||||
this.checkBoxEmphasis1Transparent.TabIndex = 5;
|
||||
this.checkBoxEmphasis1Transparent.Text = "Transparent";
|
||||
this.checkBoxEmphasis1Transparent.UseVisualStyleBackColor = true;
|
||||
@ -714,7 +726,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxPatternTransparent.AutoSize = true;
|
||||
this.checkBoxPatternTransparent.Location = new System.Drawing.Point(167, 19);
|
||||
this.checkBoxPatternTransparent.Name = "checkBoxPatternTransparent";
|
||||
this.checkBoxPatternTransparent.Size = new System.Drawing.Size(85, 17);
|
||||
this.checkBoxPatternTransparent.Size = new System.Drawing.Size(83, 17);
|
||||
this.checkBoxPatternTransparent.TabIndex = 4;
|
||||
this.checkBoxPatternTransparent.Text = "Transparent";
|
||||
this.checkBoxPatternTransparent.UseVisualStyleBackColor = true;
|
||||
@ -799,19 +811,6 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
this.checkBoxUseTimeCodesFromIdx.UseVisualStyleBackColor = true;
|
||||
this.checkBoxUseTimeCodesFromIdx.CheckedChanged += new System.EventHandler(this.checkBoxUseTimeCodesFromIdx_CheckedChanged);
|
||||
//
|
||||
// comboBoxDictionaries
|
||||
//
|
||||
this.comboBoxDictionaries.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
|
||||
| System.Windows.Forms.AnchorStyles.Right)));
|
||||
this.comboBoxDictionaries.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||
this.comboBoxDictionaries.FormattingEnabled = true;
|
||||
this.comboBoxDictionaries.Location = new System.Drawing.Point(127, 15);
|
||||
this.comboBoxDictionaries.Name = "comboBoxDictionaries";
|
||||
this.comboBoxDictionaries.Size = new System.Drawing.Size(171, 21);
|
||||
this.comboBoxDictionaries.TabIndex = 41;
|
||||
this.comboBoxDictionaries.Visible = false;
|
||||
this.comboBoxDictionaries.SelectedIndexChanged += new System.EventHandler(this.comboBoxDictionaries_SelectedIndexChanged);
|
||||
//
|
||||
// subtitleListView1
|
||||
//
|
||||
this.subtitleListView1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
|
||||
|
@ -145,8 +145,10 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
groupBoxSubtitleImage.Text = string.Empty;
|
||||
labelFixesMade.Text = string.Empty;
|
||||
labelFixesMade.Left = checkBoxAutoFixCommonErrors.Left + checkBoxAutoFixCommonErrors.Width;
|
||||
labelDictionaryLoaded.Text = string.Empty;
|
||||
comboBoxDictionaries.Visible = false;
|
||||
|
||||
labelDictionaryLoaded.Text = string.Format(Configuration.Settings.Language.VobSubOcr.DictionaryX, string.Empty);
|
||||
comboBoxDictionaries.Left = labelDictionaryLoaded.Left + labelDictionaryLoaded.Width;
|
||||
|
||||
groupBoxImageCompareMethod.Text = language.OcrViaImageCompare;
|
||||
groupBoxModiMethod.Text = language.OcrViaModi;
|
||||
checkBoxAutoFixCommonErrors.Text = language.FixOcrErrors;
|
||||
@ -1272,54 +1274,13 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
private string OcrViaTessnet(Bitmap bitmap, int index)
|
||||
{
|
||||
if (_ocrFixEngine == null)
|
||||
{
|
||||
_languageId = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id;
|
||||
_ocrFixEngine = new OcrFixEngine(_languageId, this);
|
||||
if (_ocrFixEngine.IsDictionaryLoaded)
|
||||
{
|
||||
labelDictionaryLoaded.Text = string.Format(Configuration.Settings.Language.VobSubOcr.DictionaryX, string.Empty); // _ocrFixEngine.DictionaryCulture.NativeName);
|
||||
|
||||
string loadedDictionaryName = _ocrFixEngine.SpellCheckDictionaryName;
|
||||
int i = 0;
|
||||
comboBoxDictionaries.SelectedIndexChanged -= comboBoxDictionaries_SelectedIndexChanged;
|
||||
foreach (string item in comboBoxDictionaries.Items)
|
||||
{
|
||||
if (item.Contains("[" + loadedDictionaryName + "]"))
|
||||
comboBoxDictionaries.SelectedIndex = i;
|
||||
i++;
|
||||
}
|
||||
comboBoxDictionaries.SelectedIndexChanged += comboBoxDictionaries_SelectedIndexChanged;
|
||||
comboBoxDictionaries.Left = labelDictionaryLoaded.Left + labelDictionaryLoaded.Width;
|
||||
comboBoxDictionaries.Width = groupBoxOcrAutoFix.Width - (comboBoxDictionaries.Left + 5);
|
||||
comboBoxDictionaries.Visible = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
labelDictionaryLoaded.Text = string.Format(Configuration.Settings.Language.VobSubOcr.DictionaryX, Configuration.Settings.Language.General.None);
|
||||
comboBoxDictionaries.SelectedIndex = 0;
|
||||
}
|
||||
|
||||
if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked)
|
||||
{
|
||||
string tesseractLanguageText = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Text;
|
||||
int i = 0;
|
||||
foreach (var modiLanguage in comboBoxModiLanguage.Items)
|
||||
{
|
||||
if ((modiLanguage as ModiLanguage).Text == tesseractLanguageText)
|
||||
comboBoxModiLanguage.SelectedIndex = i;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
comboBoxModiLanguage.SelectedIndex = -1;
|
||||
}
|
||||
LoadOcrFixEngine();
|
||||
|
||||
int badWords = 0;
|
||||
string textWithOutFixes = Tesseract3DoOcrViaExe(bitmap, _languageId);
|
||||
|
||||
if (textWithOutFixes.ToString().Trim().Length == 0)
|
||||
{
|
||||
textWithOutFixes = TesseractResizeAndRetry(bitmap);
|
||||
}
|
||||
|
||||
int numberOfWords = textWithOutFixes.ToString().Split((" " + Environment.NewLine).ToCharArray(), StringSplitOptions.RemoveEmptyEntries).Length;
|
||||
|
||||
@ -1344,10 +1305,11 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
}
|
||||
}
|
||||
|
||||
if (wordsNotFound > 0 || correctWords == 0 || textWithOutFixes.ToString().Replace("~", string.Empty).Trim().Length == 0)
|
||||
if (wordsNotFound > 0 || correctWords == 0 || textWithOutFixes != null && textWithOutFixes.ToString().Replace("~", string.Empty).Trim().Length == 0)
|
||||
{
|
||||
_ocrFixEngine.AutoGuessesUsed.Clear();
|
||||
_ocrFixEngine.UnknownWordsFound.Clear();
|
||||
|
||||
if (_modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked)
|
||||
{
|
||||
// which is best - modi or tesseract - we find out here
|
||||
@ -1726,6 +1688,45 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
Configuration.Settings.VobSubOcr.TesseractLastLanguage = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id;
|
||||
_ocrFixEngine = null;
|
||||
LoadOcrFixEngine();
|
||||
}
|
||||
|
||||
/// <summary>
/// Creates a new <c>OcrFixEngine</c> for the Tesseract language currently selected in
/// <c>comboBoxTesseractLanguages</c> and synchronises the dictionary and MODI language
/// combo boxes with it.
/// </summary>
private void LoadOcrFixEngine()
{
    // NOTE(review): SelectedItem is cast with "as" and dereferenced without a null check,
    // so this throws NullReferenceException when nothing is selected - confirm callers guarantee a selection.
    _languageId = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Id;
    _ocrFixEngine = new OcrFixEngine(_languageId, this);

    if (!_ocrFixEngine.IsDictionaryLoaded)
    {
        // No dictionary loaded: fall back to the first entry of the dictionary list.
        comboBoxDictionaries.SelectedIndex = 0;
    }
    else
    {
        // Dictionary entries are matched on the "[name]" marker inside their display text.
        string dictionaryMarker = "[" + _ocrFixEngine.SpellCheckDictionaryName + "]";

        // Detach the handler while the selection is changed programmatically,
        // so selecting the loaded dictionary does not run the change handler.
        comboBoxDictionaries.SelectedIndexChanged -= comboBoxDictionaries_SelectedIndexChanged;
        int position = 0;
        foreach (string entry in comboBoxDictionaries.Items)
        {
            if (entry.Contains(dictionaryMarker))
            {
                comboBoxDictionaries.SelectedIndex = position; // no break: the last matching entry wins
            }
            position++;
        }
        comboBoxDictionaries.SelectedIndexChanged += comboBoxDictionaries_SelectedIndexChanged;

        // Place the combo box directly to the right of the label and let it fill
        // the remaining width of the group box (minus a 5 pixel margin).
        comboBoxDictionaries.Left = labelDictionaryLoaded.Left + labelDictionaryLoaded.Width;
        comboBoxDictionaries.Width = groupBoxOcrAutoFix.Width - (comboBoxDictionaries.Left + 5);
    }

    bool useModiForUnknownWords = _modiEnabled && checkBoxUseModiInTesseractForUnknownWords.Checked;
    if (useModiForUnknownWords)
    {
        // Select the MODI language whose text equals the selected Tesseract language text.
        string tesseractLanguageText = (comboBoxTesseractLanguages.SelectedItem as TesseractLanguage).Text;
        int position = 0;
        foreach (var candidate in comboBoxModiLanguage.Items)
        {
            if ((candidate as ModiLanguage).Text == tesseractLanguageText)
            {
                comboBoxModiLanguage.SelectedIndex = position;
            }
            position++;
        }
    }

    // NOTE(review): this unconditionally clears the MODI selection, including one made by the
    // loop just above - looks unintended, but behaviour is kept as-is; confirm with the author.
    comboBoxModiLanguage.SelectedIndex = -1;
}
|
||||
|
||||
private void ComboBoxOcrMethodSelectedIndexChanged(object sender, EventArgs e)
|
||||
|
@ -698,12 +698,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
{
|
||||
if (newText.Contains(from))
|
||||
{
|
||||
var regex = new Regex(@"\b" + from + @"\b");
|
||||
Match match = regex.Match(newText);
|
||||
if (match.Success)
|
||||
{
|
||||
newText = newText.Remove(match.Index, match.Value.Length).Insert(match.Index, _partialLineReplaceList[from]);
|
||||
}
|
||||
newText = ReplaceWord(newText, from, _partialLineReplaceList[from]);
|
||||
}
|
||||
}
|
||||
return newText;
|
||||
@ -724,7 +719,6 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
bool correct = _hunspell.Spell(word);
|
||||
if (!correct)
|
||||
correct = _hunspell.Spell(word.Trim('\''));
|
||||
|
||||
if (!correct)
|
||||
{
|
||||
wordsNotFound++;
|
||||
@ -734,10 +728,13 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
if (autoFix && useAutoGuess)
|
||||
{
|
||||
List<string> guesses = new List<string>();
|
||||
|
||||
if (word.Length > 5)
|
||||
{
|
||||
guesses = (List<string>)CreateGuessesFromLetters(word);
|
||||
|
||||
string wordWithCasingChanged = GetWordWithDominatedCasing(word);
|
||||
if (_hunspell.Spell(word.ToLower()))
|
||||
guesses.Insert(0, wordWithCasingChanged);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -755,14 +752,14 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
{
|
||||
if (IsWordOrWordsCorrect(_hunspell, guess))
|
||||
{
|
||||
var regex = new Regex(@"\b" + word + @"\b");
|
||||
Match match = regex.Match(line);
|
||||
if (match.Success)
|
||||
string replacedLine = ReplaceWord(line, word, guess);
|
||||
if (replacedLine != line)
|
||||
{
|
||||
if (log)
|
||||
AutoGuessesUsed.Add(string.Format("#{0}: {1} -> {2} in line via '{3}': {4}", index + 1, word, guess, "OCRFixReplaceList.xml", line.Replace(Environment.NewLine, " ")));
|
||||
|
||||
line = line.Remove(match.Index, match.Value.Length).Insert(match.Index, guess);
|
||||
//line = line.Remove(match.Index, match.Value.Length).Insert(match.Index, guess);
|
||||
line = replacedLine;
|
||||
wordsNotFound--;
|
||||
correct = true;
|
||||
break;
|
||||
@ -772,7 +769,11 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
}
|
||||
if (!correct && promptForFixingErrors)
|
||||
{
|
||||
List<string> suggestions = _hunspell.Suggest(word);
|
||||
List<string> suggestions = new List<string>();
|
||||
|
||||
if (word.Length > 4 || !word.Contains("'")) //TODO: get fixed nhunspell
|
||||
suggestions = _hunspell.Suggest(word); // 0.9.6 fails on "Lt'S"
|
||||
|
||||
SpellcheckOcrTextResult res = SpellcheckOcrText(line, bitmap, words, i, word, suggestions);
|
||||
if (res.FixedWholeLine)
|
||||
{
|
||||
@ -790,6 +791,25 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
return line;
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns <paramref name="word"/> converted entirely to the casing that dominates it:
/// upper case when it has strictly more upper case letters than lower case letters,
/// otherwise lower case (ties go to lower case).
/// </summary>
/// <param name="word">The word to normalise; must not be null.</param>
/// <returns>The word in all upper case or all lower case.</returns>
private string GetWordWithDominatedCasing(string word)
{
    // Presumably the sets of upper/lower case letters known to the application - see Utilities.GetLetters.
    string upperAlphabet = Utilities.GetLetters(true, false, false);
    string lowerAlphabet = Utilities.GetLetters(false, true, false);

    int upperCount = 0;
    int lowerCount = 0;
    foreach (char c in word)
    {
        string letter = c.ToString();
        if (lowerAlphabet.Contains(letter))
        {
            lowerCount++;
        }
        else if (upperAlphabet.Contains(letter))
        {
            upperCount++;
        }
        // Characters found in neither set (digits, punctuation, ...) are not counted.
    }

    return upperCount > lowerCount ? word.ToUpper() : word.ToLower();
}
|
||||
|
||||
/// <summary>
|
||||
/// Spellcheck for ocr
|
||||
/// </summary>
|
||||
@ -874,44 +894,43 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
}
|
||||
if (result.Fixed)
|
||||
{
|
||||
var regEx = Utilities.MakeWordSearchRegex(word);
|
||||
Match match = regEx.Match(line);
|
||||
if (match.Success)
|
||||
{
|
||||
result.Line = line.Remove(match.Index, word.Length).Insert(match.Index, result.Word);
|
||||
}
|
||||
else // some word containing a number or other strange character
|
||||
{
|
||||
if (line.EndsWith(" " + word))
|
||||
{
|
||||
result.Line = line.Substring(0, line.Length - word.Length) + result.Word;
|
||||
}
|
||||
else if (line.StartsWith(word + " ") || line.StartsWith(word + ",") || line.StartsWith(word + "."))
|
||||
{
|
||||
result.Line = result.Word + line.Substring(word.Length);
|
||||
}
|
||||
else
|
||||
{
|
||||
regEx = Utilities.MakeWordSearchRegexWithNumbers(word);
|
||||
match = regEx.Match(line);
|
||||
if (match.Success)
|
||||
{
|
||||
int startIndex = match.Index;
|
||||
if (match.Value.StartsWith(" "))
|
||||
startIndex++;
|
||||
result.Line = line.Remove(startIndex, word.Length).Insert(startIndex, result.Word);
|
||||
}
|
||||
else
|
||||
{
|
||||
result.Fixed = false;
|
||||
MessageBox.Show("Unable to find word via regex: " + word);
|
||||
}
|
||||
}
|
||||
}
|
||||
result.Line = ReplaceWord(line, word, result.Word);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
/// Replaces every whole-word occurrence of <paramref name="word"/> in <paramref name="text"/>
/// with <paramref name="newWord"/>. An occurrence is a whole word when it starts at the
/// beginning of the text or right after a boundary character, and ends at the end of the
/// text or right before a boundary character (space, punctuation, quotes, brackets, line break).
/// </summary>
/// <param name="text">Text to search. Null yields an empty string.</param>
/// <param name="word">Word to look for (ordinal, case-sensitive). Null or empty leaves the text unchanged.</param>
/// <param name="newWord">Replacement for each whole-word occurrence.</param>
/// <returns>
/// The text with all whole-word occurrences replaced; the unchanged text when there is nothing to replace.
/// </returns>
private string ReplaceWord(string text, string word, string newWord)
{
    if (text == null)
        return string.Empty;

    // Nothing to replace: return the input unchanged. Returning an empty string here would make
    // callers that compare the result with the original line wipe the whole line.
    if (string.IsNullOrEmpty(word) || !text.Contains(word))
        return text;

    string boundaryChars = " <>-\"”“[]'‘`´¶()♪¿¡.…—!?,:;/" + Environment.NewLine;
    var sb = new StringBuilder(text.Length);
    int i = 0;
    while (i < text.Length)
    {
        // Ordinal comparison in place: agrees with the ordinal Contains check above and
        // avoids allocating a substring for every character position.
        if (string.CompareOrdinal(text, i, word, 0, word.Length) == 0)
        {
            int end = i + word.Length;
            bool startOk = i == 0 || boundaryChars.IndexOf(text[i - 1]) >= 0;
            bool endOk = end == text.Length || boundaryChars.IndexOf(text[end]) >= 0;
            if (startOk && endOk)
            {
                sb.Append(newWord);
                i = end; // skip past the replaced word
                continue;
            }
        }

        sb.Append(text[i]);
        i++;
    }
    return sb.ToString();
}
|
||||
|
||||
private void SaveWordToWordList(string word)
|
||||
{
|
||||
try
|
||||
|
@ -37,7 +37,7 @@ namespace Nikse.SubtitleEdit.Logic.SubtitleFormats
|
||||
|
||||
public override string Name
|
||||
{
|
||||
get { return "D-Cinema Subtitle"; }
|
||||
get { return "D-Cinema"; }
|
||||
}
|
||||
|
||||
public override bool HasLineNumber
|
||||
|
@ -43,16 +43,6 @@ namespace Nikse.SubtitleEdit.Logic.SubtitleFormats
|
||||
|
||||
public override string ToText(Subtitle subtitle, string title)
|
||||
{
|
||||
|
||||
|
||||
const string paragraphWriteFormat = "{0:00}:{1:00}:{2:00}.{3:00}, {4:00}:{5:00}:{6:00}.{7:00}{8}{9}";
|
||||
|
||||
//00:00:07.00, 00:00:12.00
|
||||
//Welche Auswirkung Mikroversicherungen auf unsere Klienten hat? Lassen wir sie für sich selber sprechen!
|
||||
//
|
||||
//00:00:22.00, 00:00:27.00
|
||||
//Arme Menschen in Uganda leben oft in schlechten Unterkünften.
|
||||
|
||||
var sb = new StringBuilder();
|
||||
|
||||
sb.AppendLine(" " + subtitle.Paragraphs.Count.ToString() + " 4 1234 ");
|
||||
@ -95,7 +85,7 @@ SRPSKI
|
||||
"{2}" + Environment.NewLine +
|
||||
"{3}", p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds, firstLine, secondLine));
|
||||
}
|
||||
return sb.ToString().Trim(); //.Replace(Environment.NewLine, "\n");
|
||||
return sb.ToString().Trim();
|
||||
}
|
||||
|
||||
private int RoundTo2Cifres(int milliseconds)
|
||||
|
@ -1062,6 +1062,7 @@ namespace Nikse.SubtitleEdit.Logic
|
||||
}
|
||||
}
|
||||
sb.Append("*" + new Pac().Extension + ";");
|
||||
sb.Append("*" + new Cavena890().Extension + ";");
|
||||
sb.Append("*.sup");
|
||||
sb.Append("|" + Configuration.Settings.Language.General.AllFiles + "|*.*");
|
||||
return sb.ToString();
|
||||
|
@ -517,6 +517,7 @@
|
||||
<Compile Include="Logic\StripableText.cs" />
|
||||
<Compile Include="Logic\SubtitleFormats\AdobeEncore.cs" />
|
||||
<Compile Include="Logic\SubtitleFormats\AdobeEncoreTabs.cs" />
|
||||
<Compile Include="Logic\SubtitleFormats\Cavena890.cs" />
|
||||
<Compile Include="Logic\SubtitleFormats\DCSubtitle.cs" />
|
||||
<Compile Include="Logic\SubtitleFormats\FinalCutProTextXml.cs" />
|
||||
<Compile Include="Logic\SubtitleFormats\FinalCutProXml.cs" />
|
||||
|
Loading…
Reference in New Issue
Block a user