Work on OCR

This commit is contained in:
Nikolaj Olsson 2020-06-12 07:48:32 +02:00
parent 78113946b8
commit 94754fc3de
10 changed files with 84 additions and 25 deletions

View File

@ -635,7 +635,15 @@
<WordPart from="í" to="i" />
</PartialWords>
<WholeLines />
<PartialLinesAlways />
<PartialLinesAlways>
<LinePart from="Apollo 1 3" to="Apollo 13" />
<LinePart from=",.," to="..." />
<LinePart from=" l " to=" I " />
<LinePart from=" l." to=" I." />
<LinePart from=" l?" to=" I?" />
<LinePart from=" l!" to=" I!" />
<LinePart from=" . " to=". " />
</PartialLinesAlways>
<PartialLines />
<BeginLines>
<Beginning from="-] " to="- I " />
@ -643,7 +651,12 @@
<Beginning from="] " to="I " />
<Beginning from="-| " to="- I " />
<Beginning from="- | " to="- I " />
<Beginning from="l " to="I " />
<Beginning from="- l " to="- I " />
</BeginLines>
<EndLines />
<RegularExpressions />
<!-- Regex fixes for characters misread as "I"; "\bl\b" matches a lowercase "l" standing alone as a word. -->
<RegularExpressions>
<RegEx find="\b\|\b" replaceWith="I" />
<RegEx find="\bl\b" replaceWith="I" />
</RegularExpressions>
</OCRFixReplaceList>

View File

@ -32,6 +32,7 @@
this.buttonOK = new System.Windows.Forms.Button();
this.buttonCancel = new System.Windows.Forms.Button();
this.groupBoxInspectItems = new System.Windows.Forms.GroupBox();
this.labelExpandCount = new System.Windows.Forms.Label();
this.labelImageSize = new System.Windows.Forms.Label();
this.pictureBoxInspectItem = new System.Windows.Forms.PictureBox();
this.listBoxInspectItems = new System.Windows.Forms.ListBox();
@ -167,7 +168,6 @@
this.toolStripMenuItemMusicSymbol1 = new System.Windows.Forms.ToolStripMenuItem();
this.toolStripMenuItemMusicSymbol2 = new System.Windows.Forms.ToolStripMenuItem();
this.labelStatus = new System.Windows.Forms.Label();
this.labelExpandCount = new System.Windows.Forms.Label();
this.groupBoxInspectItems.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.pictureBoxInspectItem)).BeginInit();
this.contextMenuStripAddBetterMultiMatch.SuspendLayout();
@ -215,6 +215,15 @@
this.groupBoxInspectItems.TabStop = false;
this.groupBoxInspectItems.Text = "Inspect items";
//
// labelExpandCount
//
this.labelExpandCount.AutoSize = true;
this.labelExpandCount.Location = new System.Drawing.Point(252, 96);
this.labelExpandCount.Name = "labelExpandCount";
this.labelExpandCount.Size = new System.Drawing.Size(93, 13);
this.labelExpandCount.TabIndex = 32;
this.labelExpandCount.Text = "labelExpandCount";
//
// labelImageSize
//
this.labelImageSize.AutoSize = true;
@ -353,6 +362,7 @@
this.checkBoxItalic.TabIndex = 2;
this.checkBoxItalic.Text = "Is &italic";
this.checkBoxItalic.UseVisualStyleBackColor = true;
this.checkBoxItalic.CheckedChanged += new System.EventHandler(this.checkBoxItalic_CheckedChanged);
//
// labelTextAssociatedWithImage
//
@ -388,7 +398,7 @@
// textBoxText
//
this.textBoxText.ContextMenuStrip = this.contextMenuStripLetters;
this.textBoxText.Font = new System.Drawing.Font("Tahoma", 9.75F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.textBoxText.Font = new System.Drawing.Font("Tahoma", 9.75F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.textBoxText.Location = new System.Drawing.Point(14, 35);
this.textBoxText.Name = "textBoxText";
this.textBoxText.Size = new System.Drawing.Size(100, 23);
@ -1229,15 +1239,6 @@
this.labelStatus.TabIndex = 38;
this.labelStatus.Text = "labelStatus";
//
// labelExpandCount
//
this.labelExpandCount.AutoSize = true;
this.labelExpandCount.Location = new System.Drawing.Point(252, 96);
this.labelExpandCount.Name = "labelExpandCount";
this.labelExpandCount.Size = new System.Drawing.Size(93, 13);
this.labelExpandCount.TabIndex = 32;
this.labelExpandCount.Text = "labelExpandCount";
//
// VobSubNOcrCharacterInspect
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);

View File

@ -422,5 +422,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
DialogResult = DialogResult.Cancel;
}
}
/// <summary>
/// Keeps the preview fonts in sync with the italic checkbox: the label is shown
/// italic or regular, the text box bold italic or plain bold.
/// </summary>
private void checkBoxItalic_CheckedChanged(object sender, EventArgs e)
{
    var isItalic = checkBoxItalic.Checked;
    var labelStyle = isItalic ? FontStyle.Italic : FontStyle.Regular;
    var textBoxStyle = isItalic ? (FontStyle.Italic | FontStyle.Bold) : FontStyle.Bold;

    // Family and size are carried over from the current fonts; only the style changes.
    var labelFont = labelTextAssociatedWithImage.Font;
    labelTextAssociatedWithImage.Font = new Font(labelFont.FontFamily, labelFont.Size, labelStyle);

    var textBoxFont = textBoxText.Font;
    textBoxText.Font = new Font(textBoxFont.FontFamily, textBoxFont.Size, textBoxStyle);
}
}
}

View File

@ -305,7 +305,7 @@
// checkBoxItalic
//
this.checkBoxItalic.AutoSize = true;
this.checkBoxItalic.Location = new System.Drawing.Point(15, 61);
this.checkBoxItalic.Location = new System.Drawing.Point(15, 62);
this.checkBoxItalic.Name = "checkBoxItalic";
this.checkBoxItalic.Size = new System.Drawing.Size(58, 17);
this.checkBoxItalic.TabIndex = 2;
@ -335,9 +335,10 @@
//
// textBoxText
//
this.textBoxText.Font = new System.Drawing.Font("Tahoma", 9.75F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.textBoxText.Location = new System.Drawing.Point(14, 35);
this.textBoxText.Name = "textBoxText";
this.textBoxText.Size = new System.Drawing.Size(87, 20);
this.textBoxText.Size = new System.Drawing.Size(87, 23);
this.textBoxText.TabIndex = 1;
this.textBoxText.TextChanged += new System.EventHandler(this.textBoxText_TextChanged);
//

View File

@ -471,6 +471,17 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_nOcrChar.Italic = checkBoxItalic.Checked;
Changed = true;
}
if (checkBoxItalic.Checked)
{
labelTextAssociatedWithImage.Font = new Font(labelTextAssociatedWithImage.Font.FontFamily, labelTextAssociatedWithImage.Font.Size, FontStyle.Italic);
textBoxText.Font = new Font(textBoxText.Font.FontFamily, textBoxText.Font.Size, FontStyle.Italic | FontStyle.Bold);
}
else
{
labelTextAssociatedWithImage.Font = new Font(labelTextAssociatedWithImage.Font.FontFamily, labelTextAssociatedWithImage.Font.Size);
textBoxText.Font = new Font(textBoxText.Font.FontFamily, textBoxText.Font.Size, FontStyle.Bold);
}
}
private void textBoxText_TextChanged(object sender, EventArgs e)

View File

@ -231,6 +231,7 @@
this.checkBoxItalic.TabIndex = 2;
this.checkBoxItalic.Text = "Is &italic";
this.checkBoxItalic.UseVisualStyleBackColor = true;
this.checkBoxItalic.CheckedChanged += new System.EventHandler(this.checkBoxItalic_CheckedChanged);
//
// labelDoubleSize
//
@ -294,7 +295,7 @@
// textBoxText
//
this.textBoxText.ContextMenuStrip = this.contextMenuStripLetters;
this.textBoxText.Font = new System.Drawing.Font("Tahoma", 9.75F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.textBoxText.Font = new System.Drawing.Font("Tahoma", 9.75F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.textBoxText.Location = new System.Drawing.Point(14, 35);
this.textBoxText.Name = "textBoxText";
this.textBoxText.Size = new System.Drawing.Size(100, 23);

View File

@ -627,5 +627,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
}
}
/// <summary>
/// Updates the label and text box fonts when the italic checkbox is toggled:
/// italic / bold italic when checked, regular / bold when unchecked.
/// </summary>
private void checkBoxItalic_CheckedChanged(object sender, EventArgs e)
{
    var italicStyle = checkBoxItalic.Checked ? FontStyle.Italic : FontStyle.Regular;

    // The label follows the checkbox directly.
    var currentLabelFont = labelTextAssociatedWithImage.Font;
    labelTextAssociatedWithImage.Font = new Font(currentLabelFont.FontFamily, currentLabelFont.Size, italicStyle);

    // The text box is always bold, with italic added on top when checked.
    var currentTextFont = textBoxText.Font;
    textBoxText.Font = new Font(currentTextFont.FontFamily, currentTextFont.Size, italicStyle | FontStyle.Bold);
}
}
}

View File

@ -265,10 +265,10 @@
// textBoxCharacters
//
this.textBoxCharacters.ContextMenuStrip = this.contextMenuStripLetters;
this.textBoxCharacters.Font = new System.Drawing.Font("Tahoma", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.textBoxCharacters.Font = new System.Drawing.Font("Tahoma", 9.75F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.textBoxCharacters.Location = new System.Drawing.Point(188, 58);
this.textBoxCharacters.Name = "textBoxCharacters";
this.textBoxCharacters.Size = new System.Drawing.Size(107, 21);
this.textBoxCharacters.Size = new System.Drawing.Size(107, 23);
this.textBoxCharacters.TabIndex = 22;
this.textBoxCharacters.KeyDown += new System.Windows.Forms.KeyEventHandler(this.textBoxCharacters_KeyDown);
this.textBoxCharacters.KeyUp += new System.Windows.Forms.KeyEventHandler(this.textBoxCharacters_KeyUp);
@ -1213,7 +1213,7 @@
//
this.checkBoxAutoSubmitOfFirstChar.AutoSize = true;
this.checkBoxAutoSubmitOfFirstChar.Font = new System.Drawing.Font("Tahoma", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.checkBoxAutoSubmitOfFirstChar.Location = new System.Drawing.Point(188, 84);
this.checkBoxAutoSubmitOfFirstChar.Location = new System.Drawing.Point(188, 87);
this.checkBoxAutoSubmitOfFirstChar.Name = "checkBoxAutoSubmitOfFirstChar";
this.checkBoxAutoSubmitOfFirstChar.Size = new System.Drawing.Size(144, 17);
this.checkBoxAutoSubmitOfFirstChar.TabIndex = 37;
@ -1341,7 +1341,7 @@
this.listBoxlinesBackground.Name = "listBoxlinesBackground";
this.listBoxlinesBackground.Size = new System.Drawing.Size(151, 95);
this.listBoxlinesBackground.TabIndex = 19;
this.listBoxlinesBackground.SelectedIndexChanged += new System.EventHandler(this.listBoxlinesBackground_SelectedIndexChanged);
this.listBoxlinesBackground.SelectedIndexChanged += new System.EventHandler(this.listBoxLinesBackground_SelectedIndexChanged);
this.listBoxlinesBackground.KeyDown += new System.Windows.Forms.KeyEventHandler(this.listBoxLinesBackground_KeyDown);
//
// contextMenuStripLinesBackground

View File

@ -905,13 +905,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
if (checkBoxItalic.Checked)
{
labelCharactersAsText.Font = new Font(labelCharactersAsText.Font.FontFamily, labelCharactersAsText.Font.Size, FontStyle.Italic);
textBoxCharacters.Font = new Font(textBoxCharacters.Font.FontFamily, textBoxCharacters.Font.Size, FontStyle.Italic);
textBoxCharacters.Font = new Font(textBoxCharacters.Font.FontFamily, textBoxCharacters.Font.Size, FontStyle.Italic | FontStyle.Bold);
labelItalicOn.Visible = true;
}
else
{
labelCharactersAsText.Font = new Font(labelCharactersAsText.Font.FontFamily, labelCharactersAsText.Font.Size);
textBoxCharacters.Font = new Font(textBoxCharacters.Font.FontFamily, textBoxCharacters.Font.Size);
textBoxCharacters.Font = new Font(textBoxCharacters.Font.FontFamily, textBoxCharacters.Font.Size, FontStyle.Bold);
labelItalicOn.Visible = false;
}
}
@ -921,7 +921,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
pictureBoxCharacter.Invalidate();
}
private void listBoxlinesBackground_SelectedIndexChanged(object sender, EventArgs e)
private void listBoxLinesBackground_SelectedIndexChanged(object sender, EventArgs e)
{
pictureBoxCharacter.Invalidate();
}

View File

@ -1204,7 +1204,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
{
lastLine = HtmlUtil.RemoveHtmlTags(lastLine);
var st = new StrippableText(text);
if (lastLine == null || (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, abbreviationList)))
if (lastLine == null || !lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, abbreviationList))
{
if (st.StrippedText.Length > 0 && !char.IsUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') &&
!st.Pre.Contains("...", StringComparison.Ordinal) &&
@ -1213,7 +1213,11 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if (!HtmlUtil.StartsWithUrl(st.StrippedText))
{
var uppercaseLetter = char.ToUpper(st.StrippedText[0]);
if (st.StrippedText.Length > 1 && uppercaseLetter == 'L' && @"abcdfghjklmnpqrstvwxz".Contains(st.StrippedText[1]))
if (st.StrippedText.Length > 1 && uppercaseLetter == 'L' && (st.StrippedText[1] == ' ' || char.IsLower(st.StrippedText[1])))
{
uppercaseLetter = 'I';
}
if (st.StrippedText.Length == 1 && uppercaseLetter == 'L')
{
uppercaseLetter = 'I';
}