Added new subtitle format + working on ocr

This commit is contained in:
nikse.dk 2014-02-27 17:02:10 +01:00
parent 4b2fd3e9bc
commit 79e77ebad3
9 changed files with 925 additions and 157 deletions

View File

@ -140,6 +140,8 @@ namespace Nikse.SubtitleEdit.Forms
this.checkBoxTransportStreamGrayscale = new System.Windows.Forms.CheckBox();
this.checkBoxAutoTransparentBackground = new System.Windows.Forms.CheckBox();
this.pictureBoxSubtitleImage = new System.Windows.Forms.PictureBox();
this.contextMenuStripImage = new System.Windows.Forms.ContextMenuStrip(this.components);
this.toolStripMenuItemImageSaveAs = new System.Windows.Forms.ToolStripMenuItem();
this.checkBoxShowOnlyForced = new System.Windows.Forms.CheckBox();
this.checkBoxUseTimeCodesFromIdx = new System.Windows.Forms.CheckBox();
this.folderBrowserDialog1 = new System.Windows.Forms.FolderBrowserDialog();
@ -148,8 +150,6 @@ namespace Nikse.SubtitleEdit.Forms
this.textBoxCurrentText = new Nikse.SubtitleEdit.Controls.SETextBox();
this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView();
this.timerHideStatus = new System.Windows.Forms.Timer(this.components);
this.contextMenuStripImage = new System.Windows.Forms.ContextMenuStrip(this.components);
this.toolStripMenuItemImageSaveAs = new System.Windows.Forms.ToolStripMenuItem();
this.contextMenuStripListview.SuspendLayout();
this.groupBoxOcrMethod.SuspendLayout();
this.groupBoxImageCompareMethod.SuspendLayout();
@ -178,10 +178,10 @@ namespace Nikse.SubtitleEdit.Forms
((System.ComponentModel.ISupportInitialize)(this.numericUpDownAutoTransparentAlphaMax)).BeginInit();
this.groupBoxTransportStream.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.pictureBoxSubtitleImage)).BeginInit();
this.contextMenuStripImage.SuspendLayout();
this.splitContainerBottom.Panel1.SuspendLayout();
this.splitContainerBottom.Panel2.SuspendLayout();
this.splitContainerBottom.SuspendLayout();
this.contextMenuStripImage.SuspendLayout();
this.SuspendLayout();
//
// contextMenuStripListview
@ -395,11 +395,11 @@ namespace Nikse.SubtitleEdit.Forms
//
// groupBoxOcrMethod
//
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxModiMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
this.groupBoxOcrMethod.Location = new System.Drawing.Point(13, 5);
this.groupBoxOcrMethod.Name = "groupBoxOcrMethod";
this.groupBoxOcrMethod.Size = new System.Drawing.Size(392, 192);
@ -1374,6 +1374,20 @@ namespace Nikse.SubtitleEdit.Forms
this.pictureBoxSubtitleImage.TabIndex = 3;
this.pictureBoxSubtitleImage.TabStop = false;
//
// contextMenuStripImage
//
this.contextMenuStripImage.Items.AddRange(new System.Windows.Forms.ToolStripItem[] {
this.toolStripMenuItemImageSaveAs});
this.contextMenuStripImage.Name = "contextMenuStripUnknownWords";
this.contextMenuStripImage.Size = new System.Drawing.Size(158, 26);
//
// toolStripMenuItemImageSaveAs
//
this.toolStripMenuItemImageSaveAs.Name = "toolStripMenuItemImageSaveAs";
this.toolStripMenuItemImageSaveAs.Size = new System.Drawing.Size(157, 22);
this.toolStripMenuItemImageSaveAs.Text = "Save image as...";
this.toolStripMenuItemImageSaveAs.Click += new System.EventHandler(this.toolStripMenuItemImageSaveAs_Click);
//
// checkBoxShowOnlyForced
//
this.checkBoxShowOnlyForced.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
@ -1458,6 +1472,9 @@ namespace Nikse.SubtitleEdit.Forms
this.subtitleListView1.Name = "subtitleListView1";
this.subtitleListView1.OwnerDraw = true;
this.subtitleListView1.Size = new System.Drawing.Size(631, 183);
this.subtitleListView1.SubtitleFontBold = false;
this.subtitleListView1.SubtitleFontName = "Tahoma";
this.subtitleListView1.SubtitleFontSize = 8;
this.subtitleListView1.TabIndex = 0;
this.subtitleListView1.UseCompatibleStateImageBehavior = false;
this.subtitleListView1.UseSyntaxColoring = true;
@ -1470,20 +1487,6 @@ namespace Nikse.SubtitleEdit.Forms
this.timerHideStatus.Interval = 2000;
this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick);
//
// contextMenuStripImage
//
this.contextMenuStripImage.Items.AddRange(new System.Windows.Forms.ToolStripItem[] {
this.toolStripMenuItemImageSaveAs});
this.contextMenuStripImage.Name = "contextMenuStripUnknownWords";
this.contextMenuStripImage.Size = new System.Drawing.Size(158, 26);
//
// toolStripMenuItemImageSaveAs
//
this.toolStripMenuItemImageSaveAs.Name = "toolStripMenuItemImageSaveAs";
this.toolStripMenuItemImageSaveAs.Size = new System.Drawing.Size(157, 22);
this.toolStripMenuItemImageSaveAs.Text = "Save image as...";
this.toolStripMenuItemImageSaveAs.Click += new System.EventHandler(this.toolStripMenuItemImageSaveAs_Click);
//
// VobSubOcr
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
@ -1546,11 +1549,11 @@ namespace Nikse.SubtitleEdit.Forms
this.groupBoxTransportStream.ResumeLayout(false);
this.groupBoxTransportStream.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.pictureBoxSubtitleImage)).EndInit();
this.contextMenuStripImage.ResumeLayout(false);
this.splitContainerBottom.Panel1.ResumeLayout(false);
this.splitContainerBottom.Panel1.PerformLayout();
this.splitContainerBottom.Panel2.ResumeLayout(false);
this.splitContainerBottom.ResumeLayout(false);
this.contextMenuStripImage.ResumeLayout(false);
this.ResumeLayout(false);
this.PerformLayout();

View File

@ -1369,7 +1369,7 @@ namespace Nikse.SubtitleEdit.Forms
returnBmp.MakeTransparent();
}
if (_binaryOcrDb == null)
if (_binaryOcrDb == null && _nocrChars == null)
return returnBmp;
var n = new NikseBitmap(returnBmp);
@ -1785,128 +1785,128 @@ namespace Nikse.SubtitleEdit.Forms
}
}
// matches 2 or 3 pixels to the left
foreach (NOcrChar oc in nOcrChars)
{
if (!oc.IsSensitive)
{
if (Math.Abs(oc.WidthPercent - widthPercent) < 15 && oc.Width > 14 && oc.Height > 19 && nbmp.Width > 20 && nbmp.Height > 14 && Math.Abs(oc.MarginTop - topMargin) < nbmp.Height / 4)
{
bool ok = true;
index = 0;
while (index < oc.LinesForeground.Count && ok)
{
NOcrPoint op = oc.LinesForeground[index];
foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height))
{
Point p = new Point(point.X - 2, point.Y);
Point p1 = new Point(point.X - 1, point.Y);
if (p.X >= 0 && p.Y >= 0 && p.X < nbmp.Width && p.Y < nbmp.Height && p1.X >= 0)
{
Color c = nbmp.GetPixel(p.X, p.Y);
if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
{
}
else
{
c = nbmp.GetPixel(p1.X, p1.Y);
if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
{
}
else
{
ok = false;
break;
}
}
}
}
index++;
}
index = 0;
while (index < oc.LinesBackground.Count && ok)
{
NOcrPoint op = oc.LinesBackground[index];
foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height))
{
Point p = new Point(point.X - 2, point.Y);
Point p1 = new Point(point.X - 1, point.Y);
if (p.X >= 0 && p.Y >= 0 && p.X < nbmp.Width && point.Y < nbmp.Height && p1.X >= 0)
{
Color c = nbmp.GetPixel(p.X, p.Y);
if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
{
c = nbmp.GetPixel(p1.X, p1.Y);
if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
{
ok = false;
break;
}
}
}
}
index++;
}
if (ok)
return oc;
}
}
}
//// matches 2 or 3 pixels to the left
//foreach (NOcrChar oc in nOcrChars)
//{
// if (!oc.IsSensitive)
// {
// if (Math.Abs(oc.WidthPercent - widthPercent) < 15 && oc.Width > 14 && oc.Height > 19 && nbmp.Width > 20 && nbmp.Height > 14 && Math.Abs(oc.MarginTop - topMargin) < nbmp.Height / 4)
// {
// bool ok = true;
// index = 0;
// while (index < oc.LinesForeground.Count && ok)
// {
// NOcrPoint op = oc.LinesForeground[index];
// foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height))
// {
// Point p = new Point(point.X - 2, point.Y);
// Point p1 = new Point(point.X - 1, point.Y);
// if (p.X >= 0 && p.Y >= 0 && p.X < nbmp.Width && p.Y < nbmp.Height && p1.X >= 0)
// {
// Color c = nbmp.GetPixel(p.X, p.Y);
// if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
// {
// }
// else
// {
// c = nbmp.GetPixel(p1.X, p1.Y);
// if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
// {
// }
// else
// {
// ok = false;
// break;
// }
// }
// }
// }
// index++;
// }
// index = 0;
// while (index < oc.LinesBackground.Count && ok)
// {
// NOcrPoint op = oc.LinesBackground[index];
// foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height))
// {
// Point p = new Point(point.X - 2, point.Y);
// Point p1 = new Point(point.X - 1, point.Y);
// if (p.X >= 0 && p.Y >= 0 && p.X < nbmp.Width && point.Y < nbmp.Height && p1.X >= 0)
// {
// Color c = nbmp.GetPixel(p.X, p.Y);
// if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
// {
// c = nbmp.GetPixel(p1.X, p1.Y);
// if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
// {
// ok = false;
// break;
// }
// }
// }
// }
// index++;
// }
// if (ok)
// return oc;
// }
// }
//}
// matches 5 pixels lower
int yLower = 5;
widthPercent = (nbmp.Height - yLower) * 100.0 / nbmp.Width;
foreach (NOcrChar oc in nOcrChars)
{
if (!oc.IsSensitive)
{
if (Math.Abs(oc.WidthPercent - widthPercent) < 20 && oc.Width > 12 && oc.Height > 19 && nbmp.Width > 19 && nbmp.Height > 12 && Math.Abs(oc.MarginTop - topMargin) < 15)
{
bool ok = true;
index = 0;
while (index < oc.LinesForeground.Count && ok)
{
NOcrPoint op = oc.LinesForeground[index];
foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height - yLower))
{
if (point.X >= 0 && point.Y + yLower >= 0 && point.X < nbmp.Width && point.Y + yLower < nbmp.Height)
{
Color c = nbmp.GetPixel(point.X, point.Y + yLower);
if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
{
}
else
{
ok = false;
break;
}
}
}
index++;
}
index = 0;
while (index < oc.LinesBackground.Count && ok)
{
NOcrPoint op = oc.LinesBackground[index];
foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height - yLower))
{
if (point.X >= 0 && point.Y + yLower >= 0 && point.X < nbmp.Width && point.Y + yLower < nbmp.Height)
{
Color c = nbmp.GetPixel(point.X, point.Y + yLower);
if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
{
ok = false;
break;
}
}
}
index++;
}
if (ok)
return oc;
}
}
}
//// matches 5 pixels lower
//int yLower = 5;
//widthPercent = (nbmp.Height - yLower) * 100.0 / nbmp.Width;
//foreach (NOcrChar oc in nOcrChars)
//{
// if (!oc.IsSensitive)
// {
// if (Math.Abs(oc.WidthPercent - widthPercent) < 20 && oc.Width > 12 && oc.Height > 19 && nbmp.Width > 19 && nbmp.Height > 12 && Math.Abs(oc.MarginTop - topMargin) < 15)
// {
// bool ok = true;
// index = 0;
// while (index < oc.LinesForeground.Count && ok)
// {
// NOcrPoint op = oc.LinesForeground[index];
// foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height - yLower))
// {
// if (point.X >= 0 && point.Y + yLower >= 0 && point.X < nbmp.Width && point.Y + yLower < nbmp.Height)
// {
// Color c = nbmp.GetPixel(point.X, point.Y + yLower);
// if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
// {
// }
// else
// {
// ok = false;
// break;
// }
// }
// }
// index++;
// }
// index = 0;
// while (index < oc.LinesBackground.Count && ok)
// {
// NOcrPoint op = oc.LinesBackground[index];
// foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height - yLower))
// {
// if (point.X >= 0 && point.Y + yLower >= 0 && point.X < nbmp.Width && point.Y + yLower < nbmp.Height)
// {
// Color c = nbmp.GetPixel(point.X, point.Y + yLower);
// if (c.A > 150 && c.R + c.G + c.B > NocrMinColor)
// {
// ok = false;
// break;
// }
// }
// }
// index++;
// }
// if (ok)
// return oc;
// }
// }
//}
if (deepSeek) // if we do now draw then just try anything...
@ -2276,6 +2276,386 @@ namespace Nikse.SubtitleEdit.Forms
return null;
}
/// <summary>
/// Returns the first nOCR character template that matches the letter image in <paramref name="targetItem"/>.
/// Templates are tried in a fixed sequence of passes, each with its own size/margin pre-filter;
/// the first template accepted by any pass is returned.
/// </summary>
/// <param name="parentBitmap">Not used by the current implementation.</param>
/// <param name="targetItem">Split letter whose <c>NikseBitmap</c> is probed.</param>
/// <param name="topMargin">Top margin of the letter, compared against each template's <c>MarginTop</c>.</param>
/// <param name="italic">Always set to <c>false</c>; the italic path is not implemented here.</param>
/// <param name="nOcrChars">Templates to test, in priority order.</param>
/// <param name="unItalicFactor">Not used by the current implementation.</param>
/// <param name="tryItalicScaling">Currently has no effect (see the TODO at the end of the method).</param>
/// <param name="deepSeek">When true, three additional and more lenient passes are tried.</param>
/// <returns>The matching template (an element of <paramref name="nOcrChars"/>), or <c>null</c> when nothing matches.</returns>
private static NOcrChar NOcrFindBestMatchNew(Bitmap parentBitmap, ImageSplitterItem targetItem, int topMargin, out bool italic, List<NOcrChar> nOcrChars, double unItalicFactor, bool tryItalicScaling, bool deepSeek)
{
    italic = false;
    var nbmp = targetItem.NikseBitmap;

    // Pass 1: only very accurate matches (width, height and top margin all within 2 pixels),
    // probed with a one-pixel horizontal tolerance.
    foreach (NOcrChar oc in nOcrChars)
    {
        if (Math.Abs(oc.Width - nbmp.Width) < 3 && Math.Abs(oc.Height - nbmp.Height) < 3 && Math.Abs(oc.MarginTop - topMargin) < 3 &&
            NOcrNewMatchesTolerant(oc, targetItem))
            return oc;
    }

    // Pass 2: only very accurate matches - but not for margin top (difference of 5 to 8 pixels).
    foreach (NOcrChar oc in nOcrChars)
    {
        int marginTopDiff = Math.Abs(oc.MarginTop - topMargin);
        if (Math.Abs(oc.Width - nbmp.Width) < 4 && Math.Abs(oc.Height - nbmp.Height) < 4 && marginTopDiff > 4 && marginTopDiff < 9 &&
            NOcrNewMatchesStrict(oc, targetItem, nbmp.Width, nbmp.Height, 0))
            return oc;
    }

    // Pass 3: try some resize if aspect ratio is about the same.
    double widthPercent = nbmp.Height * 100.0 / nbmp.Width;
    foreach (NOcrChar oc in nOcrChars)
    {
        if (!oc.IsSensitive &&
            Math.Abs(oc.WidthPercent - widthPercent) < 15 && oc.Width > 12 && oc.Height > 19 && nbmp.Width > 19 && nbmp.Height > 12 && Math.Abs(oc.MarginTop - topMargin) < nbmp.Height / 4 &&
            NOcrNewMatchesStrict(oc, targetItem, nbmp.Width, nbmp.Height, 0))
            return oc;
    }

    if (deepSeek)
    {
        // Pass 4: very loose aspect-ratio filter, no margin or size constraints on the template.
        foreach (NOcrChar oc in nOcrChars)
        {
            if (!oc.IsSensitive &&
                Math.Abs(oc.WidthPercent - widthPercent) < 40 && nbmp.Height > 11 &&
                NOcrNewMatchesStrict(oc, targetItem, nbmp.Width, nbmp.Height, 0))
                return oc;
        }

        // Pass 5: template lines scaled to an image 3 pixels narrower.
        foreach (NOcrChar oc in nOcrChars)
        {
            if (Math.Abs(oc.WidthPercent - widthPercent) < 40 && oc.Height > 12 && oc.Width > 19 && nbmp.Width > 19 && nbmp.Height > 12 && Math.Abs(oc.MarginTop - topMargin) < 15 &&
                NOcrNewMatchesStrict(oc, targetItem, nbmp.Width - 3, nbmp.Height, 0))
                return oc;
        }

        // Pass 6: template lines scaled to an image 4 pixels shorter and probed 4 pixels lower.
        foreach (NOcrChar oc in nOcrChars)
        {
            if (Math.Abs(oc.WidthPercent - widthPercent) < 40 && oc.Height > 12 && oc.Width > 19 && nbmp.Width > 19 && nbmp.Height > 12 && Math.Abs(oc.MarginTop - topMargin) < 15 &&
                NOcrNewMatchesStrict(oc, targetItem, nbmp.Width, nbmp.Height - 4, 4))
                return oc;
        }
    }

    // TODO: tryItalicScaling is accepted but not acted upon - the un-italic/split/re-match
    // experiment that used parentBitmap and unItalicFactor has not been implemented for this matcher.
    return null;
}

/// <summary>
/// True when the pixel counts as part of a letter: alpha above 150 and R+G+B above <c>NocrMinColor</c>.
/// </summary>
private static bool NOcrNewIsLetterPixel(Color c)
{
    return c.A > 150 && c.R + c.G + c.B > NocrMinColor;
}

/// <summary>
/// Strict template test: every in-bounds point of every foreground line must be a letter pixel and
/// no in-bounds point of any background line may be one. Points outside the image are ignored.
/// </summary>
/// <param name="oc">Template to test.</param>
/// <param name="targetItem">Letter image being probed.</param>
/// <param name="scaledWidth">Width the template lines are scaled to.</param>
/// <param name="scaledHeight">Height the template lines are scaled to.</param>
/// <param name="yOffset">Number of pixels the scaled points are shifted downwards before probing.</param>
private static bool NOcrNewMatchesStrict(NOcrChar oc, ImageSplitterItem targetItem, int scaledWidth, int scaledHeight, int yOffset)
{
    var nbmp = targetItem.NikseBitmap;

    foreach (NOcrPoint op in oc.LinesForeground)
    {
        foreach (Point point in op.ScaledGetPoints(oc, scaledWidth, scaledHeight))
        {
            int y = point.Y + yOffset;
            if (point.X < 0 || y < 0 || point.X >= nbmp.Width || y >= nbmp.Height)
                continue;
            if (!NOcrNewIsLetterPixel(nbmp.GetPixel(point.X, y)))
                return false;
        }
    }

    foreach (NOcrPoint op in oc.LinesBackground)
    {
        foreach (Point point in op.ScaledGetPoints(oc, scaledWidth, scaledHeight))
        {
            int y = point.Y + yOffset;
            if (point.X < 0 || y < 0 || point.X >= nbmp.Width || y >= nbmp.Height)
                continue;
            if (NOcrNewIsLetterPixel(nbmp.GetPixel(point.X, y)))
                return false;
        }
    }

    return true;
}

/// <summary>
/// Template test with a one-pixel horizontal tolerance, used for the most accurate size matches.
/// A missed foreground point is forgiven when the image is wider than 20 pixels and the pixel to its
/// left is a letter pixel; a hit background point is forgiven when the template is wider than 19 pixels
/// and the pixel to its left is not a letter pixel.
/// </summary>
private static bool NOcrNewMatchesTolerant(NOcrChar oc, ImageSplitterItem targetItem)
{
    var nbmp = targetItem.NikseBitmap;

    foreach (NOcrPoint op in oc.LinesForeground)
    {
        foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height))
        {
            if (point.X < 0 || point.Y < 0 || point.X >= nbmp.Width || point.Y >= nbmp.Height)
                continue;
            if (NOcrNewIsLetterPixel(nbmp.GetPixel(point.X, point.Y)))
                continue;

            // The neighbour is only consulted for wide images; checking the width first also
            // guarantees that column 1 (used when there is no left neighbour) exists.
            if (nbmp.Width <= 20)
                return false;
            int neighbourX = point.X - 1;
            if (neighbourX < 0)
                neighbourX = 1;
            if (!NOcrNewIsLetterPixel(nbmp.GetPixel(neighbourX, point.Y)))
                return false;
        }
    }

    foreach (NOcrPoint op in oc.LinesBackground)
    {
        foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height))
        {
            if (point.X < 0 || point.Y < 0 || point.X >= nbmp.Width || point.Y >= nbmp.Height)
                continue;
            if (!NOcrNewIsLetterPixel(nbmp.GetPixel(point.X, point.Y)))
                continue;

            // Background point is lit: forgive it only when the left neighbour is clear.
            if (oc.Width > 19 && point.X > 0 && !NOcrNewIsLetterPixel(nbmp.GetPixel(point.X - 1, point.Y)))
                continue;
            return false;
        }
    }

    return true;
}
private static NOcrChar MakeItalicNOcrChar(NOcrChar oldChar, int movePixelsLeft, double unItalicFactor)
{
var c = new NOcrChar();
@ -2339,6 +2719,52 @@ namespace Nikse.SubtitleEdit.Forms
return new CompareMatch(result.Text, result.Italic, 0, null, result);
}
/// <summary>
/// Matches a split letter against the nOCR templates via <c>NOcrFindBestMatchNew</c> and wraps the
/// outcome in a <c>CompareMatch</c>, correcting the case of letters whose upper- and lowercase
/// shapes differ only in height.
/// </summary>
/// <returns>
/// The match; when nothing matches, <c>null</c> if <c>checkBoxNOcrCorrect</c> is checked,
/// otherwise a placeholder match with the text "*".
/// </returns>
internal CompareMatch GetNOcrCompareMatchNew(ImageSplitterItem targetItem, Bitmap parentBitmap, List<NOcrChar> nOcrChars, double unItalicFactor, bool tryItalicScaling, bool deepSeek)
{
    bool italic;

    //var expandedResult = NOcrFindExpandedMatch(parentBitmap, targetItem, targetItem.Y - targetItem.ParentY, nOcrChars);
    //if (expandedResult != null)
    //    return new CompareMatch(expandedResult.Text, expandedResult.Italic, expandedResult.ExpandCount, null, expandedResult);

    var result = NOcrFindBestMatchNew(parentBitmap, targetItem, targetItem.Y - targetItem.ParentY, out italic, nOcrChars, unItalicFactor, tryItalicScaling, deepSeek);
    if (result == null)
    {
        if (checkBoxNOcrCorrect.Checked)
            return null;
        return new CompareMatch("*", false, 0, null);
    }

    // "result" is an element of nOcrChars. Work on a copy of its text so that a case correction
    // for this one letter does not rewrite the shared template for all later lookups.
    string text = result.Text;
    int height = targetItem.NikseBitmap.Height;

    // Remember reference heights of unambiguous lowercase/uppercase letters
    if (text == "e")
        _nocrLastLowercaseHeight = height;
    else if (_nocrLastLowercaseHeight == -1 && text == "a")
        _nocrLastLowercaseHeight = height;

    if (text == "E" || text == "H" || text == "R" || text == "D" || text == "T")
        _nocrLastUppercaseHeight = height;
    else if (_nocrLastUppercaseHeight == -1 && text == "M")
        _nocrLastUppercaseHeight = height;

    // Fix uppercase/lowercase issues (not I/l)
    if (text == "V" || text == "W" || text == "U" || text == "S" || text == "Z" || text == "O" || text == "X" || text == "Ø" || text == "C")
    {
        if (_nocrLastLowercaseHeight > 3 && height - _nocrLastLowercaseHeight < 2)
            text = text.ToLowerInvariant();
    }
    else if (text == "v" || text == "w" || text == "u" || text == "s" || text == "z" || text == "o" || text == "x" || text == "ø" || text == "c")
    {
        if (_nocrLastUppercaseHeight > 3 && _nocrLastUppercaseHeight - height < 2)
            text = text.ToUpperInvariant();
    }

    return new CompareMatch(text, italic || result.Italic, 0, null, result);
}
internal static CompareMatch GetNOcrCompareMatch(ImageSplitterItem targetItem, Bitmap parentBitmap, NOcrThreadParameter p)
{
bool italic;
@ -3307,7 +3733,7 @@ namespace Nikse.SubtitleEdit.Forms
private string SaveCompareItem(NikseBitmap newTarget, string text, bool isItalic, int expandCount)
{
string path = Configuration.VobSubCompareFolder + comboBoxCharacterDatabase.SelectedItem + Path.DirectorySeparatorChar;
string path = Configuration.OcrFolder + comboBoxCharacterDatabase.SelectedItem + "_";
string databaseName = path + "Images.db";
FileStream f;
long pos = 0;
@ -3329,6 +3755,8 @@ namespace Nikse.SubtitleEdit.Forms
}
string name = pos.ToString(CultureInfo.InvariantCulture);
if (_compareBitmaps == null)
_compareBitmaps = new List<CompareItem>();
_compareBitmaps.Add(new CompareItem(new ManagedBitmap(newTarget), name, isItalic, expandCount, text));
XmlElement element = _compareDoc.CreateElement("Item");
@ -3348,8 +3776,14 @@ namespace Nikse.SubtitleEdit.Forms
element.Attributes.Append(italic);
}
element.InnerText = pos.ToString(CultureInfo.InvariantCulture);
_compareDoc.DocumentElement.AppendChild(element);
_compareDoc.Save(path + "Images.xml");
if (_compareDoc == null)
{
_compareDoc = new XmlDocument();
_compareDoc.LoadXml("<OcrBitmaps></OcrBitmaps>");
}
//_compareDoc.DocumentElement.AppendChild(element);
//_compareDoc.Save(path + "Images.xml");
return name;
}
@ -3663,6 +4097,15 @@ namespace Nikse.SubtitleEdit.Forms
{
CompareMatch bestGuess;
CompareMatch match = GetCompareMatchNew(item, parentBitmap, out bestGuess, list, index);
if (match == null)
{
if (_nocrChars == null)
_nocrChars = LoadNOcr(_binaryOcrDb.FileName.Replace(".db", ".xml"));
if (_nocrChars != null)
match = GetNOcrCompareMatchNew(item, bitmap, _nocrChars, _unItalicFactor, true, true);
}
if (match == null)
{
_vobSubOcrCharacter.Initialize(bitmap, item, _manualOcrDialogPosition, _italicCheckedLast, false, bestGuess, _lastAdditions, this);
@ -3982,7 +4425,12 @@ namespace Nikse.SubtitleEdit.Forms
nbmpInput.ReplaceNonWhiteWithTransparent();
var matches = new List<CompareMatch>();
List<ImageSplitterItem> list = NikseBitmapImageSplitter.SplitBitmapToLetters(nbmpInput, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom);
int minLineHeight = _binOcrLastLowercaseHeight - 3;
if (minLineHeight < 5)
minLineHeight = 5;
List<ImageSplitterItem> list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(nbmpInput, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom, minLineHeight);
// List<ImageSplitterItem> list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(nbmpInput, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom, minLineHeight);
foreach (ImageSplitterItem item in list)
{
@ -4068,7 +4516,8 @@ namespace Nikse.SubtitleEdit.Forms
}
else
{
CompareMatch match = GetNOcrCompareMatch(item, bitmap, _nocrChars, _unItalicFactor, checkBoxNOcrItalic.Checked, !checkBoxNOcrCorrect.Checked);
// CompareMatch match = GetNOcrCompareMatch(item, bitmap, _nocrChars, _unItalicFactor, checkBoxNOcrItalic.Checked, !checkBoxNOcrCorrect.Checked);
CompareMatch match = GetNOcrCompareMatchNew(item, bitmap, _nocrChars, _unItalicFactor, checkBoxNOcrItalic.Checked, !checkBoxNOcrCorrect.Checked);
if (match == null)
{
_vobSubOcrNOcrCharacter.Initialize(bitmap, item, _manualOcrDialogPosition, _italicCheckedLast, false, null, _lastAdditions, this);
@ -6741,10 +7190,9 @@ namespace Nikse.SubtitleEdit.Forms
comboBoxNOcrLanguage.Items.Clear();
int index = 0;
int selIndex = 0;
foreach (string fileName in Directory.GetFiles(Configuration.DictionariesFolder, "nOCR_*.xml"))
foreach (string fileName in Directory.GetFiles(Configuration.OcrFolder, "*.xml"))
{
string s = Path.GetFileNameWithoutExtension(fileName);
s = s.Remove(0, 5);
if (s.Length > 0 && !s.ToLower().EndsWith("_user"))
{
if (s == Configuration.Settings.VobSubOcr.LineOcrLastLanguages)
@ -6773,7 +7221,7 @@ namespace Nikse.SubtitleEdit.Forms
if (comboBoxNOcrLanguage.SelectedIndex < 0)
return null;
return Configuration.DictionariesFolder + "nOCR_" + comboBoxNOcrLanguage.Items[comboBoxNOcrLanguage.SelectedIndex] + ".xml";
return Configuration.OcrFolder + comboBoxNOcrLanguage.Items[comboBoxNOcrLanguage.SelectedIndex] + ".xml";
}

View File

@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
namespace Nikse.SubtitleEdit.Logic.OCR
{
@ -15,12 +16,13 @@ namespace Nikse.SubtitleEdit.Logic.OCR
public List<NOcrPoint> LinesBackground { get; private set; }
public string Id { get; set; }
public int ExpandCount { get; set; }
public bool LoadedOk { get; private set; }
public Double WidthPercent
{
get
{
return Height * 100 / Width;
return Height * 100.0 / Width;
}
}
@ -63,9 +65,110 @@ namespace Nikse.SubtitleEdit.Logic.OCR
{
get
{
return Text == "." || Text == "," || Text == "'" || Text == "-" || Text == "\"";
return Text == "." || Text == "," || Text == "'" || Text == "-" || Text == ":" || Text == "\"";
}
}
/// <summary>
/// Reads one character record from <paramref name="stream"/> in the layout written by <c>Save</c>:
/// a 9-byte header (width, height and top margin as big-endian 16-bit values, italic flag,
/// expand count, text byte length), the UTF-8 text, then the background and foreground point lists.
/// <c>LoadedOk</c> is false when the stream ends before a complete record was read or when
/// reading fails.
/// </summary>
/// <param name="stream">Stream positioned at the start of a record.</param>
public NOcrChar(Stream stream)
{
    try
    {
        // Nine bytes: the header includes the text-length byte at index 8.
        var header = new byte[9];
        if (!TryFillBuffer(stream, header))
        {
            LoadedOk = false;
            return;
        }

        Width = header[0] << 8 | header[1];
        Height = header[2] << 8 | header[3];
        MarginTop = header[4] << 8 | header[5];
        Italic = header[6] != 0;
        ExpandCount = header[7];

        int textLen = header[8];
        if (textLen > 0)
        {
            var textBuffer = new byte[textLen];
            if (!TryFillBuffer(stream, textBuffer))
            {
                LoadedOk = false;
                return;
            }
            Text = System.Text.Encoding.UTF8.GetString(textBuffer);
        }

        // Same order as Save: background lines first, then foreground lines.
        LinesBackground = ReadPoints(stream);
        LinesForeground = ReadPoints(stream);
        LoadedOk = true;
    }
    catch
    {
        // Any failure while reading marks the record as not loaded; callers test LoadedOk.
        LoadedOk = false;
    }
}

/// <summary>
/// Fills <paramref name="buffer"/> completely from <paramref name="stream"/>, repeating the read
/// because a single Stream.Read call may return fewer bytes than requested.
/// </summary>
/// <returns>False when the stream ends before the buffer is full.</returns>
private static bool TryFillBuffer(Stream stream, byte[] buffer)
{
    int offset = 0;
    while (offset < buffer.Length)
    {
        int read = stream.Read(buffer, offset, buffer.Length - offset);
        if (read <= 0)
            return false;
        offset += read;
    }
    return true;
}
/// <summary>
/// Reads a point list: a big-endian 16-bit count followed by that many 8-byte entries
/// (Start.X, Start.Y, End.X, End.Y, each a big-endian 16-bit value).
/// </summary>
/// <param name="stream">Stream positioned at the count.</param>
/// <returns>The decoded lines, in stream order.</returns>
/// <exception cref="EndOfStreamException">The stream ends inside the list.</exception>
private List<NOcrPoint> ReadPoints(Stream stream)
{
    int high = stream.ReadByte();
    int low = stream.ReadByte();
    if (high < 0 || low < 0) // ReadByte returns -1 at end of stream
        throw new EndOfStreamException();

    int length = high << 8 | low;
    var list = new List<NOcrPoint>(length);
    var buffer = new byte[8];
    for (int i = 0; i < length; i++)
    {
        // A single Read may deliver fewer than 8 bytes, so read until the entry is complete.
        int offset = 0;
        while (offset < buffer.Length)
        {
            int read = stream.Read(buffer, offset, buffer.Length - offset);
            if (read <= 0)
                throw new EndOfStreamException();
            offset += read;
        }

        var point = new NOcrPoint
        {
            Start = new Point(buffer[0] << 8 | buffer[1], buffer[2] << 8 | buffer[3]),
            // End.Y is stored in bytes 6-7 (bytes 2-3 hold Start.Y).
            End = new Point(buffer[4] << 8 | buffer[5], buffer[6] << 8 | buffer[7])
        };
        list.Add(point);
    }
    return list;
}
/// <summary>
/// Writes this character to <paramref name="stream"/>: width, height and top margin as big-endian
/// 16-bit values, one byte each for the italic flag and the expand count, a one-byte text length
/// followed by the UTF-8 text (length 0 when there is no text), then the background point list
/// followed by the foreground point list.
/// </summary>
/// <param name="stream">Destination stream.</param>
internal void Save(Stream stream)
{
    // Fixed-size header
    WriteInt16(stream, (ushort)Width);
    WriteInt16(stream, (ushort)Height);
    WriteInt16(stream, (ushort)MarginTop);
    stream.WriteByte(Convert.ToByte(Italic));
    stream.WriteByte(Convert.ToByte(ExpandCount));

    // Length-prefixed text
    if (Text != null)
    {
        byte[] encodedText = System.Text.Encoding.UTF8.GetBytes(Text);
        stream.WriteByte((byte)encodedText.Length);
        stream.Write(encodedText, 0, encodedText.Length);
    }
    else
    {
        stream.WriteByte(0);
    }

    // Point lists: background first, then foreground
    WritePoints(stream, LinesBackground);
    WritePoints(stream, LinesForeground);
}
/// <summary>
/// Writes a point list: the number of entries as a big-endian 16-bit value, then for each entry
/// Start.X, Start.Y, End.X and End.Y as big-endian 16-bit values.
/// </summary>
/// <param name="stream">Destination stream.</param>
/// <param name="points">Lines to write, in order.</param>
private void WritePoints(Stream stream, List<NOcrPoint> points)
{
    int count = points.Count;
    WriteInt16(stream, (ushort)count);
    for (int i = 0; i < count; i++)
    {
        NOcrPoint line = points[i];
        WriteInt16(stream, (ushort)line.Start.X);
        WriteInt16(stream, (ushort)line.Start.Y);
        WriteInt16(stream, (ushort)line.End.X);
        WriteInt16(stream, (ushort)line.End.Y);
    }
}
/// <summary>
/// Writes <paramref name="val"/> to <paramref name="stream"/> as two bytes, most significant byte first.
/// </summary>
private static void WriteInt16(Stream stream, ushort val)
{
    byte high = (byte)((val >> 8) & 0xFF);
    byte low = (byte)(val & 0xFF);
    byte[] bigEndian = { high, low };
    stream.Write(bigEndian, 0, bigEndian.Length);
}
}
}

75
src/Logic/OCR/NOcrDb.cs Normal file
View File

@ -0,0 +1,75 @@
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
namespace Nikse.SubtitleEdit.Logic.OCR
{
/// <summary>
/// A GZip-compressed file of <see cref="NOcrChar"/> records, held in memory as a list.
/// </summary>
public class NOcrDb
{
    /// <summary>Path of the database file given to the constructor.</summary>
    public string FileName { get; private set; }

    /// <summary>Characters currently held in memory; replaced by <see cref="LoadOcrCharacters"/>.</summary>
    public List<NOcrChar> OcrCharacters = new List<NOcrChar>();

    /// <summary>
    /// Remembers <paramref name="fileName"/> and loads its characters (an empty list when the file does not exist).
    /// </summary>
    public NOcrDb(string fileName)
    {
        FileName = fileName;
        LoadOcrCharacters();
    }

    /// <summary>
    /// Writes all characters to <see cref="FileName"/>, replacing any previous content.
    /// </summary>
    public void Save()
    {
        // File.Create truncates an existing file. File.OpenWrite would not, so saving a smaller
        // database over a larger one would leave stale bytes after the new GZip data.
        using (Stream fs = File.Create(FileName))
        {
            using (Stream gz = new GZipStream(fs, CompressionMode.Compress))
            {
                foreach (var ocrChar in OcrCharacters)
                    ocrChar.Save(gz);
            }
        }
    }

    /// <summary>
    /// Replaces <see cref="OcrCharacters"/> with the records read from <see cref="FileName"/>.
    /// Reading stops at the first record whose <c>LoadedOk</c> is false.
    /// </summary>
    public void LoadOcrCharacters()
    {
        var list = new List<NOcrChar>();

        if (!File.Exists(FileName))
        {
            OcrCharacters = list;
            return;
        }

        using (Stream fs = File.OpenRead(FileName))
        {
            using (Stream gz = new GZipStream(fs, CompressionMode.Decompress))
            {
                while (true)
                {
                    var ocrChar = new NOcrChar(gz);
                    if (!ocrChar.LoadedOk)
                        break;
                    list.Add(ocrChar);
                }
            }
        }
        OcrCharacters = list;
    }

    /// <summary>
    /// Placeholder: no lookup is implemented yet, so this always returns -1.
    /// </summary>
    public int FindExactMatch(NOcrChar ocrChar)
    {
        return -1;
    }

    /// <summary>
    /// Appends <paramref name="ocrChar"/> to the in-memory list; call <see cref="Save"/> to persist it.
    /// </summary>
    public void Add(NOcrChar ocrChar)
    {
        OcrCharacters.Add(ocrChar);
    }
}
}

View File

@ -16,7 +16,8 @@ namespace Nikse.SubtitleEdit.Logic.OCR
{
// Dictionaries/spellchecking/fixing
Dictionary<string, string> _wordReplaceList;
Dictionary<string, string> _partialLineReplaceList;
Dictionary<string, string> _partialLineWordBoundaryReplaceList;
Dictionary<string, string> _partialLineAlwaysReplaceList;
Dictionary<string, string> _beginLineReplaceList;
Dictionary<string, string> _endLineReplaceList;
Dictionary<string, string> _wholeLineReplaceList;
@ -85,7 +86,8 @@ namespace Nikse.SubtitleEdit.Logic.OCR
private void LoadReplaceLists(string languageId)
{
_wordReplaceList = new Dictionary<string, string>();
_partialLineReplaceList = new Dictionary<string, string>();
_partialLineWordBoundaryReplaceList = new Dictionary<string, string>();
_partialLineAlwaysReplaceList = new Dictionary<string, string>();
_beginLineReplaceList = new Dictionary<string, string>();
_endLineReplaceList = new Dictionary<string, string>();
_wholeLineReplaceList = new Dictionary<string, string>();
@ -109,7 +111,8 @@ namespace Nikse.SubtitleEdit.Logic.OCR
_wordReplaceList = LoadReplaceList(doc, "WholeWords");
_partialWordReplaceListAlways = LoadReplaceList(doc, "PartialWordsAlways");
_partialWordReplaceList = LoadReplaceList(doc, "PartialWords");
_partialLineReplaceList = LoadReplaceList(doc, "PartialLines");
_partialLineWordBoundaryReplaceList = LoadReplaceList(doc, "PartialLines");
_partialLineAlwaysReplaceList = LoadReplaceList(doc, "PartialAlwaysLines");
_beginLineReplaceList = LoadReplaceList(doc, "BeginLines");
_endLineReplaceList = LoadReplaceList(doc, "EndLines");
_wholeLineReplaceList = LoadReplaceList(doc, "WholeLines");
@ -1383,10 +1386,16 @@ namespace Nikse.SubtitleEdit.Logic.OCR
}
newText += post;
foreach (string from in _partialLineReplaceList.Keys)
foreach (string from in _partialLineWordBoundaryReplaceList.Keys)
{
if (newText.Contains(from))
newText = newText.Replace(from, _partialLineReplaceList[from]); // ReplaceWord(newText, from, _partialLineReplaceList[from]);
newText = ReplaceWord(newText, from, _partialLineWordBoundaryReplaceList[from]);
}
foreach (string from in _partialLineAlwaysReplaceList.Keys)
{
if (newText.Contains(from))
newText = newText.Replace(from, _partialLineAlwaysReplaceList[from]);
}
foreach (string findWhat in _regExList.Keys)

View File

@ -205,6 +205,7 @@ namespace Nikse.SubtitleEdit.Logic.SubtitleFormats
new UnknownSubtitle66(),
new UnknownSubtitle67(),
new UnknownSubtitle68(),
new UnknownSubtitle69(),
};
string path = Configuration.PluginsDirectory;

View File

@ -0,0 +1,117 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace Nikse.SubtitleEdit.Logic.SubtitleFormats
{
/// <summary>
/// Text subtitle format where each entry starts with a numbered header line such as
/// <c>1) 00:00:06:14 00:00:07:07 Durée : 00:18 Lisibilité : 011 Intervalle : 06:14 Nbc : 018</c>
/// followed by one or more text lines.
/// </summary>
public class UnknownSubtitle69: SubtitleFormat
{
    // Matches the start of a header line, e.g. "1) 00:00:06:14 00:00:07:07 Durée : 00:18"
    static readonly Regex RegexTimeCode = new Regex(@"^\d+\) \d\d:\d\d:\d\d:\d\d \d\d:\d\d:\d\d:\d\d Durée : \d\d:\d\d", RegexOptions.Compiled);

    public override string Extension
    {
        get { return ".txt"; }
    }

    public override string Name
    {
        get { return "Unknown 69"; }
    }

    public override bool IsTimeBased
    {
        get { return true; }
    }

    /// <summary>
    /// Parses <paramref name="lines"/> with <see cref="LoadSubtitle"/> and accepts the
    /// file when more paragraphs than errors were found.
    /// </summary>
    public override bool IsMine(List<string> lines, string fileName)
    {
        var subtitle = new Subtitle();
        LoadSubtitle(subtitle, lines, fileName);
        return subtitle.Paragraphs.Count > _errorCount;
    }

    /// <summary>
    /// Serializes <paramref name="subtitle"/> as numbered header lines followed by the
    /// paragraph text (HTML tags removed), with an empty line between entries.
    /// </summary>
    /// <param name="subtitle">Subtitle whose paragraphs are written.</param>
    /// <param name="title">Not used by this format.</param>
    /// <returns>The formatted text, trimmed of leading/trailing whitespace.</returns>
    public override string ToText(Subtitle subtitle, string title)
    {
        //1) 00:00:06:14 00:00:07:07 Durée : 00:18 Lisibilité : 011 Intervalle : 06:14 Nbc : 018
        //text
        //line2

        //2) 00:00:07:14 00:00:09:02 Durée : 01:13 Lisibilité : 023 Intervalle : 00:07 Nbc : 026
        //text
        var sb = new StringBuilder();
        string paragraphWriteFormat = "{0}) {1} {2} Durée : {3} Lisibilité : {4} Intervalle : {5} Nbc : {6}" + Environment.NewLine + "{7}";
        int count = 1;
        foreach (Paragraph p in subtitle.Paragraphs)
        {
            string text = Utilities.RemoveHtmlTags(p.Text, true);
            string start = p.StartTime.ToHHMMSSFF();
            string end = p.EndTime.ToHHMMSSFF();
            // NOTE(review): only the seconds component of the duration is written, so a
            // duration of a minute or more would lose its minutes - confirm against real files.
            string duration = string.Format("{0:00}:{1:00}", p.Duration.Seconds, MillisecondsToFramesMaxFrameRate(p.Duration.Milliseconds));
            // Readability and interval are written as fixed values; they are not computed.
            string readability = "011";
            string interval = "06:14";
            // Character count of the tag-free text, zero-padded to three digits.
            string nbc = text.Length.ToString().PadLeft(3, '0');
            sb.AppendLine(string.Format(paragraphWriteFormat, count, start, end, duration, readability, interval, nbc, text));
            sb.AppendLine();
            count++;
        }
        return sb.ToString().Trim();
    }

    /// <summary>
    /// Fills <paramref name="subtitle"/> from <paramref name="lines"/>. A line matching
    /// the header pattern starts a new paragraph; other non-empty lines are collected
    /// as its text. Headers whose time codes cannot be decoded increment the error count.
    /// </summary>
    /// <param name="subtitle">Target subtitle; its paragraphs are cleared first.</param>
    /// <param name="lines">Lines of the file.</param>
    /// <param name="fileName">Not used by this format.</param>
    public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
    {
        _errorCount = 0;
        subtitle.Paragraphs.Clear();
        var text = new StringBuilder();
        Paragraph p = null;
        for (int i = 0; i < lines.Count; i++)
        {
            string line = lines[i].Trim();
            if (line.Length == 0)
            {
                if (p != null)
                    p.Text = text.ToString().Trim();
            }
            else if (RegexTimeCode.IsMatch(line))
            {
                var timeParts = line.Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
                if (timeParts.Length > 4)
                {
                    // Store the collected text on the previous paragraph before the buffer
                    // is replaced; otherwise entries that are not separated by an empty
                    // line would lose their text.
                    if (p != null)
                        p.Text = text.ToString().Trim();

                    try
                    {
                        string start = timeParts[1];
                        string end = timeParts[2];
                        p = new Paragraph();
                        p.StartTime = DecodeTimeCode(start);
                        p.EndTime = DecodeTimeCode(end);
                        subtitle.Paragraphs.Add(p);
                        text = new StringBuilder();
                    }
                    catch
                    {
                        _errorCount++;
                    }
                }
            }
            else
            {
                text.AppendLine(line);
                // Give up on input that keeps accumulating text without a new header.
                if (text.Length > 5000)
                    return;
            }
        }
        if (p != null)
            p.Text = text.ToString().Trim();
        subtitle.Renumber(1);
    }

    /// <summary>
    /// Converts the first 11 characters of <paramref name="timePart"/> ("HH:MM:SS:FF")
    /// to a <see cref="TimeCode"/>, turning the frame field into milliseconds.
    /// </summary>
    /// <param name="timePart">Time code text; must be at least 11 characters long.</param>
    private TimeCode DecodeTimeCode(string timePart)
    {
        string s = timePart.Substring(0, 11);
        var parts = s.Split(":F".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        return new TimeCode(int.Parse(parts[0]), int.Parse(parts[1]), int.Parse(parts[2]), FramesToMillisecondsMax999(int.Parse(parts[3])));
    }
}
}

View File

@ -828,6 +828,7 @@
<Compile Include="Logic\OCR\Binary\BinaryOcrBitmap.cs" />
<Compile Include="Logic\OCR\Binary\BinaryOcrDb.cs" />
<Compile Include="Logic\OCR\ModiLanguage.cs" />
<Compile Include="Logic\OCR\NOcrDb.cs" />
<Compile Include="Logic\OCR\OcrAlphabet.cs" />
<Compile Include="Logic\OCR\NOcrChar.cs" />
<Compile Include="Logic\OCR\OcrCharacter.cs" />
@ -887,6 +888,7 @@
<Compile Include="Logic\SubtitleFormats\UnknownSubtitle67.cs" />
<Compile Include="Logic\SubtitleFormats\SwiftTextLineNumber .cs" />
<Compile Include="Logic\SubtitleFormats\Titra.cs" />
<Compile Include="Logic\SubtitleFormats\UnknownSubtitle69.cs" />
<Compile Include="Logic\SubtitleFormats\WebVTTFileWithLineNumber.cs" />
<Compile Include="Logic\SubtitleFormats\FinalCutProXmlGap.cs" />
<Compile Include="Logic\SubtitleFormats\UnknownSubtitle66.cs" />

View File

@ -578,8 +578,18 @@ namespace Test
Assert.AreEqual(target._subtitle.Paragraphs[0].Text, "- I'll ring her." + Environment.NewLine + "- ...In a lot of trouble.");
}
[TestMethod]
[DeploymentItem("SubtitleEdit.exe")]
public void FixOcrErrorsNoChange()
{
    // Running the "eng" replace list over this line must leave it unchanged.
    const string unchangedLine = "Yeah, see, that's not mine.";
    var target = new FixCommonErrors_Accessor();
    InitializeFixCommonErrorsLine(target, unchangedLine);
    target.FixOcrErrorsViaReplaceList("eng");
    // Expected value goes first so a failure message labels the values correctly.
    Assert.AreEqual(unchangedLine, target._subtitle.Paragraphs[0].Text);
}
}
}