Implemented storing OCR compare images in one file to speed up loading - thx Zoltan :)

git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@2068 99eadd0c-20b8-1223-b5c4-2a2b2df33de2
This commit is contained in:
niksedk 2013-09-09 17:32:59 +00:00
parent cd3620a8ab
commit 28823e8318
3 changed files with 150 additions and 54 deletions

View File

@ -44,10 +44,10 @@ namespace Nikse.SubtitleEdit.Forms
pictureBox1.SizeMode = PictureBoxSizeMode.AutoSize;
_directoryPath = Configuration.VobSubCompareFolder + databaseFolderName + Path.DirectorySeparatorChar;
if (!File.Exists(_directoryPath + "CompareDescription.xml"))
if (!File.Exists(_directoryPath + "Images.xml"))
_compareDoc.LoadXml("<OcrBitmaps></OcrBitmaps>");
else
_compareDoc.Load(_directoryPath + "CompareDescription.xml");
_compareDoc.Load(_directoryPath + "Images.xml");
Refill(Additions);
@ -93,7 +93,7 @@ namespace Nikse.SubtitleEdit.Forms
{
if (name == a.Name)
{
listBoxFileNames.Items.Add("[" + text +"] " + node.InnerText + ".mbmp");
listBoxFileNames.Items.Add("[" + text +"] " + node.InnerText);
_italics.Add(node.Attributes["Italic"] != null);
}
@ -119,7 +119,7 @@ namespace Nikse.SubtitleEdit.Forms
private void FillComboWithUniqueAndSortedTexts()
{
List<string> texts = new List<string>();
foreach (XmlNode node in _compareDoc.DocumentElement.SelectNodes("FileName"))
foreach (XmlNode node in _compareDoc.DocumentElement.SelectNodes("Item"))
{
if (node.Attributes.Count >= 1)
{
@ -182,21 +182,22 @@ namespace Nikse.SubtitleEdit.Forms
private void ListBoxFileNamesSelectedIndexChanged(object sender, EventArgs e)
{
checkBoxItalic.Checked = _italics[listBoxFileNames.SelectedIndex];
string fileName = _directoryPath + GetSelectedFileName();
Bitmap bmp;
if (File.Exists(fileName))
string databaseName = _directoryPath + "Images.db";
string posAsString = GetSelectedFileName();
Bitmap bmp = null;
if (File.Exists(databaseName))
{
ManagedBitmap tmp = new ManagedBitmap(fileName);
try
{
labelImageInfo.Text = string.Format(Configuration.Settings.Language.VobSubEditCharacters.Image + " - {0}x{1}", tmp.Width, tmp.Height);
using (var f = new FileStream(databaseName, FileMode.Open))
{
    // The offset must come from the selected list entry (posAsString), not from
    // databaseName: parsing the database path as a number can never succeed, so the
    // image would never be loaded.
    // NOTE(review): assumes GetSelectedFileName() returns only the numeric offset text
    // that was stored as the Item's inner text - confirm against that helper.
    long pos;
    if (long.TryParse(posAsString,
                      System.Globalization.NumberStyles.Integer,
                      System.Globalization.CultureInfo.InvariantCulture,
                      out pos))
    {
        f.Position = pos;
        ManagedBitmap mbmp = new ManagedBitmap(f);
        bmp = mbmp.ToOldBitmap();
    }
    // When the text is not a valid offset, bmp stays null and the code that follows
    // falls back to the "image not found" placeholder.
}
catch
{
}
bmp = tmp.ToOldBitmap();
}
else
if (bmp == null)
{
bmp = new Bitmap(1,1);
labelImageInfo.Text = Configuration.Settings.Language.VobSubEditCharacters.ImageFileNotFound;
@ -233,7 +234,7 @@ namespace Nikse.SubtitleEdit.Forms
string target = GetSelectedFileName();
target = target.Substring(0, target.Length - 4);
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + target + "']");
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + target + "']");
if (node != null)
{
string newText = textBoxText.Text;
@ -308,7 +309,7 @@ namespace Nikse.SubtitleEdit.Forms
int oldComboBoxIndex = comboBoxTexts.SelectedIndex;
string target = GetSelectedFileName();
target = target.Substring(0, target.Length - 4);
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + target + "']");
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + target + "']");
if (node != null)
{
_compareDoc.DocumentElement.RemoveChild(node);

View File

@ -723,38 +723,81 @@ namespace Nikse.SubtitleEdit.Forms
_compareDoc.LoadXml("<OcrBitmaps></OcrBitmaps>");
else
_compareDoc.Load(path + "CompareDescription.xml");
foreach (string bmpFileName in Directory.GetFiles(path, "*.bmp"))
string databaseName = path + "Images.db";
if (!File.Exists(databaseName))
{
string newName = bmpFileName.Replace(".bmp", ".mbmp");
if (!File.Exists(newName))
using (var f = new FileStream(databaseName, FileMode.Create))
{
Bitmap b = new Bitmap(bmpFileName);
ManagedBitmap m = new ManagedBitmap(b);
b.Dispose();
m.Save(newName);
}
}
foreach (string bmpFileName in Directory.GetFiles(path, "*.mbmp"))
{
string name = Path.GetFileNameWithoutExtension(bmpFileName);
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']");
if (node != null)
{
bool isItalic = node.Attributes["Italic"] != null;
int expandCount = 0;
if (node.Attributes["Expand"] != null)
foreach (string bmpFileName in Directory.GetFiles(path, "*.bmp"))
{
if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount))
expandCount = 0;
string name = Path.GetFileNameWithoutExtension(bmpFileName);
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']");
if (node != null)
{
node.InnerText = f.Position.ToString(CultureInfo.InvariantCulture);
var b = new Bitmap(bmpFileName);
var m = new ManagedBitmap(b);
b.Dispose();
m.AppendToStream(f);
}
}
f.Close();
}
_compareDoc.Save(path + "Images.xml");
string text = File.ReadAllText(path + "Images.xml");
File.WriteAllText(path + "Images.xml", text.Replace("<FileName", "<Item").Replace("</FileName>", "</Item>"));
}
if (File.Exists(databaseName))
{
_compareDoc.Load(path + "Images.xml");
using (var f = new FileStream(databaseName, FileMode.Open))
{
foreach (XmlNode node in _compareDoc.DocumentElement.SelectNodes("Item"))
{
try //if (node.Attributes["Pos"] != null)
{
string name = node.InnerText;
int pos = Convert.ToInt32(name);
bool isItalic = node.Attributes["Italic"] != null;
int expandCount = 0;
if (node.Attributes["Expand"] != null)
{
if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount))
expandCount = 0;
}
f.Position = pos;
ManagedBitmap mbmp = new ManagedBitmap(f);
_compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount));
}
catch
{
//MessageBox.Show(node.OuterXml);
}
}
ManagedBitmap mbmp = new ManagedBitmap(bmpFileName);
_compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount));
}
}
// foreach (string bmpFileName in Directory.GetFiles(path, "*.mbmp"))
// {
// string name = Path.GetFileNameWithoutExtension(bmpFileName);
// XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']");
// if (node != null)
// {
// bool isItalic = node.Attributes["Italic"] != null;
// int expandCount = 0;
// if (node.Attributes["Expand"] != null)
// {
// if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount))
// expandCount = 0;
// }
// ManagedBitmap mbmp = new ManagedBitmap(bmpFileName);
// _compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount));
// }
// }
}
private void DisposeImageCompareBitmaps()
@ -2241,7 +2284,7 @@ namespace Nikse.SubtitleEdit.Forms
maxDiff = 12.9; // let bluray sup have a 12.9% diff
if (differencePercentage < maxDiff) //_vobSubOcrSettings.AllowDifferenceInPercent) // should be around 1.0...
{
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + _compareBitmaps[smallestIndex].Name + "']");
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + _compareBitmaps[smallestIndex].Name + "']");
if (node != null && _bluRaySubtitlesOriginal != null && "ceoil".Contains(node.Attributes["Text"].InnerText) && differencePercentage > 12)
node = null;
if (node != null)
@ -2271,7 +2314,7 @@ namespace Nikse.SubtitleEdit.Forms
}
}
XmlNode nodeGuess = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + _compareBitmaps[smallestIndex].Name + "']");
XmlNode nodeGuess = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + _compareBitmaps[smallestIndex].Name + "']");
if (nodeGuess != null)
{
bool isItalicGuess = nodeGuess.Attributes["Italic"] != null;
@ -2281,7 +2324,7 @@ namespace Nikse.SubtitleEdit.Forms
if (!int.TryParse(nodeGuess.Attributes["Expand"].InnerText, out expandCountGuess))
expandCountGuess = 0;
}
secondBestGuess = new CompareMatch(nodeGuess.Attributes["Text"].InnerText, isItalicGuess, expandCountGuess, _compareBitmaps[smallestIndex].Name);
secondBestGuess = new CompareMatch(nodeGuess.InnerText, isItalicGuess, expandCountGuess, _compareBitmaps[smallestIndex].Name);
}
}
@ -2314,7 +2357,7 @@ namespace Nikse.SubtitleEdit.Forms
if (compareItem.NumberOfForegroundColors == -1)
compareItem.NumberOfForegroundColors = CalculateNumberOfForegroundColors(compareItem.Bitmap);
if (Math.Abs(compareItem.NumberOfForegroundColors - numberOfForegroundColors) < minForeColorMatch)
if (Math.Abs(compareItem.NumberOfForegroundColors - numberOfForegroundColors) < 50)
{
int dif = ImageSplitter.IsBitmapsAlike(compareItem.Bitmap, target);
if (dif < smallestDifference)
@ -2645,13 +2688,30 @@ namespace Nikse.SubtitleEdit.Forms
private string SaveCompareItem(Bitmap newTarget, string text, bool isItalic, int expandCount)
{
string path = Configuration.VobSubCompareFolder + comboBoxCharacterDatabase.SelectedItem + Path.DirectorySeparatorChar;
string name = Guid.NewGuid().ToString();
string fileName = path + name + ".bmp";
new ManagedBitmap(newTarget).Save(fileName.Replace(".bmp", ".mbmp"));
string databaseName = path + "Images.db";
FileStream f;
long pos = 0;
if (!File.Exists(databaseName))
{
using (f = new FileStream(databaseName, FileMode.Create))
{
pos = f.Position;
new ManagedBitmap(newTarget).AppendToStream(f);
}
}
else
{
using (f = new FileStream(databaseName, FileMode.Append))
{
pos = f.Position;
new ManagedBitmap(newTarget).AppendToStream(f);
}
}
string name = pos.ToString(CultureInfo.InvariantCulture);
_compareBitmaps.Add(new CompareItem(new ManagedBitmap(newTarget), name, isItalic, expandCount));
XmlElement element = _compareDoc.CreateElement("FileName");
XmlElement element = _compareDoc.CreateElement("Item");
XmlAttribute attribute = _compareDoc.CreateAttribute("Text");
attribute.InnerText = text;
element.Attributes.Append(attribute);
@ -2667,9 +2727,9 @@ namespace Nikse.SubtitleEdit.Forms
italic.InnerText = "true";
element.Attributes.Append(italic);
}
element.InnerText = name;
element.InnerText = pos.ToString(CultureInfo.InvariantCulture);
_compareDoc.DocumentElement.AppendChild(element);
_compareDoc.Save(path + "CompareDescription.xml");
_compareDoc.Save(path + "Images.xml");
return name;
}

View File

@ -39,6 +39,24 @@ namespace Nikse.SubtitleEdit.Logic
}
}
/// <summary>
/// Reads one bitmap record starting at the current position of <paramref name="stream"/>.
/// </summary>
/// <remarks>
/// Layout consumed here: an 8-byte header whose first four bytes are skipped (AppendToStream
/// writes the text "MBMP" there), followed by the width in bytes 4-5 and the height in
/// bytes 6-7 (high byte first), then Width * Height pixels of four bytes each. The four
/// pixel bytes are passed to Color.FromArgb in stream order (alpha, red, green, blue).
/// </remarks>
/// <param name="stream">Readable stream positioned at the first byte of the record.</param>
/// <exception cref="EndOfStreamException">
/// The stream ends before the complete header or pixel data has been read.
/// </exception>
public ManagedBitmap(Stream stream)
{
    var header = new byte[8];
    FillBufferFromStream(stream, header);
    Width = header[4] << 8 | header[5];
    Height = header[6] << 8 | header[7];
    _colors = new Color[Width * Height];

    var pixels = new byte[Width * Height * 4];
    FillBufferFromStream(stream, pixels);
    int start = 0;
    for (int i = 0; i < _colors.Length; i++)
    {
        _colors[i] = Color.FromArgb(pixels[start], pixels[start + 1], pixels[start + 2], pixels[start + 3]);
        start += 4;
    }
}

/// <summary>
/// Fills <paramref name="buffer"/> completely from <paramref name="stream"/>.
/// Stream.Read may return fewer bytes than requested, so it is called repeatedly
/// until the buffer is full.
/// </summary>
/// <exception cref="EndOfStreamException">The stream ends before the buffer is full.</exception>
private static void FillBufferFromStream(Stream stream, byte[] buffer)
{
    int offset = 0;
    while (offset < buffer.Length)
    {
        int read = stream.Read(buffer, offset, buffer.Length - offset);
        if (read <= 0)
        {
            throw new EndOfStreamException("Unexpected end of stream while reading managed bitmap data.");
        }
        offset += read;
    }
}
public ManagedBitmap(Bitmap oldBitmap)
{
NikseBitmap nbmp = new NikseBitmap(oldBitmap);
@ -77,6 +95,23 @@ namespace Nikse.SubtitleEdit.Logic
}
}
/// <summary>
/// Writes this bitmap as one record at the current position of <paramref name="targetStream"/>.
/// The record is assembled in memory first (the text "MBMP", then width, height and every
/// color in array order) and then copied to the target in one go.
/// </summary>
/// <param name="targetStream">Writable stream that receives the record.</param>
public void AppendToStream(Stream targetStream)
{
    using (MemoryStream staging = new MemoryStream())
    {
        byte[] magic = System.Text.Encoding.UTF8.GetBytes("MBMP");
        staging.Write(magic, 0, magic.Length);

        WriteInt16(staging, (short)Width);
        WriteInt16(staging, (short)Height);

        for (int i = 0; i < _colors.Length; i++)
        {
            WriteColor(staging, _colors[i]);
        }

        // Copies the whole staged record, independent of the staging stream's position.
        staging.WriteTo(targetStream);
    }
}
private int ReadInt16(Stream stream)
{
byte b0 = (byte)stream.ReadByte();