Implemented ocr compare images in one file to speed up loading - thx Zoltan :)

git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@2068 99eadd0c-20b8-1223-b5c4-2a2b2df33de2
This commit is contained in:
niksedk 2013-09-09 17:32:59 +00:00
parent cd3620a8ab
commit 28823e8318
3 changed files with 150 additions and 54 deletions

View File

@ -44,10 +44,10 @@ namespace Nikse.SubtitleEdit.Forms
pictureBox1.SizeMode = PictureBoxSizeMode.AutoSize; pictureBox1.SizeMode = PictureBoxSizeMode.AutoSize;
_directoryPath = Configuration.VobSubCompareFolder + databaseFolderName + Path.DirectorySeparatorChar; _directoryPath = Configuration.VobSubCompareFolder + databaseFolderName + Path.DirectorySeparatorChar;
if (!File.Exists(_directoryPath + "CompareDescription.xml")) if (!File.Exists(_directoryPath + "Images.xml"))
_compareDoc.LoadXml("<OcrBitmaps></OcrBitmaps>"); _compareDoc.LoadXml("<OcrBitmaps></OcrBitmaps>");
else else
_compareDoc.Load(_directoryPath + "CompareDescription.xml"); _compareDoc.Load(_directoryPath + "Images.xml");
Refill(Additions); Refill(Additions);
@ -93,7 +93,7 @@ namespace Nikse.SubtitleEdit.Forms
{ {
if (name == a.Name) if (name == a.Name)
{ {
listBoxFileNames.Items.Add("[" + text +"] " + node.InnerText + ".mbmp"); listBoxFileNames.Items.Add("[" + text +"] " + node.InnerText);
_italics.Add(node.Attributes["Italic"] != null); _italics.Add(node.Attributes["Italic"] != null);
} }
@ -119,7 +119,7 @@ namespace Nikse.SubtitleEdit.Forms
private void FillComboWithUniqueAndSortedTexts() private void FillComboWithUniqueAndSortedTexts()
{ {
List<string> texts = new List<string>(); List<string> texts = new List<string>();
foreach (XmlNode node in _compareDoc.DocumentElement.SelectNodes("FileName")) foreach (XmlNode node in _compareDoc.DocumentElement.SelectNodes("Item"))
{ {
if (node.Attributes.Count >= 1) if (node.Attributes.Count >= 1)
{ {
@ -182,21 +182,22 @@ namespace Nikse.SubtitleEdit.Forms
private void ListBoxFileNamesSelectedIndexChanged(object sender, EventArgs e) private void ListBoxFileNamesSelectedIndexChanged(object sender, EventArgs e)
{ {
checkBoxItalic.Checked = _italics[listBoxFileNames.SelectedIndex]; checkBoxItalic.Checked = _italics[listBoxFileNames.SelectedIndex];
string fileName = _directoryPath + GetSelectedFileName(); string databaseName = _directoryPath + "Images.db";
Bitmap bmp; string posAsString = GetSelectedFileName();
if (File.Exists(fileName)) Bitmap bmp = null;
if (File.Exists(databaseName))
{ {
ManagedBitmap tmp = new ManagedBitmap(fileName); using (var f = new FileStream(databaseName, FileMode.Open))
try {
{ int pos = Convert.ToInt32(databaseName);
labelImageInfo.Text = string.Format(Configuration.Settings.Language.VobSubEditCharacters.Image + " - {0}x{1}", tmp.Width, tmp.Height); f.Position = pos;
ManagedBitmap mbmp = new ManagedBitmap(f);
bmp = mbmp.ToOldBitmap();
} }
catch
{
}
bmp = tmp.ToOldBitmap();
} }
else
if (bmp == null)
{ {
bmp = new Bitmap(1,1); bmp = new Bitmap(1,1);
labelImageInfo.Text = Configuration.Settings.Language.VobSubEditCharacters.ImageFileNotFound; labelImageInfo.Text = Configuration.Settings.Language.VobSubEditCharacters.ImageFileNotFound;
@ -233,7 +234,7 @@ namespace Nikse.SubtitleEdit.Forms
string target = GetSelectedFileName(); string target = GetSelectedFileName();
target = target.Substring(0, target.Length - 4); target = target.Substring(0, target.Length - 4);
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + target + "']"); XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + target + "']");
if (node != null) if (node != null)
{ {
string newText = textBoxText.Text; string newText = textBoxText.Text;
@ -308,7 +309,7 @@ namespace Nikse.SubtitleEdit.Forms
int oldComboBoxIndex = comboBoxTexts.SelectedIndex; int oldComboBoxIndex = comboBoxTexts.SelectedIndex;
string target = GetSelectedFileName(); string target = GetSelectedFileName();
target = target.Substring(0, target.Length - 4); target = target.Substring(0, target.Length - 4);
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + target + "']"); XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + target + "']");
if (node != null) if (node != null)
{ {
_compareDoc.DocumentElement.RemoveChild(node); _compareDoc.DocumentElement.RemoveChild(node);

View File

@ -723,38 +723,81 @@ namespace Nikse.SubtitleEdit.Forms
_compareDoc.LoadXml("<OcrBitmaps></OcrBitmaps>"); _compareDoc.LoadXml("<OcrBitmaps></OcrBitmaps>");
else else
_compareDoc.Load(path + "CompareDescription.xml"); _compareDoc.Load(path + "CompareDescription.xml");
foreach (string bmpFileName in Directory.GetFiles(path, "*.bmp")) string databaseName = path + "Images.db";
if (!File.Exists(databaseName))
{ {
string newName = bmpFileName.Replace(".bmp", ".mbmp"); using (var f = new FileStream(databaseName, FileMode.Create))
if (!File.Exists(newName))
{ {
Bitmap b = new Bitmap(bmpFileName); foreach (string bmpFileName in Directory.GetFiles(path, "*.bmp"))
ManagedBitmap m = new ManagedBitmap(b);
b.Dispose();
m.Save(newName);
}
}
foreach (string bmpFileName in Directory.GetFiles(path, "*.mbmp"))
{
string name = Path.GetFileNameWithoutExtension(bmpFileName);
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']");
if (node != null)
{
bool isItalic = node.Attributes["Italic"] != null;
int expandCount = 0;
if (node.Attributes["Expand"] != null)
{ {
if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount)) string name = Path.GetFileNameWithoutExtension(bmpFileName);
expandCount = 0;
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']");
if (node != null)
{
node.InnerText = f.Position.ToString(CultureInfo.InvariantCulture);
var b = new Bitmap(bmpFileName);
var m = new ManagedBitmap(b);
b.Dispose();
m.AppendToStream(f);
}
}
f.Close();
}
_compareDoc.Save(path + "Images.xml");
string text = File.ReadAllText(path + "Images.xml");
File.WriteAllText(path + "Images.xml", text.Replace("<FileName", "<Item").Replace("</FileName>", "</Item>"));
}
if (File.Exists(databaseName))
{
_compareDoc.Load(path + "Images.xml");
using (var f = new FileStream(databaseName, FileMode.Open))
{
foreach (XmlNode node in _compareDoc.DocumentElement.SelectNodes("Item"))
{
try //if (node.Attributes["Pos"] != null)
{
string name = node.InnerText;
int pos = Convert.ToInt32(name);
bool isItalic = node.Attributes["Italic"] != null;
int expandCount = 0;
if (node.Attributes["Expand"] != null)
{
if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount))
expandCount = 0;
}
f.Position = pos;
ManagedBitmap mbmp = new ManagedBitmap(f);
_compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount));
}
catch
{
//MessageBox.Show(node.OuterXml);
}
} }
ManagedBitmap mbmp = new ManagedBitmap(bmpFileName);
_compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount));
} }
} }
// foreach (string bmpFileName in Directory.GetFiles(path, "*.mbmp"))
// {
// string name = Path.GetFileNameWithoutExtension(bmpFileName);
// XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']");
// if (node != null)
// {
// bool isItalic = node.Attributes["Italic"] != null;
// int expandCount = 0;
// if (node.Attributes["Expand"] != null)
// {
// if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount))
// expandCount = 0;
// }
// ManagedBitmap mbmp = new ManagedBitmap(bmpFileName);
// _compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount));
// }
// }
} }
private void DisposeImageCompareBitmaps() private void DisposeImageCompareBitmaps()
@ -2241,7 +2284,7 @@ namespace Nikse.SubtitleEdit.Forms
maxDiff = 12.9; // let bluray sup have a 12.9% diff maxDiff = 12.9; // let bluray sup have a 12.9% diff
if (differencePercentage < maxDiff) //_vobSubOcrSettings.AllowDifferenceInPercent) // should be around 1.0... if (differencePercentage < maxDiff) //_vobSubOcrSettings.AllowDifferenceInPercent) // should be around 1.0...
{ {
XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + _compareBitmaps[smallestIndex].Name + "']"); XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + _compareBitmaps[smallestIndex].Name + "']");
if (node != null && _bluRaySubtitlesOriginal != null && "ceoil".Contains(node.Attributes["Text"].InnerText) && differencePercentage > 12) if (node != null && _bluRaySubtitlesOriginal != null && "ceoil".Contains(node.Attributes["Text"].InnerText) && differencePercentage > 12)
node = null; node = null;
if (node != null) if (node != null)
@ -2271,7 +2314,7 @@ namespace Nikse.SubtitleEdit.Forms
} }
} }
XmlNode nodeGuess = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + _compareBitmaps[smallestIndex].Name + "']"); XmlNode nodeGuess = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + _compareBitmaps[smallestIndex].Name + "']");
if (nodeGuess != null) if (nodeGuess != null)
{ {
bool isItalicGuess = nodeGuess.Attributes["Italic"] != null; bool isItalicGuess = nodeGuess.Attributes["Italic"] != null;
@ -2281,7 +2324,7 @@ namespace Nikse.SubtitleEdit.Forms
if (!int.TryParse(nodeGuess.Attributes["Expand"].InnerText, out expandCountGuess)) if (!int.TryParse(nodeGuess.Attributes["Expand"].InnerText, out expandCountGuess))
expandCountGuess = 0; expandCountGuess = 0;
} }
secondBestGuess = new CompareMatch(nodeGuess.Attributes["Text"].InnerText, isItalicGuess, expandCountGuess, _compareBitmaps[smallestIndex].Name); secondBestGuess = new CompareMatch(nodeGuess.InnerText, isItalicGuess, expandCountGuess, _compareBitmaps[smallestIndex].Name);
} }
} }
@ -2314,7 +2357,7 @@ namespace Nikse.SubtitleEdit.Forms
if (compareItem.NumberOfForegroundColors == -1) if (compareItem.NumberOfForegroundColors == -1)
compareItem.NumberOfForegroundColors = CalculateNumberOfForegroundColors(compareItem.Bitmap); compareItem.NumberOfForegroundColors = CalculateNumberOfForegroundColors(compareItem.Bitmap);
if (Math.Abs(compareItem.NumberOfForegroundColors - numberOfForegroundColors) < minForeColorMatch) if (Math.Abs(compareItem.NumberOfForegroundColors - numberOfForegroundColors) < 50)
{ {
int dif = ImageSplitter.IsBitmapsAlike(compareItem.Bitmap, target); int dif = ImageSplitter.IsBitmapsAlike(compareItem.Bitmap, target);
if (dif < smallestDifference) if (dif < smallestDifference)
@ -2645,13 +2688,30 @@ namespace Nikse.SubtitleEdit.Forms
private string SaveCompareItem(Bitmap newTarget, string text, bool isItalic, int expandCount) private string SaveCompareItem(Bitmap newTarget, string text, bool isItalic, int expandCount)
{ {
string path = Configuration.VobSubCompareFolder + comboBoxCharacterDatabase.SelectedItem + Path.DirectorySeparatorChar; string path = Configuration.VobSubCompareFolder + comboBoxCharacterDatabase.SelectedItem + Path.DirectorySeparatorChar;
string name = Guid.NewGuid().ToString(); string databaseName = path + "Images.db";
string fileName = path + name + ".bmp"; FileStream f;
new ManagedBitmap(newTarget).Save(fileName.Replace(".bmp", ".mbmp")); long pos = 0;
if (!File.Exists(databaseName))
{
using (f = new FileStream(databaseName, FileMode.Create))
{
pos = f.Position;
new ManagedBitmap(newTarget).AppendToStream(f);
}
}
else
{
using (f = new FileStream(databaseName, FileMode.Append))
{
pos = f.Position;
new ManagedBitmap(newTarget).AppendToStream(f);
}
}
string name = pos.ToString(CultureInfo.InvariantCulture);
_compareBitmaps.Add(new CompareItem(new ManagedBitmap(newTarget), name, isItalic, expandCount)); _compareBitmaps.Add(new CompareItem(new ManagedBitmap(newTarget), name, isItalic, expandCount));
XmlElement element = _compareDoc.CreateElement("FileName"); XmlElement element = _compareDoc.CreateElement("Item");
XmlAttribute attribute = _compareDoc.CreateAttribute("Text"); XmlAttribute attribute = _compareDoc.CreateAttribute("Text");
attribute.InnerText = text; attribute.InnerText = text;
element.Attributes.Append(attribute); element.Attributes.Append(attribute);
@ -2667,9 +2727,9 @@ namespace Nikse.SubtitleEdit.Forms
italic.InnerText = "true"; italic.InnerText = "true";
element.Attributes.Append(italic); element.Attributes.Append(italic);
} }
element.InnerText = name; element.InnerText = pos.ToString(CultureInfo.InvariantCulture);
_compareDoc.DocumentElement.AppendChild(element); _compareDoc.DocumentElement.AppendChild(element);
_compareDoc.Save(path + "CompareDescription.xml"); _compareDoc.Save(path + "Images.xml");
return name; return name;
} }

View File

@ -39,6 +39,24 @@ namespace Nikse.SubtitleEdit.Logic
} }
} }
/// <summary>
/// Reads one bitmap record from the current position of <paramref name="stream"/>.
/// </summary>
/// <remarks>
/// Layout as consumed here: an 8-byte header in which bytes 0-3 are skipped
/// (AppendToStream writes the ASCII tag "MBMP" there; it is not validated),
/// bytes 4-5 hold the width and bytes 6-7 the height, both big-endian.
/// The header is followed by Width * Height pixels of four bytes each, passed
/// to Color.FromArgb in stored order (alpha, red, green, blue).
/// If the stream ends before a buffer is filled, the missing bytes stay zero;
/// no exception is raised for a truncated record.
/// </remarks>
/// <param name="stream">Readable stream positioned at the start of a record.</param>
public ManagedBitmap(Stream stream)
{
    byte[] buffer = new byte[8];
    ReadFully(stream, buffer);
    Width = buffer[4] << 8 | buffer[5];
    Height = buffer[6] << 8 | buffer[7];
    _colors = new Color[Width * Height];
    buffer = new byte[Width * Height * 4];
    ReadFully(stream, buffer);
    int start = 0;
    for (int i = 0; i < _colors.Length; i++)
    {
        _colors[i] = Color.FromArgb(buffer[start], buffer[start + 1], buffer[start + 2], buffer[start + 3]);
        start += 4;
    }
}

/// <summary>
/// Fills <paramref name="buffer"/> from <paramref name="stream"/>, repeating the read
/// because a single Stream.Read call may return fewer bytes than requested.
/// Stops early, without throwing, when the stream reports end of data.
/// </summary>
private static void ReadFully(Stream stream, byte[] buffer)
{
    int offset = 0;
    while (offset < buffer.Length)
    {
        int read = stream.Read(buffer, offset, buffer.Length - offset);
        if (read <= 0)
        {
            break; // end of stream: leave the remaining bytes zeroed
        }
        offset += read;
    }
}
public ManagedBitmap(Bitmap oldBitmap) public ManagedBitmap(Bitmap oldBitmap)
{ {
NikseBitmap nbmp = new NikseBitmap(oldBitmap); NikseBitmap nbmp = new NikseBitmap(oldBitmap);
@ -77,6 +95,23 @@ namespace Nikse.SubtitleEdit.Logic
} }
} }
/// <summary>
/// Serializes this bitmap as one record and writes it to <paramref name="targetStream"/>
/// at the stream's current position.
/// </summary>
/// <remarks>
/// The record is assembled in memory first and then handed to the target stream
/// in a single Write call. Order of fields: the UTF-8 bytes of "MBMP", the width,
/// the height, then every entry of the color array via WriteColor.
/// </remarks>
/// <param name="targetStream">Writable stream that receives the record.</param>
public void AppendToStream(Stream targetStream)
{
    using (var record = new MemoryStream())
    {
        // Signature tag identifying the record.
        byte[] signature = System.Text.Encoding.UTF8.GetBytes("MBMP");
        record.Write(signature, 0, signature.Length);

        // Dimensions are narrowed to 16 bits before being written.
        WriteInt16(record, (short)Width);
        WriteInt16(record, (short)Height);

        // Pixel data, in array order.
        for (int index = 0; index < _colors.Length; index++)
        {
            WriteColor(record, _colors[index]);
        }

        byte[] payload = record.ToArray();
        targetStream.Write(payload, 0, payload.Length);
    }
}
private int ReadInt16(Stream stream) private int ReadInt16(Stream stream)
{ {
byte b0 = (byte)stream.ReadByte(); byte b0 = (byte)stream.ReadByte();