From 28823e8318707197fdbb88b1246a9ec1cc06de3d Mon Sep 17 00:00:00 2001 From: niksedk Date: Mon, 9 Sep 2013 17:32:59 +0000 Subject: [PATCH] Implemented ocr compare images in one file to speed up loading - thx Zoltan :) git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@2068 99eadd0c-20b8-1223-b5c4-2a2b2df33de2 --- src/Forms/VobSubEditCharacters.cs | 37 +++++---- src/Forms/VobSubOcr.cs | 132 ++++++++++++++++++++++-------- src/Logic/ManagedBitmap.cs | 35 ++++++++ 3 files changed, 150 insertions(+), 54 deletions(-) diff --git a/src/Forms/VobSubEditCharacters.cs b/src/Forms/VobSubEditCharacters.cs index 0df9c9de3..bf3564cfc 100644 --- a/src/Forms/VobSubEditCharacters.cs +++ b/src/Forms/VobSubEditCharacters.cs @@ -44,10 +44,10 @@ namespace Nikse.SubtitleEdit.Forms pictureBox1.SizeMode = PictureBoxSizeMode.AutoSize; _directoryPath = Configuration.VobSubCompareFolder + databaseFolderName + Path.DirectorySeparatorChar; - if (!File.Exists(_directoryPath + "CompareDescription.xml")) + if (!File.Exists(_directoryPath + "Images.xml")) _compareDoc.LoadXml(""); else - _compareDoc.Load(_directoryPath + "CompareDescription.xml"); + _compareDoc.Load(_directoryPath + "Images.xml"); Refill(Additions); @@ -93,7 +93,7 @@ namespace Nikse.SubtitleEdit.Forms { if (name == a.Name) { - listBoxFileNames.Items.Add("[" + text +"] " + node.InnerText + ".mbmp"); + listBoxFileNames.Items.Add("[" + text +"] " + node.InnerText); _italics.Add(node.Attributes["Italic"] != null); } @@ -119,7 +119,7 @@ namespace Nikse.SubtitleEdit.Forms private void FillComboWithUniqueAndSortedTexts() { List texts = new List(); - foreach (XmlNode node in _compareDoc.DocumentElement.SelectNodes("FileName")) + foreach (XmlNode node in _compareDoc.DocumentElement.SelectNodes("Item")) { if (node.Attributes.Count >= 1) { @@ -182,21 +182,22 @@ namespace Nikse.SubtitleEdit.Forms private void ListBoxFileNamesSelectedIndexChanged(object sender, EventArgs e) { checkBoxItalic.Checked = _italics[listBoxFileNames.SelectedIndex]; - string fileName = _directoryPath + GetSelectedFileName(); - Bitmap bmp; - if (File.Exists(fileName)) + string databaseName = _directoryPath + "Images.db"; + string posAsString = GetSelectedFileName(); + Bitmap bmp = null; + + if (File.Exists(databaseName)) { - ManagedBitmap tmp = new ManagedBitmap(fileName); - try - { - labelImageInfo.Text = string.Format(Configuration.Settings.Language.VobSubEditCharacters.Image + " - {0}x{1}", tmp.Width, tmp.Height); + using (var f = new FileStream(databaseName, FileMode.Open)) + { + int pos = Convert.ToInt32(databaseName); + f.Position = pos; + ManagedBitmap mbmp = new ManagedBitmap(f); + bmp = mbmp.ToOldBitmap(); } - catch - { - } - bmp = tmp.ToOldBitmap(); } - else + + if (bmp == null) { bmp = new Bitmap(1,1); labelImageInfo.Text = Configuration.Settings.Language.VobSubEditCharacters.ImageFileNotFound; @@ -233,7 +234,7 @@ namespace Nikse.SubtitleEdit.Forms string target = GetSelectedFileName(); target = target.Substring(0, target.Length - 4); - XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + target + "']"); + XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + target + "']"); if (node != null) { string newText = textBoxText.Text; @@ -308,7 +309,7 @@ namespace Nikse.SubtitleEdit.Forms int oldComboBoxIndex = comboBoxTexts.SelectedIndex; string target = GetSelectedFileName(); target = target.Substring(0, target.Length - 4); - XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + target + "']"); + XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + target + "']"); if (node != null) { _compareDoc.DocumentElement.RemoveChild(node); diff --git a/src/Forms/VobSubOcr.cs b/src/Forms/VobSubOcr.cs index 4fd060c78..4aa7b57af 100644 --- a/src/Forms/VobSubOcr.cs +++ b/src/Forms/VobSubOcr.cs @@ -723,38 +723,81 @@ namespace Nikse.SubtitleEdit.Forms _compareDoc.LoadXml(""); else _compareDoc.Load(path + "CompareDescription.xml"); - - foreach (string bmpFileName in Directory.GetFiles(path, "*.bmp")) + + string databaseName = path + "Images.db"; + if (!File.Exists(databaseName)) { - string newName = bmpFileName.Replace(".bmp", ".mbmp"); - if (!File.Exists(newName)) + using (var f = new FileStream(databaseName, FileMode.Create)) { - Bitmap b = new Bitmap(bmpFileName); - ManagedBitmap m = new ManagedBitmap(b); - b.Dispose(); - m.Save(newName); - } - } - - - foreach (string bmpFileName in Directory.GetFiles(path, "*.mbmp")) - { - string name = Path.GetFileNameWithoutExtension(bmpFileName); - - XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']"); - if (node != null) - { - bool isItalic = node.Attributes["Italic"] != null; - int expandCount = 0; - if (node.Attributes["Expand"] != null) + foreach (string bmpFileName in Directory.GetFiles(path, "*.bmp")) { - if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount)) - expandCount = 0; + string name = Path.GetFileNameWithoutExtension(bmpFileName); + + XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']"); + if (node != null) + { + node.InnerText = f.Position.ToString(CultureInfo.InvariantCulture); + var b = new Bitmap(bmpFileName); + var m = new ManagedBitmap(b); + b.Dispose(); + m.AppendToStream(f); + } + } + f.Close(); + } + _compareDoc.Save(path + "Images.xml"); + string text = File.ReadAllText(path + "Images.xml"); + File.WriteAllText(path + "Images.xml", text.Replace("", "")); + } + + if (File.Exists(databaseName)) + { + _compareDoc.Load(path + "Images.xml"); + using (var f = new FileStream(databaseName, FileMode.Open)) + { + foreach (XmlNode node in _compareDoc.DocumentElement.SelectNodes("Item")) + { + try //if (node.Attributes["Pos"] != null) + { + string name = node.InnerText; + int pos = Convert.ToInt32(name); + bool isItalic = node.Attributes["Italic"] != null; + int expandCount = 0; + if (node.Attributes["Expand"] != null) + { + if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount)) + expandCount = 0; + } + f.Position = pos; + ManagedBitmap mbmp = new ManagedBitmap(f); + _compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount)); + } + catch + { + //MessageBox.Show(node.OuterXml); + } } - ManagedBitmap mbmp = new ManagedBitmap(bmpFileName); - _compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount)); } } + + // foreach (string bmpFileName in Directory.GetFiles(path, "*.mbmp")) + // { + // string name = Path.GetFileNameWithoutExtension(bmpFileName); + + // XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + name + "']"); + // if (node != null) + // { + // bool isItalic = node.Attributes["Italic"] != null; + // int expandCount = 0; + // if (node.Attributes["Expand"] != null) + // { + // if (!int.TryParse(node.Attributes["Expand"].InnerText, out expandCount)) + // expandCount = 0; + // } + // ManagedBitmap mbmp = new ManagedBitmap(bmpFileName); + // _compareBitmaps.Add(new CompareItem(mbmp, name, isItalic, expandCount)); + // } + // } } private void DisposeImageCompareBitmaps() @@ -2241,7 +2284,7 @@ namespace Nikse.SubtitleEdit.Forms maxDiff = 12.9; // let bluray sup have a 12.9% diff if (differencePercentage < maxDiff) //_vobSubOcrSettings.AllowDifferenceInPercent) // should be around 1.0... { - XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + _compareBitmaps[smallestIndex].Name + "']"); + XmlNode node = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + _compareBitmaps[smallestIndex].Name + "']"); if (node != null && _bluRaySubtitlesOriginal != null && "ceoil".Contains(node.Attributes["Text"].InnerText) && differencePercentage > 12) node = null; if (node != null) @@ -2271,7 +2314,7 @@ namespace Nikse.SubtitleEdit.Forms } } - XmlNode nodeGuess = _compareDoc.DocumentElement.SelectSingleNode("FileName[.='" + _compareBitmaps[smallestIndex].Name + "']"); + XmlNode nodeGuess = _compareDoc.DocumentElement.SelectSingleNode("Item[.='" + _compareBitmaps[smallestIndex].Name + "']"); if (nodeGuess != null) { bool isItalicGuess = nodeGuess.Attributes["Italic"] != null; @@ -2281,7 +2324,7 @@ namespace Nikse.SubtitleEdit.Forms if (!int.TryParse(nodeGuess.Attributes["Expand"].InnerText, out expandCountGuess)) expandCountGuess = 0; } - secondBestGuess = new CompareMatch(nodeGuess.Attributes["Text"].InnerText, isItalicGuess, expandCountGuess, _compareBitmaps[smallestIndex].Name); + secondBestGuess = new CompareMatch(nodeGuess.InnerText, isItalicGuess, expandCountGuess, _compareBitmaps[smallestIndex].Name); } } @@ -2314,7 +2357,7 @@ namespace Nikse.SubtitleEdit.Forms if (compareItem.NumberOfForegroundColors == -1) compareItem.NumberOfForegroundColors = CalculateNumberOfForegroundColors(compareItem.Bitmap); - if (Math.Abs(compareItem.NumberOfForegroundColors - numberOfForegroundColors) < minForeColorMatch) + if (Math.Abs(compareItem.NumberOfForegroundColors - numberOfForegroundColors) < 50) { int dif = ImageSplitter.IsBitmapsAlike(compareItem.Bitmap, target); if (dif < smallestDifference) @@ -2645,13 +2688,30 @@ namespace Nikse.SubtitleEdit.Forms private string SaveCompareItem(Bitmap newTarget, string text, bool isItalic, int expandCount) { string path = Configuration.VobSubCompareFolder + comboBoxCharacterDatabase.SelectedItem + Path.DirectorySeparatorChar; - string name = Guid.NewGuid().ToString(); - string fileName = path + name + ".bmp"; - new ManagedBitmap(newTarget).Save(fileName.Replace(".bmp", ".mbmp")); + string databaseName = path + "Images.db"; + FileStream f; + long pos = 0; + if (!File.Exists(databaseName)) + { + using (f = new FileStream(databaseName, FileMode.Create)) + { + pos = f.Position; + new ManagedBitmap(newTarget).AppendToStream(f); + } + } + else + { + using (f = new FileStream(databaseName, FileMode.Append)) + { + pos = f.Position; + new ManagedBitmap(newTarget).AppendToStream(f); + } + } + string name = pos.ToString(CultureInfo.InvariantCulture); _compareBitmaps.Add(new CompareItem(new ManagedBitmap(newTarget), name, isItalic, expandCount)); - XmlElement element = _compareDoc.CreateElement("FileName"); + XmlElement element = _compareDoc.CreateElement("Item"); XmlAttribute attribute = _compareDoc.CreateAttribute("Text"); attribute.InnerText = text; element.Attributes.Append(attribute); @@ -2667,9 +2727,9 @@ namespace Nikse.SubtitleEdit.Forms italic.InnerText = "true"; element.Attributes.Append(italic); } - element.InnerText = name; + element.InnerText = pos.ToString(CultureInfo.InvariantCulture); _compareDoc.DocumentElement.AppendChild(element); - _compareDoc.Save(path + "CompareDescription.xml"); + _compareDoc.Save(path + "Images.xml"); return name; } diff --git a/src/Logic/ManagedBitmap.cs b/src/Logic/ManagedBitmap.cs index 2132c63dd..1884ea423 100644 --- a/src/Logic/ManagedBitmap.cs +++ b/src/Logic/ManagedBitmap.cs @@ -39,6 +39,24 @@ namespace Nikse.SubtitleEdit.Logic } } + public ManagedBitmap(Stream stream) + { + byte[] buffer = new byte[8]; + stream.Read(buffer, 0, buffer.Length); + // System.Windows.Forms.MessageBox.Show(System.Text.Encoding.UTF8.GetString(buffer, 0, 4)); + Width = buffer[4] << 8 | buffer[5]; + Height = buffer[6] << 8 | buffer[7]; + _colors = new Color[Width * Height]; + buffer = new byte[Width * Height * 4]; + stream.Read(buffer, 0, buffer.Length); + int start = 0; + for (int i = 0; i < _colors.Length; i++) + { + _colors[i] = Color.FromArgb(buffer[start], buffer[start + 1], buffer[start + 2], buffer[start + 3]); + start += 4; + } + } + public ManagedBitmap(Bitmap oldBitmap) { NikseBitmap nbmp = new NikseBitmap(oldBitmap); @@ -77,6 +95,23 @@ namespace Nikse.SubtitleEdit.Logic } } + public void AppendToStream(Stream targetStream) + { + using (MemoryStream outFile = new MemoryStream()) + { + byte[] buffer = System.Text.Encoding.UTF8.GetBytes("MBMP"); + outFile.Write(buffer, 0, buffer.Length); + WriteInt16(outFile, (short)Width); + WriteInt16(outFile, (short)Height); + foreach (Color c in _colors) + { + WriteColor(outFile, c); + } + buffer = outFile.ToArray(); + targetStream.Write(buffer, 0, buffer.Length); + } + } + private int ReadInt16(Stream stream) { byte b0 = (byte)stream.ReadByte();