Working on user OcrFixReplaceList

This commit is contained in:
niksedk 2014-09-27 00:20:22 +02:00 committed by XhmikosR
parent 10fbbb21b9
commit 1be3dc458a
4 changed files with 331 additions and 296 deletions

View File

@ -1,17 +1,16 @@
using System;
using Nikse.SubtitleEdit.Logic;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Windows.Forms;
using System.Xml;
using Nikse.SubtitleEdit.Logic;
using System.Globalization;
namespace Nikse.SubtitleEdit.Forms
{
public partial class AddToOcrReplaceList : Form
{
private string _threeLetterISOLanguageName;
private string _threeLetterIsoLanguageName;
public AddToOcrReplaceList()
{
@ -25,8 +24,8 @@ namespace Nikse.SubtitleEdit.Forms
private void FixLargeFonts()
{
Graphics graphics = this.CreateGraphics();
SizeF textSize = graphics.MeasureString(buttonOK.Text, this.Font);
var graphics = CreateGraphics();
var textSize = graphics.MeasureString(buttonOK.Text, Font);
if (textSize.Height > buttonOK.Height - 4)
{
int newButtonHeight = (int)(textSize.Height + 7 + 0.5);
@ -38,81 +37,21 @@ namespace Nikse.SubtitleEdit.Forms
{
string key = textBoxOcrFixKey.Text.Trim();
string value = textBoxOcrFixValue.Text.Trim();
if (key.Length == 0 || value.Length == 0)
if (key.Length == 0 || value.Length == 0 || key == value)
return;
if (key == value)
return;
var ocrFixWords = new Dictionary<string, string>();
var ocrFixPartialLines = new Dictionary<string, string>();
try
{
var ci = new CultureInfo(LanguageString.Replace("_", "-"));
_threeLetterISOLanguageName = ci.ThreeLetterISOLanguageName;
_threeLetterIsoLanguageName = ci.ThreeLetterISOLanguageName;
}
catch
catch (Exception exception)
{
}
string replaceListXmlFileName = Utilities.DictionaryFolder + _threeLetterISOLanguageName + "_OCRFixReplaceList.xml";
if (File.Exists(replaceListXmlFileName))
{
var xml = new XmlDocument();
xml.Load(replaceListXmlFileName);
ocrFixWords = Logic.Ocr.OcrFixEngine.LoadReplaceList(xml, "WholeWords");
ocrFixPartialLines = Logic.Ocr.OcrFixEngine.LoadReplaceList(xml, "PartialLines");
}
Dictionary<string, string> dictionary = ocrFixWords;
string elementName = "Word";
string parentName = "WholeWords";
if (key.Contains(' '))
{
dictionary = ocrFixPartialLines;
elementName = "LinePart";
parentName = "PartialLines";
}
if (dictionary.ContainsKey(key))
{
MessageBox.Show(Configuration.Settings.Language.Settings.WordAlreadyExists);
MessageBox.Show(exception.Message);
return;
}
dictionary.Add(key, value);
//Sort
var sortedDictionary = new SortedDictionary<string, string>();
foreach (var pair in dictionary)
{
if (!sortedDictionary.ContainsKey(pair.Key))
sortedDictionary.Add(pair.Key, pair.Value);
}
var doc = new XmlDocument();
if (File.Exists(replaceListXmlFileName))
doc.Load(replaceListXmlFileName);
else
doc.LoadXml("<OCRFixReplaceList><WholeWords/><PartialWords/><PartialLines/><BeginLines/><EndLines/><WholeLines/></OCRFixReplaceList>");
XmlNode wholeWords = doc.DocumentElement.SelectSingleNode(parentName);
wholeWords.RemoveAll();
foreach (var pair in sortedDictionary)
{
XmlNode node = doc.CreateElement(elementName);
XmlAttribute wordFrom = doc.CreateAttribute("from");
wordFrom.InnerText = pair.Key;
node.Attributes.Append(wordFrom);
XmlAttribute wordTo = doc.CreateAttribute("to");
wordTo.InnerText = pair.Value;
node.Attributes.Append(wordTo);
wholeWords.AppendChild(node);
}
doc.Save(replaceListXmlFileName);
var ocrFixReplaceList = OcrFixReplaceList.FromLanguageId(_threeLetterIsoLanguageName);
ocrFixReplaceList.AddWordOrPartial(key, value);
DialogResult = DialogResult.OK;
NewSource = key;
NewTarget = value;
@ -144,7 +83,7 @@ namespace Nikse.SubtitleEdit.Forms
if (hunspellName != null && name.Equals(hunspellName, StringComparison.OrdinalIgnoreCase))
comboBoxDictionaries.SelectedIndex = comboBoxDictionaries.Items.Count - 1;
}
_threeLetterISOLanguageName = languageId;
_threeLetterIsoLanguageName = languageId;
}
public string LanguageString

View File

@ -21,13 +21,11 @@ namespace Nikse.SubtitleEdit.Forms
private int _ssaFontColor;
private string _listBoxSearchString = string.Empty;
private DateTime _listBoxSearchStringLastUsed = DateTime.Now;
private string oldVlcLocation;
private string oldVlcLocationRelative;
private readonly List<string> _wordListNamesEtc = new List<string>();
private List<string> _userWordList = new List<string>();
private Dictionary<string, string> _ocrFixWords = new Dictionary<string, string>();
private Dictionary<string, string> _ocrFixPartialLines = new Dictionary<string, string>();
private OcrFixReplaceList _ocrFixReplaceList;
private readonly string _oldVlcLocation;
private readonly string _oldVlcLocationRelative;
private class ComboBoxLanguage
{
@ -536,7 +534,7 @@ namespace Nikse.SubtitleEdit.Forms
comboBoxMergeShortLineLength.Items.Clear();
for (int i = 10; i < 100; i++)
comboBoxMergeShortLineLength.Items.Add(i.ToString());
comboBoxMergeShortLineLength.Items.Add(i.ToString(CultureInfo.InvariantCulture));
if (toolsSettings.MergeLinesShorterThan >= 10 && toolsSettings.MergeLinesShorterThan - 10 < comboBoxMergeShortLineLength.Items.Count)
comboBoxMergeShortLineLength.SelectedIndex = toolsSettings.MergeLinesShorterThan - 10;
@ -848,8 +846,8 @@ namespace Nikse.SubtitleEdit.Forms
comboBoxShortcutKey.Left = labelShortcutKey.Left + labelShortcutKey.Width + 2;
buttonUpdateShortcut.Left = comboBoxShortcutKey.Left + comboBoxShortcutKey.Width + 15;
oldVlcLocation = Configuration.Settings.General.VlcLocation;
oldVlcLocationRelative = Configuration.Settings.General.VlcLocationRelative;
_oldVlcLocation = Configuration.Settings.General.VlcLocation;
_oldVlcLocationRelative = Configuration.Settings.General.VlcLocationRelative;
labelPlatform.Text = (IntPtr.Size * 8) + "-bit";
}
@ -863,8 +861,8 @@ namespace Nikse.SubtitleEdit.Forms
private void FixLargeFonts()
{
Graphics graphics = this.CreateGraphics();
SizeF textSize = graphics.MeasureString(buttonOK.Text, this.Font);
var graphics = CreateGraphics();
var textSize = graphics.MeasureString(buttonOK.Text, Font);
if (textSize.Height > buttonOK.Height - 4)
{
int newButtonHeight = (int)(textSize.Height + 7 + 0.5);
@ -939,7 +937,7 @@ namespace Nikse.SubtitleEdit.Forms
public void Initialize(Icon icon, Image newFile, Image openFile, Image saveFile, Image saveFileAs, Image find, Image replace, Image fixCommonErrors,
Image visualSync, Image spellCheck, Image settings, Image help)
{
this.Icon = (Icon)icon.Clone();
Icon = (Icon)icon.Clone();
pictureBoxNew.Image = (Image)newFile.Clone();
pictureBoxOpen.Image = (Image)openFile.Clone();
pictureBoxSave.Image = (Image)saveFile.Clone();
@ -991,6 +989,17 @@ namespace Nikse.SubtitleEdit.Forms
if (!found)
cultures.Add(culture);
}
else if (Directory.GetFiles(dir, culture.ThreeLetterISOLanguageName + "_OCRFixReplaceList_User.xml").Length == 1)
{
bool found = false;
foreach (CultureInfo ci in cultures)
{
if (ci.ThreeLetterISOLanguageName == culture.ThreeLetterISOLanguageName)
found = true;
}
if (!found)
cultures.Add(culture);
}
}
}
@ -998,6 +1007,17 @@ namespace Nikse.SubtitleEdit.Forms
foreach (var culture in CultureInfo.GetCultures(CultureTypes.NeutralCultures))
{
if (Directory.GetFiles(dir, culture.ThreeLetterISOLanguageName + "_OCRFixReplaceList.xml").Length == 1)
{
bool found = false;
foreach (CultureInfo ci in cultures)
{
if (ci.ThreeLetterISOLanguageName == culture.ThreeLetterISOLanguageName)
found = true;
}
if (!found)
cultures.Add(culture);
}
else if (Directory.GetFiles(dir, culture.ThreeLetterISOLanguageName + "_OCRFixReplaceList_User.xml").Length == 1)
{
bool found = false;
foreach (CultureInfo ci in cultures)
@ -1655,11 +1675,9 @@ namespace Nikse.SubtitleEdit.Forms
var measuredWidth = TextDraw.MeasureTextWidth(font, sb.ToString(), false) + 1;
var measuredHeight = TextDraw.MeasureTextHeight(font, sb.ToString(), false) + 1;
float left = 5;
left = ((float)(bmp.Width - measuredWidth * 0.8 + 15) / 2);
float left = ((float)(bmp.Width - measuredWidth * 0.8 + 15) / 2);
float top = 2;
top = bmp.Height - measuredHeight - ((int)10);
float top = bmp.Height - measuredHeight - 10;
const int leftMargin = 0;
int pathPointsStart = -1;
@ -1744,33 +1762,22 @@ namespace Nikse.SubtitleEdit.Forms
if (cb == null)
return;
_ocrFixWords = new Dictionary<string, string>();
_ocrFixPartialLines = new Dictionary<string, string>();
if (reloadListBox)
listBoxOcrFixList.Items.Clear();
string replaceListXmlFileName = Utilities.DictionaryFolder + cb.CultureInfo.ThreeLetterISOLanguageName + "_OCRFixReplaceList.xml";
if (File.Exists(replaceListXmlFileName))
_ocrFixReplaceList = OcrFixReplaceList.FromLanguageId(cb.CultureInfo.ThreeLetterISOLanguageName);
if (reloadListBox)
{
var doc = new XmlDocument();
doc.Load(replaceListXmlFileName);
_ocrFixWords = Logic.Ocr.OcrFixEngine.LoadReplaceList(doc, "WholeWords");
_ocrFixPartialLines = Logic.Ocr.OcrFixEngine.LoadReplaceList(doc, "PartialLines");
if (reloadListBox)
listBoxOcrFixList.BeginUpdate();
foreach (var pair in _ocrFixReplaceList.WordReplaceList)
{
listBoxOcrFixList.BeginUpdate();
foreach (var pair in _ocrFixWords)
{
listBoxOcrFixList.Items.Add(pair.Key + " --> " + pair.Value);
}
foreach (var pair in _ocrFixPartialLines)
{
listBoxOcrFixList.Items.Add(pair.Key + " --> " + pair.Value);
}
listBoxOcrFixList.Sorted = true;
listBoxOcrFixList.EndUpdate();
listBoxOcrFixList.Items.Add(pair.Key + " --> " + pair.Value);
}
foreach (var pair in _ocrFixReplaceList.PartialLineWordBoundaryReplaceList)
{
listBoxOcrFixList.Items.Add(pair.Key + " --> " + pair.Value);
}
listBoxOcrFixList.Sorted = true;
listBoxOcrFixList.EndUpdate();
}
}
@ -2078,63 +2085,19 @@ namespace Nikse.SubtitleEdit.Forms
if (key.Length == 0 || value.Length == 0 || key == value || Utilities.IsInteger(key))
return;
Dictionary<string, string> dictionary = _ocrFixWords;
string elementName = "Word";
string parentName = "WholeWords";
if (key.Contains(' '))
{
dictionary = _ocrFixPartialLines;
elementName = "LinePart";
parentName = "PartialLines";
}
var cb = comboBoxWordListLanguage.Items[comboBoxWordListLanguage.SelectedIndex] as ComboBoxLanguage;
if (cb == null)
return;
if (dictionary.ContainsKey(key))
var added = _ocrFixReplaceList.AddWordOrPartial(key, value);
if (!added)
{
MessageBox.Show(Configuration.Settings.Language.Settings.WordAlreadyExists);
return;
}
dictionary.Add(key, value);
//Sort
var sortedDictionary = new SortedDictionary<string, string>();
foreach (var pair in dictionary)
{
if (!sortedDictionary.ContainsKey(pair.Key))
sortedDictionary.Add(pair.Key, pair.Value);
}
string replaceListXmlFileName = Utilities.DictionaryFolder + cb.CultureInfo.ThreeLetterISOLanguageName + "_OCRFixReplaceList.xml";
var doc = new XmlDocument();
if (File.Exists(replaceListXmlFileName))
doc.Load(replaceListXmlFileName);
else
doc.LoadXml("<OCRFixReplaceList><WholeWords/><PartialWords/><PartialLines/><BeginLines/><EndLines/><WholeLines/></OCRFixReplaceList>");
XmlNode wholeWords = doc.DocumentElement.SelectSingleNode(parentName);
wholeWords.RemoveAll();
foreach (var pair in sortedDictionary)
{
XmlNode node = doc.CreateElement(elementName);
XmlAttribute wordFrom = doc.CreateAttribute("from");
wordFrom.InnerText = pair.Key;
node.Attributes.Append(wordFrom);
XmlAttribute wordTo = doc.CreateAttribute("to");
wordTo.InnerText = pair.Value;
node.Attributes.Append(wordTo);
wholeWords.AppendChild(node);
}
doc.Save(replaceListXmlFileName);
LoadOcrFixList(true);
textBoxOcrFixKey.Text = string.Empty;
textBoxOcrFixValue.Text = string.Empty;
textBoxOcrFixKey.Focus();
@ -2173,7 +2136,8 @@ namespace Nikse.SubtitleEdit.Forms
int index = listBoxOcrFixList.SelectedIndex;
string text = listBoxOcrFixList.Items[index].ToString();
string key = text.Substring(0, text.IndexOf(" --> ", StringComparison.Ordinal)).Trim();
if (_ocrFixWords.ContainsKey(key))
if (_ocrFixReplaceList.WordReplaceList.ContainsKey(key) || _ocrFixReplaceList.PartialLineWordBoundaryReplaceList.ContainsKey(key))
{
DialogResult result;
if (itemsToRemoveCount == 1)
@ -2182,42 +2146,18 @@ namespace Nikse.SubtitleEdit.Forms
result = MessageBox.Show(string.Format(Configuration.Settings.Language.Main.DeleteXLinesPrompt, itemsToRemoveCount), "Subtitle Edit", MessageBoxButtons.YesNo);
if (result == DialogResult.Yes)
{
int removeCount = 0;
for (int idx = listBoxOcrFixList.SelectedIndices.Count - 1; idx >= 0; idx--)
{
index = listBoxOcrFixList.SelectedIndices[idx];
text = listBoxOcrFixList.Items[index].ToString();
key = text.Substring(0, text.IndexOf(" --> ", StringComparison.Ordinal)).Trim();
if (_ocrFixWords.ContainsKey(key))
if (_ocrFixReplaceList.WordReplaceList.ContainsKey(key) || _ocrFixReplaceList.PartialLineWordBoundaryReplaceList.ContainsKey(key))
{
_ocrFixWords.Remove(key);
removeCount++;
_ocrFixReplaceList.RemoveWordOrPartial(key);
}
listBoxOcrFixList.Items.RemoveAt(index);
}
string replaceListXmlFileName = Utilities.DictionaryFolder + cb.CultureInfo.ThreeLetterISOLanguageName + "_OCRFixReplaceList.xml";
var doc = new XmlDocument();
doc.Load(replaceListXmlFileName);
XmlNode wholeWords = doc.DocumentElement.SelectSingleNode("WholeWords");
wholeWords.RemoveAll();
foreach (var pair in _ocrFixWords)
{
XmlNode node = doc.CreateElement("Word");
XmlAttribute wordFrom = doc.CreateAttribute("from");
wordFrom.InnerText = pair.Key;
node.Attributes.Append(wordFrom);
XmlAttribute wordTo = doc.CreateAttribute("to");
wordTo.InnerText = pair.Value;
node.Attributes.Append(wordTo);
wholeWords.AppendChild(node);
}
doc.Save(replaceListXmlFileName);
LoadOcrFixList(false);
buttonRemoveOcrFix.Enabled = false;
@ -2663,8 +2603,8 @@ namespace Nikse.SubtitleEdit.Forms
private void buttonCancel_Click(object sender, EventArgs e)
{
Configuration.Settings.General.VlcLocation = oldVlcLocation;
Configuration.Settings.General.VlcLocationRelative = oldVlcLocationRelative;
Configuration.Settings.General.VlcLocation = _oldVlcLocation;
Configuration.Settings.General.VlcLocationRelative = _oldVlcLocationRelative;
DialogResult = DialogResult.Cancel;
}

View File

@ -70,7 +70,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
_spellCheck.Location = new Point(parentForm.Left + (parentForm.Width / 2 - _spellCheck.Width / 2),
parentForm.Top + (parentForm.Height / 2 - _spellCheck.Height / 2));
_ocrFixReplaceList = new OcrFixReplaceList(threeLetterIsoLanguageName);
_ocrFixReplaceList = OcrFixReplaceList.FromLanguageId(threeLetterIsoLanguageName);
LoadSpellingDictionaries(threeLetterIsoLanguageName, hunspellName); // Hunspell etc.
AutoGuessesUsed = new List<string>();
@ -1249,7 +1249,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
case OcrSpellCheck.Action.AllwaysUseSuggestion:
try
{
_ocrFixReplaceList.SaveWordToWordList(word, _spellCheck.Word);
_ocrFixReplaceList.AddWordOrPartial(word, _spellCheck.Word);
}
catch (Exception exception)
{
@ -1262,7 +1262,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
case OcrSpellCheck.Action.ChangeAndSave:
try
{
_ocrFixReplaceList.SaveWordToWordList(word, _spellCheck.Word);
_ocrFixReplaceList.AddWordOrPartial(word, _spellCheck.Word);
}
catch (Exception exception)
{
@ -1281,7 +1281,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
result.FixedWholeLine = true;
break;
case OcrSpellCheck.Action.ChangeAllWholeText:
_ocrFixReplaceList.SaveWordToWholeLineList(_spellCheck.OriginalWholeText, _spellCheck.Paragraph);
_ocrFixReplaceList.AddToWholeLineList(_spellCheck.OriginalWholeText, _spellCheck.Paragraph);
result.Line = _spellCheck.Paragraph;
result.FixedWholeLine = true;
break;

View File

@ -10,7 +10,6 @@ namespace Nikse.SubtitleEdit.Logic
{
public class OcrFixReplaceList
{
private static readonly Regex RegExQuestion = new Regex(@"\S\?[A-ZÆØÅÄÖÉÈÀÙÂÊÎÔÛËÏa-zæøåäöéèàùâêîôûëï]", RegexOptions.Compiled);
private static readonly Regex RegExIandZero = new Regex(@"[a-zæøåöääöéèàùâêîôûëï][I1]", RegexOptions.Compiled);
private static readonly Regex RegExTime1 = new Regex(@"[a-zæøåöääöéèàùâêîôûëï][0]", RegexOptions.Compiled);
@ -18,8 +17,8 @@ namespace Nikse.SubtitleEdit.Logic
private static readonly Regex HexNumber = new Regex(@"^#?[\dABDEFabcdef]+$", RegexOptions.Compiled);
private static readonly Regex StartEndEndsWithNumber = new Regex(@"^\d+.+\d$", RegexOptions.Compiled);
private readonly Dictionary<string, string> _wordReplaceList;
private readonly Dictionary<string, string> _partialLineWordBoundaryReplaceList;
public Dictionary<string, string> WordReplaceList;
public readonly Dictionary<string, string> PartialLineWordBoundaryReplaceList;
private readonly Dictionary<string, string> _partialLineAlwaysReplaceList;
private readonly Dictionary<string, string> _beginLineReplaceList;
private readonly Dictionary<string, string> _endLineReplaceList;
@ -29,10 +28,11 @@ namespace Nikse.SubtitleEdit.Logic
private readonly Dictionary<string, string> _regExList;
private readonly string _replaceListXmlFileName;
public OcrFixReplaceList(string languageId)
public OcrFixReplaceList(string replaceListXmlFileName)
{
_wordReplaceList = new Dictionary<string, string>();
_partialLineWordBoundaryReplaceList = new Dictionary<string, string>();
_replaceListXmlFileName = replaceListXmlFileName;
WordReplaceList = new Dictionary<string, string>();
PartialLineWordBoundaryReplaceList = new Dictionary<string, string>();
_partialLineAlwaysReplaceList = new Dictionary<string, string>();
_beginLineReplaceList = new Dictionary<string, string>();
_endLineReplaceList = new Dictionary<string, string>();
@ -41,29 +41,46 @@ namespace Nikse.SubtitleEdit.Logic
_partialWordReplaceList = new Dictionary<string, string>();
_regExList = new Dictionary<string, string>();
_replaceListXmlFileName = Configuration.DictionariesFolder + languageId + "_OCRFixReplaceList.xml";
if (File.Exists(_replaceListXmlFileName))
{
var doc = new XmlDocument();
try
{
doc.Load(_replaceListXmlFileName);
}
catch (Exception exception)
{
MessageBox.Show("Unable to load " + _replaceListXmlFileName + ": " + exception.Message + Environment.NewLine);
}
var doc = LoadXmlReplaceListDocument();
var userDoc = LoadXmlReplaceListUserDocument();
_wordReplaceList = LoadReplaceList(doc, "WholeWords");
_partialWordReplaceListAlways = LoadReplaceList(doc, "PartialWordsAlways");
_partialWordReplaceList = LoadReplaceList(doc, "PartialWords");
_partialLineWordBoundaryReplaceList = LoadReplaceList(doc, "PartialLines");
_partialLineAlwaysReplaceList = LoadReplaceList(doc, "PartialAlwaysLines");
_beginLineReplaceList = LoadReplaceList(doc, "BeginLines");
_endLineReplaceList = LoadReplaceList(doc, "EndLines");
_wholeLineReplaceList = LoadReplaceList(doc, "WholeLines");
_regExList = LoadRegExList(doc, "RegularExpressions");
WordReplaceList = LoadReplaceList(doc, "WholeWords");
_partialWordReplaceListAlways = LoadReplaceList(doc, "PartialWordsAlways");
_partialWordReplaceList = LoadReplaceList(doc, "PartialWords");
PartialLineWordBoundaryReplaceList = LoadReplaceList(doc, "PartialLines");
_partialLineAlwaysReplaceList = LoadReplaceList(doc, "PartialAlwaysLines");
_beginLineReplaceList = LoadReplaceList(doc, "BeginLines");
_endLineReplaceList = LoadReplaceList(doc, "EndLines");
_wholeLineReplaceList = LoadReplaceList(doc, "WholeLines");
_regExList = LoadRegExList(doc, "RegularExpressions");
foreach (var kp in LoadReplaceList(userDoc, "WholeWords"))
{
if (!WordReplaceList.ContainsKey(kp.Key))
WordReplaceList.Add(kp.Key, kp.Value);
}
foreach (var kp in LoadReplaceList(userDoc, "RemovedWholeWords"))
{
if (WordReplaceList.ContainsKey(kp.Key))
WordReplaceList.Remove(kp.Key);
}
foreach (var kp in LoadReplaceList(userDoc, "PartialLines"))
{
if (!PartialLineWordBoundaryReplaceList.ContainsKey(kp.Key))
PartialLineWordBoundaryReplaceList.Add(kp.Key, kp.Value);
}
foreach (var kp in LoadReplaceList(userDoc, "RemovedPartialLines"))
{
if (PartialLineWordBoundaryReplaceList.ContainsKey(kp.Key))
PartialLineWordBoundaryReplaceList.Remove(kp.Key);
}
}
/// <summary>
/// Creates a replace list for a language. The file name is built as
/// Configuration.DictionariesFolder + languageId + "_OCRFixReplaceList.xml".
/// </summary>
/// <param name="languageId">File name prefix of the replace list; the callers visible in this change pass a three-letter ISO language name.</param>
/// <returns>A new instance constructed from the resulting file name.</returns>
public static OcrFixReplaceList FromLanguageId(string languageId)
{
    string replaceListXmlFileName = Configuration.DictionariesFolder + languageId + "_OCRFixReplaceList.xml";
    return new OcrFixReplaceList(replaceListXmlFileName);
}
private static Dictionary<string, string> LoadReplaceList(XmlDocument doc, string name)
@ -114,7 +131,7 @@ namespace Nikse.SubtitleEdit.Logic
public string FixOcrErrorViaLineReplaceList(string input)
{
// Whole line
// Whole line
foreach (string from in _wholeLineReplaceList.Keys)
{
if (input == from)
@ -139,7 +156,7 @@ namespace Nikse.SubtitleEdit.Logic
newText = newText.Remove(0, 3);
}
// begin line
// begin line
string[] lines = newText.Split(Utilities.NewLineChars, StringSplitOptions.RemoveEmptyEntries);
var sb = new StringBuilder();
foreach (string l in lines)
@ -188,10 +205,10 @@ namespace Nikse.SubtitleEdit.Logic
}
newText += post;
foreach (string from in _partialLineWordBoundaryReplaceList.Keys)
foreach (string from in PartialLineWordBoundaryReplaceList.Keys)
{
if (newText.Contains(from))
newText = ReplaceWord(newText, from, _partialLineWordBoundaryReplaceList[from]);
newText = ReplaceWord(newText, from, PartialLineWordBoundaryReplaceList[from]);
}
foreach (string from in _partialLineAlwaysReplaceList.Keys)
@ -382,54 +399,54 @@ namespace Nikse.SubtitleEdit.Logic
word = word.Insert(match.Index + 2, " ");
}
foreach (string from in _wordReplaceList.Keys)
foreach (string from in WordReplaceList.Keys)
{
if (word.Length == from.Length)
{
if (word == from)
return pre + _wordReplaceList[from] + post;
return pre + WordReplaceList[from] + post;
}
else if (word.Length + post.Length == from.Length)
{
if (string.CompareOrdinal(word + post, from) == 0)
return pre + _wordReplaceList[from];
return pre + WordReplaceList[from];
}
if (pre.Length + word.Length + post.Length == from.Length && string.CompareOrdinal(preWordPost, from) == 0)
{
return _wordReplaceList[from];
return WordReplaceList[from];
}
}
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
{
// uppercase I or 1 inside lowercase word (will be replaced by lowercase L)
// uppercase I or 1 inside lowercase word (will be replaced by lowercase L)
word = FixIor1InsideLowerCaseWord(word);
// uppercase 0 inside lowercase word (will be replaced by lowercase L)
// uppercase 0 inside lowercase word (will be replaced by lowercase L)
word = Fix0InsideLowerCaseWord(word);
// uppercase I or 1 inside lowercase word (will be replaced by lowercase L)
// uppercase I or 1 inside lowercase word (will be replaced by lowercase L)
word = FixIor1InsideLowerCaseWord(word);
word = FixLowerCaseLInsideUpperCaseWord(word); // eg. SCARLETTl => SCARLETTI
}
// Retry word replace list
foreach (string from in _wordReplaceList.Keys)
// Retry word replace list
foreach (string from in WordReplaceList.Keys)
{
if (word.Length == from.Length)
{
if (string.CompareOrdinal(word, from) == 0)
return pre + _wordReplaceList[from] + post;
return pre + WordReplaceList[from] + post;
}
else if (word.Length + post.Length == from.Length)
{
if (string.CompareOrdinal(word + post, from) == 0)
return pre + _wordReplaceList[from];
return pre + WordReplaceList[from];
}
if (pre.Length + word.Length + post.Length == from.Length && string.CompareOrdinal(preWordPost, from) == 0)
{
return _wordReplaceList[from];
return WordReplaceList[from];
}
}
@ -647,102 +664,241 @@ namespace Nikse.SubtitleEdit.Logic
if (word.Length == 0)
return preWordPost;
foreach (string from in _wordReplaceList.Keys)
foreach (string from in WordReplaceList.Keys)
{
if (word.Length == from.Length)
{
if (string.CompareOrdinal(word, from) == 0)
return pre + _wordReplaceList[from] + post;
return pre + WordReplaceList[from] + post;
}
else if (word.Length + post.Length == from.Length)
{
if (string.CompareOrdinal(word + post, from) == 0)
return pre + _wordReplaceList[from];
return pre + WordReplaceList[from];
}
if (pre.Length + word.Length + post.Length == from.Length && string.CompareOrdinal(preWordPost, from) == 0)
{
return _wordReplaceList[from];
return WordReplaceList[from];
}
}
return preWordPost;
}
public void SaveWordToWordList(string word, string spellCheckWord)
/// <summary>
/// Removes a replacement entry. A key containing a space is treated as a
/// partial-line entry, any other key as a whole-word entry.
/// </summary>
/// <param name="word">The "from" text of the entry to remove.</param>
/// <returns>True when the removal was written to the user replace list; otherwise false.</returns>
public bool RemoveWordOrPartial(string word)
{
    if (word.Contains(' '))
    {
        if (!DeletePartialLineFromWordList(word))
            return false;

        // Keep the in-memory list in sync with the file, so this instance
        // stops applying (and reporting) the entry without a reload.
        PartialLineWordBoundaryReplaceList.Remove(word);
        return true;
    }

    if (!DeleteWordFromWordList(word))
        return false;

    // Same synchronisation for the whole-word list.
    WordReplaceList.Remove(word);
    return true;
}
/// <summary>
/// Removes a whole-word entry: loads the "WholeWords" section from both the
/// main and the user replace list documents and delegates to DeleteFromList.
/// </summary>
/// <param name="fromWord">The "from" text of the entry to remove.</param>
/// <returns>The result of DeleteFromList.</returns>
private bool DeleteWordFromWordList(string fromWord)
{
    const string sectionName = "WholeWords";

    var mainEntries = LoadReplaceList(LoadXmlReplaceListDocument(), sectionName);

    var userDocument = LoadXmlReplaceListUserDocument();
    var userEntries = LoadReplaceList(userDocument, sectionName);

    return DeleteFromList(fromWord, userDocument, sectionName, "Word", mainEntries, userEntries);
}
/// <summary>
/// Removes a partial-line entry: loads the "PartialLines" section from both
/// the main and the user replace list documents and delegates to DeleteFromList.
/// </summary>
/// <param name="fromWord">The "from" text of the entry to remove.</param>
/// <returns>The result of DeleteFromList.</returns>
private bool DeletePartialLineFromWordList(string fromWord)
{
    const string sectionName = "PartialLines";

    var mainEntries = LoadReplaceList(LoadXmlReplaceListDocument(), sectionName);

    var userDocument = LoadXmlReplaceListUserDocument();
    var userEntries = LoadReplaceList(userDocument, sectionName);

    return DeleteFromList(fromWord, userDocument, sectionName, "LinePart", mainEntries, userEntries);
}
/// <summary>
/// Removes an entry by editing the user replace list document and saving it
/// to ReplaceListXmlFileNameUser.
/// If the entry is one the user added, the user section is rebuilt without it.
/// Otherwise, if the entry comes from the main list, a marker element is
/// appended to the "Removed" + replaceListName section of the user document.
/// </summary>
/// <param name="word">The "from" text of the entry to remove.</param>
/// <param name="userDoc">The user replace list document to modify and save.</param>
/// <param name="replaceListName">Section name, e.g. "WholeWords" or "PartialLines".</param>
/// <param name="elementName">Element name used for entries in that section, e.g. "Word" or "LinePart".</param>
/// <param name="dictionary">Entries of the section as loaded from the main list.</param>
/// <param name="userDictionary">Entries of the section as loaded from the user list.</param>
/// <returns>True when the user document was changed and saved; otherwise false.</returns>
/// <exception cref="ArgumentNullException">dictionary or userDictionary is null.</exception>
private bool DeleteFromList(string word, XmlDocument userDoc, string replaceListName, string elementName, Dictionary<string, string> dictionary, Dictionary<string, string> userDictionary)
{
    if (dictionary == null)
        throw new ArgumentNullException("dictionary");
    if (userDictionary == null)
        throw new ArgumentNullException("userDictionary");

    if (userDictionary.ContainsKey(word))
    {
        XmlNode userSectionNode = userDoc.DocumentElement.SelectSingleNode(replaceListName);
        if (userSectionNode == null)
            return false;

        // The section still holds every existing entry, including the one to
        // delete. It must be emptied before being rebuilt; without this the
        // entry survives and all remaining entries are written twice.
        userSectionNode.RemoveAll();
        foreach (var kvp in userDictionary)
        {
            if (kvp.Key == word)
                continue;

            XmlNode keptNode = userDoc.CreateNode(XmlNodeType.Element, elementName, null);
            XmlAttribute keptFrom = userDoc.CreateAttribute("from");
            XmlAttribute keptTo = userDoc.CreateAttribute("to");
            keptFrom.InnerText = kvp.Key;
            keptTo.InnerText = kvp.Value;
            keptNode.Attributes.Append(keptFrom);
            keptNode.Attributes.Append(keptTo);
            userSectionNode.AppendChild(keptNode);
        }
        userDoc.Save(ReplaceListXmlFileNameUser);
        return true;
    }

    if (dictionary.ContainsKey(word))
    {
        // Entries of the main list are not edited here; the removal is
        // recorded in the user document instead.
        XmlNode removedSectionNode = userDoc.DocumentElement.SelectSingleNode("Removed" + replaceListName);
        if (removedSectionNode == null)
            return false;

        XmlNode markerNode = userDoc.CreateNode(XmlNodeType.Element, elementName, null);
        XmlAttribute markerFrom = userDoc.CreateAttribute("from");
        XmlAttribute markerTo = userDoc.CreateAttribute("to");
        markerFrom.InnerText = word;
        markerTo.InnerText = string.Empty;
        markerNode.Attributes.Append(markerFrom);
        markerNode.Attributes.Append(markerTo);
        removedSectionNode.AppendChild(markerNode);
        userDoc.Save(ReplaceListXmlFileNameUser);
        return true;
    }

    return false;
}
/// <summary>
/// Loads the main replace list document from _replaceListXmlFileName.
/// When the file does not exist, or loading it throws, an empty replace list
/// skeleton is returned instead.
/// </summary>
/// <returns>The loaded document, or the empty skeleton document.</returns>
private XmlDocument LoadXmlReplaceListDocument()
{
    const string emptyReplaceListXml = "<ReplaceList><WholeWords/><PartialLines/><BeginLines/><EndLines/><WholeLines/></ReplaceList>";

    var document = new XmlDocument();
    if (!File.Exists(_replaceListXmlFileName))
    {
        document.LoadXml(emptyReplaceListXml);
        return document;
    }

    try
    {
        document.Load(_replaceListXmlFileName);
    }
    catch
    {
        // Any load failure falls back to the empty skeleton.
        document.LoadXml(emptyReplaceListXml);
    }
    return document;
}
public void SaveWordToWholeLineList(string line, string spellCheckLine)
/// <summary>
/// File name of the user replace list: same directory and extension as
/// _replaceListXmlFileName, with "_User" appended to the base file name.
/// </summary>
private string ReplaceListXmlFileNameUser
{
    get
    {
        string directory = Path.GetDirectoryName(_replaceListXmlFileName);
        string userFileName = Path.GetFileNameWithoutExtension(_replaceListXmlFileName) + "_User" + Path.GetExtension(_replaceListXmlFileName);
        return Path.Combine(directory, userFileName);
    }
}
/// <summary>
/// Loads the user replace list document from ReplaceListXmlFileNameUser.
/// When the file does not exist, or loading it throws, an empty skeleton
/// with both the regular and the "Removed..." sections is returned instead.
/// </summary>
/// <returns>The loaded document, or the empty skeleton document.</returns>
private XmlDocument LoadXmlReplaceListUserDocument()
{
    const string emptyUserReplaceListXml = "<ReplaceList><WholeWords/><PartialLines/><BeginLines/><EndLines/><WholeLines/><RemovedWholeWords/><RemovedPartialLines/><RemovedBeginLines/><RemovedEndLines/><RemovedWholeLines/></ReplaceList>";

    var userDocument = new XmlDocument();
    if (!File.Exists(ReplaceListXmlFileNameUser))
    {
        userDocument.LoadXml(emptyUserReplaceListXml);
        return userDocument;
    }

    try
    {
        userDocument.Load(ReplaceListXmlFileNameUser);
    }
    catch
    {
        // Any load failure falls back to the empty skeleton.
        userDocument.LoadXml(emptyUserReplaceListXml);
    }
    return userDocument;
}
/// <summary>
/// Adds a replacement entry. A key containing a space is stored as a
/// partial-line entry, any other key as a whole-word entry.
/// </summary>
/// <param name="fromWord">Text to be replaced.</param>
/// <param name="toWord">Replacement text.</param>
/// <returns>True when the entry was added; false when the save helper rejected it (for example because the key already exists).</returns>
public bool AddWordOrPartial(string fromWord, string toWord)
{
    if (fromWord.Contains(' '))
    {
        if (!SavePartialLineToWordList(fromWord, toWord))
            return false;

        // Mirror the persisted entry in memory so this instance applies it
        // immediately, not only after the lists are reloaded from disk.
        PartialLineWordBoundaryReplaceList[fromWord] = toWord;
        return true;
    }

    if (!SaveWordToWordList(fromWord, toWord))
        return false;

    // Same synchronisation for the whole-word list.
    WordReplaceList[fromWord] = toWord;
    return true;
}
/// <summary>
/// Adds a whole-word entry: loads the "WholeWords" section from both the
/// main and the user replace list documents and delegates to SaveToList.
/// </summary>
/// <param name="fromWord">Text to be replaced.</param>
/// <param name="toWord">Replacement text.</param>
/// <returns>The result of SaveToList.</returns>
private bool SaveWordToWordList(string fromWord, string toWord)
{
    const string sectionName = "WholeWords";

    var mainEntries = LoadReplaceList(LoadXmlReplaceListDocument(), sectionName);

    var userDocument = LoadXmlReplaceListUserDocument();
    var userEntries = LoadReplaceList(userDocument, sectionName);

    return SaveToList(fromWord, toWord, userDocument, sectionName, "Word", mainEntries, userEntries);
}
/// <summary>
/// Adds a partial-line entry: loads the "PartialLines" section from both the
/// main and the user replace list documents and delegates to SaveToList.
/// </summary>
/// <param name="fromWord">Text to be replaced.</param>
/// <param name="toWord">Replacement text.</param>
/// <returns>The result of SaveToList.</returns>
private bool SavePartialLineToWordList(string fromWord, string toWord)
{
    const string sectionName = "PartialLines";

    var mainEntries = LoadReplaceList(LoadXmlReplaceListDocument(), sectionName);

    var userDocument = LoadXmlReplaceListUserDocument();
    var userEntries = LoadReplaceList(userDocument, sectionName);

    return SaveToList(fromWord, toWord, userDocument, sectionName, "LinePart", mainEntries, userEntries);
}
/// <summary>
/// Appends a new entry to the given section of the user replace list
/// document and saves it to ReplaceListXmlFileNameUser.
/// Nothing is written when the key already exists in either the main or the
/// user entries.
/// </summary>
/// <param name="fromWord">Text to be replaced (the entry key).</param>
/// <param name="toWord">Replacement text.</param>
/// <param name="userDoc">The user replace list document to modify and save.</param>
/// <param name="replaceListName">Section name, e.g. "WholeWords" or "PartialLines".</param>
/// <param name="elementName">Element name used for entries in that section, e.g. "Word" or "LinePart".</param>
/// <param name="dictionary">Entries of the section as loaded from the main list.</param>
/// <param name="userDictionary">Entries of the section as loaded from the user list; the new pair is added to it.</param>
/// <returns>True when the entry was written to the user document; false when the key already exists.</returns>
/// <exception cref="ArgumentNullException">dictionary or userDictionary is null.</exception>
private bool SaveToList(string fromWord, string toWord, XmlDocument userDoc, string replaceListName, string elementName, Dictionary<string, string> dictionary, Dictionary<string, string> userDictionary)
{
    if (dictionary == null)
        throw new ArgumentNullException("dictionary");
    if (userDictionary == null)
        throw new ArgumentNullException("userDictionary");

    if (dictionary.ContainsKey(fromWord) || userDictionary.ContainsKey(fromWord))
        return false;

    userDictionary.Add(fromWord, toWord);

    XmlNode sectionNode = userDoc.DocumentElement.SelectSingleNode(replaceListName);
    if (sectionNode == null)
    {
        // A user file without this section used to make the method report
        // success without saving anything; create the section so the entry
        // is really persisted.
        sectionNode = userDoc.DocumentElement.AppendChild(userDoc.CreateElement(replaceListName));
    }

    XmlNode newNode = userDoc.CreateNode(XmlNodeType.Element, elementName, null);
    XmlAttribute aFrom = userDoc.CreateAttribute("from");
    XmlAttribute aTo = userDoc.CreateAttribute("to");
    aFrom.InnerText = fromWord;
    aTo.InnerText = toWord;
    newNode.Attributes.Append(aFrom);
    newNode.Attributes.Append(aTo);
    sectionNode.AppendChild(newNode);
    userDoc.Save(ReplaceListXmlFileNameUser);
    return true;
}
public void AddToWholeLineList(string fromLine, string toLine)
{
try
{
if (_replaceListXmlFileName != null)
var userDocument = LoadXmlReplaceListUserDocument();
if (!_wholeLineReplaceList.ContainsKey(fromLine))
_wholeLineReplaceList.Add(fromLine, toLine);
XmlNode wholeWordsNode = userDocument.DocumentElement.SelectSingleNode("WholeLines");
if (wholeWordsNode != null)
{
var doc = new XmlDocument();
if (File.Exists(_replaceListXmlFileName))
{
try
{
doc.Load(_replaceListXmlFileName);
}
catch
{
doc.LoadXml("<ReplaceList><WholeWords/><PartialLines/><BeginLines/><EndLines/><WholeLines/></ReplaceList>");
}
}
else
{
doc.LoadXml("<ReplaceList><WholeWords/><PartialLines/><BeginLines/><EndLines/><WholeLines/></ReplaceList>");
}
if (!_wholeLineReplaceList.ContainsKey(line))
_wholeLineReplaceList.Add(line, spellCheckLine);
XmlNode wholeWordsNode = doc.DocumentElement.SelectSingleNode("WholeLines");
if (wholeWordsNode != null)
{
XmlNode newNode = doc.CreateNode(XmlNodeType.Element, "Line", null);
XmlAttribute aFrom = doc.CreateAttribute("from");
XmlAttribute aTo = doc.CreateAttribute("to");
aFrom.InnerText = line;
aTo.InnerText = spellCheckLine;
newNode.Attributes.Append(aFrom);
newNode.Attributes.Append(aTo);
wholeWordsNode.AppendChild(newNode);
doc.Save(_replaceListXmlFileName);
}
XmlNode newNode = userDocument.CreateNode(XmlNodeType.Element, "Line", null);
XmlAttribute aFrom = userDocument.CreateAttribute("from");
XmlAttribute aTo = userDocument.CreateAttribute("to");
aTo.InnerText = toLine;
aFrom.InnerText = fromLine;
newNode.Attributes.Append(aFrom);
newNode.Attributes.Append(aTo);
wholeWordsNode.AppendChild(newNode);
userDocument.Save(_replaceListXmlFileName);
}
}
catch (Exception exception)