mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-21 18:52:36 +01:00
Some more minor fixes in "Fix common errors"
git-svn-id: https://subtitleedit.googlecode.com/svn/trunk@193 99eadd0c-20b8-1223-b5c4-2a2b2df33de2
This commit is contained in:
parent
560a146afa
commit
442724cf67
@ -774,6 +774,10 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
|
||||
public void FixUnneededSpaces()
|
||||
{
|
||||
const string zeroWhiteSpace = "\u200B";
|
||||
const string zeroWidthNoBreakSpace = "\uFEFF";
|
||||
|
||||
|
||||
string fixAction = _language.UnneededSpace;
|
||||
int doubleSpaces = 0;
|
||||
for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
|
||||
@ -782,6 +786,10 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
string oldText = p.Text;
|
||||
|
||||
p.Text = p.Text.Trim();
|
||||
|
||||
p.Text = p.Text.Replace(zeroWhiteSpace, string.Empty);
|
||||
p.Text = p.Text.Replace(zeroWidthNoBreakSpace, string.Empty);
|
||||
p.Text = p.Text.Replace("", string.Empty); // some kind of hidden space!!!
|
||||
while (p.Text.Contains(" "))
|
||||
{
|
||||
p.Text = p.Text.Replace(" ", " ");
|
||||
@ -1319,11 +1327,18 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
if ((Utilities.GetLetters(true, true, true) + ",").Contains(st.StrippedText[match.Index - (Environment.NewLine.Length + 1)].ToString()))
|
||||
{
|
||||
st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
|
||||
p.Text = st.MergedString;
|
||||
uppercaseIsInsideLowercaseWords++;
|
||||
_totalFixes++;
|
||||
AddFixToListView(p, i + 1, fixAction, oldText, p.Text);
|
||||
string next = string.Empty;
|
||||
if (match.Length >= 2)
|
||||
next = match.Value.Substring(1, 1);
|
||||
|
||||
if (Utilities.LowerCaseVowels.Contains(next))
|
||||
{
|
||||
st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
|
||||
p.Text = st.MergedString;
|
||||
uppercaseIsInsideLowercaseWords++;
|
||||
_totalFixes++;
|
||||
AddFixToListView(p, i + 1, fixAction, oldText, p.Text);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (match.Index > 1 && ((st.StrippedText[match.Index - 1] == '\"') || (st.StrippedText[match.Index - 1] == '>') || (st.StrippedText[match.Index - 1] == '-')))
|
||||
@ -1348,11 +1363,17 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
}
|
||||
else
|
||||
{
|
||||
st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
|
||||
p.Text = st.MergedString;
|
||||
uppercaseIsInsideLowercaseWords++;
|
||||
_totalFixes++;
|
||||
AddFixToListView(p, i + 1, fixAction, oldText, p.Text);
|
||||
if (before == " " && !Utilities.LowerCaseVowels.Contains(after.ToLower()))
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
|
||||
p.Text = st.MergedString;
|
||||
uppercaseIsInsideLowercaseWords++;
|
||||
_totalFixes++;
|
||||
AddFixToListView(p, i + 1, fixAction, oldText, p.Text);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1699,7 +1720,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
string text = p.Text.Substring(indexOfNewLine + 2);
|
||||
StripableText st = new StripableText(text);
|
||||
if (st.StrippedText.Length > 0 && st.StrippedText[0].ToString() != st.StrippedText[0].ToString().ToUpper())
|
||||
if (st.StrippedText.Length > 0 && st.StrippedText[0].ToString() != st.StrippedText[0].ToString().ToUpper() && !st.Pre.EndsWith("[") && !st.Pre.Contains("..."))
|
||||
{
|
||||
text = st.Pre + st.StrippedText.Remove(0, 1).Insert(0, st.StrippedText[0].ToString().ToUpper()) + st.Post;
|
||||
|
||||
@ -1858,7 +1879,12 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
prev = s[match.Index - 1].ToString();
|
||||
if (match.Index + 1 < s.Length)
|
||||
next = s[match.Index + 1].ToString();
|
||||
if (prev != ">" && next != ">" && next != "}")
|
||||
|
||||
string wholePrev = string.Empty;
|
||||
if (match.Index > 1)
|
||||
wholePrev = s.Substring(0, match.Index - 1);
|
||||
|
||||
if (prev != ">" && next != ">" && next != "}" && !wholePrev.Trim().EndsWith("..."))
|
||||
{
|
||||
string temp = s.Substring(0, match.Index) + "I";
|
||||
if (match.Index + 1 < oldText.Length)
|
||||
|
@ -752,7 +752,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
}
|
||||
string line = GetStringWithItalicTags(matches);
|
||||
if (checkBoxAutoFixCommonErrors.Checked)
|
||||
line = OcrFixEngine.FixOcrErrorsViaHardcodedRules(line, _lastLine);
|
||||
line = OcrFixEngine.FixOcrErrorsViaHardcodedRules(line, _lastLine, null); // TODO: add abbreviations list
|
||||
return line;
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
List<string> _namesEtcList = new List<string>();
|
||||
List<string> _namesEtcListUppercase = new List<string>();
|
||||
List<string> _namesEtcMultiWordList = new List<string>(); // case sensitive phrases
|
||||
List<string> _abbreviationList;
|
||||
List<string> _userWordList = new List<string>();
|
||||
List<string> _wordSkipList = new List<string>();
|
||||
Hunspell _hunspell;
|
||||
@ -120,6 +121,19 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
_userWordList = new List<string>();
|
||||
_userWordListXmlFileName = Utilities.LoadUserWordList(_userWordList, _fiveLetterWordListLanguageName);
|
||||
|
||||
// Find abbreviations
|
||||
_abbreviationList = new List<string>();
|
||||
foreach (string name in _namesEtcList)
|
||||
{
|
||||
if (name.EndsWith("."))
|
||||
_abbreviationList.Add(name);
|
||||
}
|
||||
foreach (string name in _userWordList)
|
||||
{
|
||||
if (name.EndsWith("."))
|
||||
_abbreviationList.Add(name);
|
||||
}
|
||||
|
||||
// Load NHunspell spellchecker
|
||||
_hunspell = new Hunspell(dictionary + ".aff", dictionary + ".dic");
|
||||
IsDictionaryLoaded = true;
|
||||
@ -455,7 +469,7 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
|
||||
private string FixCommenOcrLineErrors(string input, string lastLine)
|
||||
{
|
||||
input = FixOcrErrorsViaHardcodedRules(input, lastLine);
|
||||
input = FixOcrErrorsViaHardcodedRules(input, lastLine, _abbreviationList);
|
||||
input = FixOcrErrorViaLineReplaceList(input);
|
||||
|
||||
// e.g. "selectionsu." -> "selections..."
|
||||
@ -481,16 +495,31 @@ namespace Nikse.SubtitleEdit.Logic.OCR
|
||||
return input;
|
||||
}
|
||||
|
||||
public static string FixOcrErrorsViaHardcodedRules(string input, string lastLine)
|
||||
/// <summary>
/// Abbreviations that are always recognised, in addition to any supplied by the caller.
/// Kept separate so the caller's list is never modified.
/// </summary>
private static readonly string[] BuiltInAbbreviations = { "a.m.", "p.m.", "o.r." };

/// <summary>
/// Determines whether <paramref name="line"/> ends with a known abbreviation that is
/// preceded by a space (e.g. "See you at 5 p.m."), ignoring case.
/// </summary>
/// <param name="line">The text line to inspect. Null or empty yields <c>false</c>.</param>
/// <param name="abbreviationList">
/// Caller-supplied abbreviations. If null the method returns <c>false</c> without
/// consulting the built-in abbreviations, matching the original behaviour.
/// The list is only read, never modified.
/// </param>
/// <returns><c>true</c> if the line ends with " " followed by an abbreviation; otherwise <c>false</c>.</returns>
private static bool EndsWithAbbreviation(string line, List<string> abbreviationList)
{
    if (string.IsNullOrEmpty(line) || abbreviationList == null)
        return false;

    // Check the built-in abbreviations without appending them to the caller's list:
    // appending on every call made a shared list grow without bound.
    foreach (string abbreviation in BuiltInAbbreviations)
    {
        if (line.EndsWith(" " + abbreviation, StringComparison.OrdinalIgnoreCase))
            return true;
    }

    foreach (string abbreviation in abbreviationList)
    {
        // Skip unusable entries instead of throwing on null.
        if (string.IsNullOrEmpty(abbreviation))
            continue;

        if (line.EndsWith(" " + abbreviation, StringComparison.OrdinalIgnoreCase))
            return true;
    }
    return false;
}
|
||||
|
||||
public static string FixOcrErrorsViaHardcodedRules(string input, string lastLine, List<string> abbreviationList)
|
||||
{
|
||||
if (lastLine == null ||
|
||||
lastLine.EndsWith(".") ||
|
||||
lastLine.EndsWith("!") ||
|
||||
lastLine.EndsWith("?"))
|
||||
{
|
||||
if (lastLine == null || !lastLine.EndsWith("..."))
|
||||
if (lastLine == null || (!lastLine.EndsWith("...") && !EndsWithAbbreviation(lastLine, abbreviationList)))
|
||||
{
|
||||
|
||||
if (input.Length > 0 && input[0].ToString() != input[0].ToString().ToUpper())
|
||||
input = input.Remove(0, 1).Insert(0, input[0].ToString().ToUpper());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user