Minor refactor

This commit is contained in:
niksedk 2022-12-18 08:56:32 +01:00
parent 0a6cb6ce79
commit 03e1d962a5
2 changed files with 103 additions and 96 deletions

View File

@ -246,7 +246,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
public string FixOcrErrorViaLineReplaceList(string input)
{
// Whole fromLine
foreach (string from in _wholeLineReplaceList.Keys)
foreach (var from in _wholeLineReplaceList.Keys)
{
if (input == from)
{
@ -254,8 +254,8 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
}
}
string newText = input;
string pre = string.Empty;
var newText = input;
var pre = string.Empty;
if (newText.StartsWith("<i>", StringComparison.Ordinal))
{
pre += "<i>";
@ -275,14 +275,14 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
// begin fromLine
var lines = newText.SplitToLines();
var sb = new StringBuilder(input.Length + 2);
foreach (string l in lines)
foreach (var l in lines)
{
string s = l;
var s = l;
foreach (string from in _beginLineReplaceList.Keys)
{
if (s.FastIndexOf(from) >= 0)
{
string with = _beginLineReplaceList[from];
var with = _beginLineReplaceList[from];
if (s.StartsWith(from, StringComparison.Ordinal))
{
s = s.Remove(0, from.Length).Insert(0, with);
@ -300,14 +300,14 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
}
newText = pre + sb.ToString().TrimEnd(Utilities.NewLineChars);
string post = string.Empty;
var post = string.Empty;
if (newText.EndsWith("</i>", StringComparison.Ordinal))
{
newText = newText.Remove(newText.Length - 4, 4);
post = "</i>";
}
foreach (string from in _endLineReplaceList.Keys)
foreach (var from in _endLineReplaceList.Keys)
{
if (newText.EndsWith(from, StringComparison.Ordinal))
{
@ -317,7 +317,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
}
newText += post;
foreach (string from in PartialLineWordBoundaryReplaceList.Keys)
foreach (var from in PartialLineWordBoundaryReplaceList.Keys)
{
if (newText.FastIndexOf(from) >= 0)
{
@ -325,7 +325,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
}
}
foreach (string from in _partialLineAlwaysReplaceList.Keys)
foreach (var from in _partialLineAlwaysReplaceList.Keys)
{
if (newText.FastIndexOf(from) >= 0)
{
@ -336,7 +336,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
if (_replaceRegExes == null || _regExList.Count != _replaceRegExes.Count)
{
_replaceRegExes = new List<Regex>();
foreach (string findWhat in _regExList.Keys)
foreach (var findWhat in _regExList.Keys)
{
var regex = new Regex(findWhat, RegexOptions.Multiline | RegexOptions.Compiled);
_replaceRegExes.Add(regex);
@ -345,8 +345,8 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
}
else
{
int i = 0;
foreach (string findWhat in _regExList.Keys)
var i = 0;
foreach (var findWhat in _regExList.Keys)
{
var regex = _replaceRegExes[i];
newText = regex.Replace(newText, _regExList[findWhat]);
@ -376,10 +376,10 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
{
var list = new List<string>();
var previousGuesses = new List<string>();
foreach (string letter in _partialWordReplaceList.Keys)
foreach (var letter in _partialWordReplaceList.Keys)
{
var indexes = new List<int>();
for (int i = 0; i <= word.Length - letter.Length; i++)
for (var i = 0; i <= word.Length - letter.Length; i++)
{
if (word.Substring(i).StartsWith(letter, StringComparison.Ordinal))
{
@ -403,7 +403,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
if (!_partialWordReplaceList[letter].Contains(' '))
{
var multiGuess = word;
for (int i = indexes.Count - 1; i >= 0; i--)
for (var i = indexes.Count - 1; i >= 0; i--)
{
var idx = indexes[i];
multiGuess = multiGuess.Remove(idx, letter.Length).Insert(idx, _partialWordReplaceList[letter]);
@ -420,7 +420,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
if (indexes.Count > 0)
{
for (int i = indexes.Count - 1; i >= 0; i--)
for (var i = indexes.Count - 1; i >= 0; i--)
{
var idx = indexes[i];
if (idx > 1 && idx < word.Length - 2)
@ -433,7 +433,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
foreach (var previousGuess in previousGuesses)
{
for (int i = 0; i < previousGuess.Length - letter.Length; i++)
for (var i = 0; i < previousGuess.Length - letter.Length; i++)
{
if (previousGuess.Substring(i).StartsWith(letter, StringComparison.Ordinal))
{
@ -498,13 +498,13 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
}
//always replace list
foreach (string letter in _partialWordAlwaysReplaceList.Keys)
foreach (var letter in _partialWordAlwaysReplaceList.Keys)
{
word = word.Replace(letter, _partialWordAlwaysReplaceList[letter]);
}
string pre = string.Empty;
string post = string.Empty;
var pre = string.Empty;
var post = string.Empty;
if (word.StartsWith("<i>", StringComparison.Ordinal))
{
@ -563,7 +563,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
word = word.Remove(word.Length - 4, 4);
}
string preWordPost = pre + word + post;
var preWordPost = pre + word + post;
if (word.Length == 0)
{
return preWordPost;
@ -663,6 +663,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
word = word.Replace('l', 'I');
}
}
return word;
}
@ -691,7 +692,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
{
if (word[match.Index + 1] == 'I' || word[match.Index + 1] == '1')
{
bool doFix = word[match.Index + 1] != 'I' && match.Index >= 1 && word.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal);
var doFix = word[match.Index + 1] != 'I' && match.Index >= 1 && word.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal);
if (word[match.Index + 1] == 'I' && match.Index >= 2 && word.Substring(match.Index - 2).StartsWith("Mac", StringComparison.Ordinal))
{
doFix = false;
@ -699,7 +700,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
if (doFix)
{
string oldText = word;
var oldText = word;
word = word.Substring(0, match.Index + 1) + "l";
if (match.Index + 2 < oldText.Length)
{
@ -737,12 +738,12 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
if (word.LastIndexOf('0') > 0)
{
Match match = RegExTime1.Match(word);
var match = RegExTime1.Match(word);
while (match.Success)
{
if (word[match.Index + 1] == '0')
{
string oldText = word;
var oldText = word;
word = word.Substring(0, match.Index + 1) + "o";
if (match.Index + 2 < oldText.Length)
{
@ -760,7 +761,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
{
if (match.Index == 0 || !expectedDigits.Contains(word[match.Index - 1]))
{
string oldText = word;
var oldText = word;
word = word.Substring(0, match.Index) + "o";
if (match.Index + 1 < oldText.Length)
{
@ -779,13 +780,13 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
var word = input;
//always replace list
foreach (string letter in _partialWordAlwaysReplaceList.Keys)
foreach (var letter in _partialWordAlwaysReplaceList.Keys)
{
word = word.Replace(letter, _partialWordAlwaysReplaceList[letter]);
}
string pre = string.Empty;
string post = string.Empty;
var pre = string.Empty;
var post = string.Empty;
if (word.StartsWith("<i>", StringComparison.Ordinal))
{
@ -864,7 +865,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
word = word.Remove(word.Length - 4, 4);
}
string preWordPost = pre + word + post;
var preWordPost = pre + word + post;
if (word.Length == 0)
{
return preWordPost;
@ -1048,6 +1049,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
}
return false;
}
if (SaveWordToWordList(fromWord, toWord))
{
if (!WordReplaceList.ContainsKey(fromWord))
@ -1056,6 +1058,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
}
return true;
}
return false;
}
@ -1152,12 +1155,12 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
if (text.Contains(word))
{
const string separatorChars = @" ¡¿<>-""”“()[]'`´¶♪¿¡.…—!?,:;/";
int appendFrom = 0;
for (int i = 0; i < text.Length; i++)
var appendFrom = 0;
for (var i = 0; i < text.Length; i++)
{
if (text[i] == word[0] && i >= appendFrom && text.Substring(i).StartsWith(word, StringComparison.Ordinal))
{
bool startOk = i == 0;
var startOk = i == 0;
if (!startOk)
{
var prevChar = text[i - 1];
@ -1169,7 +1172,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
}
if (startOk)
{
bool endOk = i + word.Length == text.Length;
var endOk = i + word.Length == text.Length;
if (!endOk)
{
var nextChar = text[i + word.Length];

View File

@ -158,7 +158,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
private void LoadSpellingDictionaries(string threeLetterIsoLanguageName, string hunspellName)
{
string dictionaryFolder = Utilities.DictionaryFolder;
var dictionaryFolder = Utilities.DictionaryFolder;
if (!Directory.Exists(dictionaryFolder))
{
return;
@ -207,51 +207,54 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
foreach (var culture in Iso639Dash2LanguageCode.List)
{
if (culture.ThreeLetterCode == threeLetterIsoLanguageName)
if (culture.ThreeLetterCode != threeLetterIsoLanguageName)
{
string dictionaryFileName = null;
if (!string.IsNullOrEmpty(hunspellName) && hunspellName.StartsWith(culture.TwoLetterCode, StringComparison.OrdinalIgnoreCase) && File.Exists(Path.Combine(dictionaryFolder, hunspellName + ".dic")))
{
dictionaryFileName = Path.Combine(dictionaryFolder, hunspellName + ".dic");
LoadSpellingDictionariesViaDictionaryFileName(threeLetterIsoLanguageName, dictionaryFileName, true);
return;
}
foreach (string dic in Directory.GetFiles(dictionaryFolder, "*.dic"))
{
string name = Path.GetFileNameWithoutExtension(dic);
if (!string.IsNullOrEmpty(name) && !name.StartsWith("hyph", StringComparison.Ordinal))
{
try
{
name = name.Replace('_', '-');
if (name.Length > 5)
{
name = name.Substring(0, 5);
}
var ci = CultureInfo.GetCultureInfo(name);
if (ci.GetThreeLetterIsoLanguageName() == threeLetterIsoLanguageName ||
ci.GetThreeLetterIsoLanguageName().Equals(threeLetterIsoLanguageName, StringComparison.OrdinalIgnoreCase))
{
dictionaryFileName = dic;
break;
}
}
catch (Exception exception)
{
System.Diagnostics.Debug.WriteLine(exception.Message);
}
}
}
if (dictionaryFileName == null)
{
return;
}
continue;
}
string dictionaryFileName = null;
if (!string.IsNullOrEmpty(hunspellName) && hunspellName.StartsWith(culture.TwoLetterCode, StringComparison.OrdinalIgnoreCase) && File.Exists(Path.Combine(dictionaryFolder, hunspellName + ".dic")))
{
dictionaryFileName = Path.Combine(dictionaryFolder, hunspellName + ".dic");
LoadSpellingDictionariesViaDictionaryFileName(threeLetterIsoLanguageName, dictionaryFileName, true);
return;
}
foreach (var dic in Directory.GetFiles(dictionaryFolder, "*.dic"))
{
var name = Path.GetFileNameWithoutExtension(dic);
if (!string.IsNullOrEmpty(name) && !name.StartsWith("hyph", StringComparison.Ordinal))
{
try
{
name = name.Replace('_', '-');
if (name.Length > 5)
{
name = name.Substring(0, 5);
}
var ci = CultureInfo.GetCultureInfo(name);
if (ci.GetThreeLetterIsoLanguageName() == threeLetterIsoLanguageName ||
ci.GetThreeLetterIsoLanguageName().Equals(threeLetterIsoLanguageName, StringComparison.OrdinalIgnoreCase))
{
dictionaryFileName = dic;
break;
}
}
catch (Exception exception)
{
System.Diagnostics.Debug.WriteLine(exception.Message);
}
}
}
if (dictionaryFileName == null)
{
return;
}
LoadSpellingDictionariesViaDictionaryFileName(threeLetterIsoLanguageName, dictionaryFileName, true);
return;
}
string dicFileName = null;
@ -484,10 +487,10 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
text = text.Replace("<i>.</i>", ".");
text = text.TrimStart();
int len = text.Length;
for (int i = 0; i < len; i++)
var len = text.Length;
for (var i = 0; i < len; i++)
{
char ch = text[i];
var ch = text[i];
switch (ch)
{
case 'fi':
@ -695,7 +698,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
}
}
int start = text.IndexOf(tag, StringComparison.Ordinal);
var start = text.IndexOf(tag, StringComparison.Ordinal);
while (start > 0)
{
lastLine = HtmlUtil.RemoveHtmlTags(text.Substring(0, start)).TrimEnd().TrimEnd('-').TrimEnd();
@ -709,7 +712,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if (start > 1)
{
string beforeThis = HtmlUtil.RemoveHtmlTags(text.Substring(0, start));
var beforeThis = HtmlUtil.RemoveHtmlTags(text.Substring(0, start));
endingBeforeThis = beforeThis.EndsWith('.') || beforeThis.EndsWith('!') || beforeThis.EndsWith('?');
}
@ -735,7 +738,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
{
if (start > 1)
{
string beforeThis = HtmlUtil.RemoveHtmlTags(text.Substring(0, start));
var beforeThis = HtmlUtil.RemoveHtmlTags(text.Substring(0, start));
endingBeforeThis = beforeThis.EndsWith('.') || beforeThis.EndsWith('!') || beforeThis.EndsWith('?');
}
@ -987,7 +990,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
text = "." + text;
}
string pre = string.Empty;
var pre = string.Empty;
if (text.StartsWith("- ", StringComparison.Ordinal))
{
pre = "- ";
@ -1262,7 +1265,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
var match = RegexUppercaseI.Match(text);
while (match.Success)
{
bool doFix = !(match.Index >= 1 && text.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal));
var doFix = !(match.Index >= 1 && text.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal));
if (match.Index >= 2 && text.Substring(match.Index - 2).StartsWith("Mac", StringComparison.Ordinal))
{
doFix = false;
@ -1339,7 +1342,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
return line;
}
string tempLine = line;
var tempLine = line;
const string p = " ¡¿,.!?:;()[]{}+-$£\"„”“#&%…—♪\r\n";
var trimChars = p.ToArray();
bool hasAllUpperWord = false;
@ -1771,9 +1774,9 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
private static string GetWordWithDominatedCasing(string word)
{
int lowercase = 0;
int uppercase = 0;
for (int i = 0; i < word.Length; i++)
var lowercase = 0;
var uppercase = 0;
for (var i = 0; i < word.Length; i++)
{
var ch = word[i];
if (char.IsLower(ch))
@ -1953,18 +1956,18 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
{
if (s.Length > 10 && s.Contains('/'))
{
string[] ar = s.Split('/');
var ar = s.Split('/');
if (ar.Length == 2)
{
if (ar[0].Length > 3 && ar[1].Length > 3)
{
string a = ar[0];
var a = ar[0];
if (a == a.ToUpperInvariant())
{
a = a[0] + a.Substring(1).ToLowerInvariant();
}
string b = ar[0];
var b = ar[0];
if (b == b.ToUpperInvariant())
{
b = b[0] + b.Substring(1).ToLowerInvariant();
@ -2044,22 +2047,22 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
return 0;
}
int minLength = 2;
var minLength = 2;
if (Configuration.Settings.Tools.CheckOneLetterWords)
{
minLength = 1;
}
int wordsNotFound = 0;
var wordsNotFound = 0;
var words = HtmlUtil.RemoveOpenCloseTags(line, HtmlUtil.TagItalic).Split(" \r\n\t".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
for (int i = 0; i < words.Length; i++)
{
string word = words[i].Trim(SpellCheckWordLists.SplitChars.ToArray());
var word = words[i].Trim(SpellCheckWordLists.SplitChars.ToArray());
if (word.Length >= minLength)
{
if (!IsWordKnownOrNumber(word, line))
{
bool correct = word.Length > 1 && _hunspell.Spell(word);
var correct = word.Length > 1 && _hunspell.Spell(word);
if (!correct)
{
correct = word.Length > 2 && _hunspell.Spell(word.Trim('\''));
@ -2085,6 +2088,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
}
}
}
return wordsNotFound;
}
@ -2095,12 +2099,12 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
_hunspell.Dispose();
_hunspell = null;
}
if (_spellCheck != null)
{
_spellCheck.Dispose();
_spellCheck = null;
}
}
}
}