Add a few extra words to the Macedonian word split list

This commit is contained in:
niksedk 2022-01-13 12:05:12 +01:00
parent c61afd21b4
commit 9563d6bbff
2 changed files with 5722 additions and 3943 deletions

File diff suppressed because it is too large. Load Diff

View File

@@ -1339,16 +1339,16 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
} }
} }
foreach (string name in _nameMultiWordListAndWordsWithPeriods) foreach (var name in _nameMultiWordListAndWordsWithPeriods)
{ {
int start = tempLine.FastIndexOf(name); var start = tempLine.FastIndexOf(name);
if (start < 0 && hasAllUpperWord) if (start < 0 && hasAllUpperWord)
{ {
start = tempLine.FastIndexOf(name.ToUpperInvariant()); start = tempLine.FastIndexOf(name.ToUpperInvariant());
} }
if (start == 0 || (start > 0 && p.Contains(tempLine[start - 1]))) if (start == 0 || (start > 0 && p.Contains(tempLine[start - 1])))
{ {
int end = start + name.Length; var end = start + name.Length;
if (end == tempLine.Length || p.Contains(tempLine[end])) if (end == tempLine.Length || p.Contains(tempLine[end]))
{ {
tempLine = tempLine.Remove(start, name.Length); tempLine = tempLine.Remove(start, name.Length);
@@ -1356,7 +1356,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
} }
} }
int minLength = 2; var minLength = 2;
if (Configuration.Settings.Tools.CheckOneLetterWords) if (Configuration.Settings.Tools.CheckOneLetterWords)
{ {
minLength = 1; minLength = 1;
@@ -1373,10 +1373,10 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
words.Add(w.Trim(trimChars)); words.Add(w.Trim(trimChars));
} }
for (int i = 0; i < words.Count && i < 1000; i++) for (var i = 0; i < words.Count && i < 1000; i++)
{ {
string word = words[i].TrimStart('\''); var word = words[i].TrimStart('\'');
string wordNotEndTrimmed = word; var wordNotEndTrimmed = word;
word = word.TrimEnd('\''); word = word.TrimEnd('\'');
if (!IsWordKnownOrNumber(word, line) && !localIgnoreWords.Contains(word)) if (!IsWordKnownOrNumber(word, line) && !localIgnoreWords.Contains(word))
{ {
@@ -1458,7 +1458,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
if (!correct) if (!correct)
{ {
//look for match via dash'ed word, e.g. sci-fi //look for match via dash'ed word, e.g. sci-fi
string dashedWord = GetDashedWordBefore(word, line, words, i); var dashedWord = GetDashedWordBefore(word, line, words, i);
if (!string.IsNullOrEmpty(dashedWord)) if (!string.IsNullOrEmpty(dashedWord))
{ {
correct = IsWordKnownOrNumber(dashedWord, line); correct = IsWordKnownOrNumber(dashedWord, line);
@@ -1522,7 +1522,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
wordsNotFound++; wordsNotFound++;
if (log) if (log)
{ {
string nf = word; var nf = word;
if (nf.StartsWith("<i>", StringComparison.Ordinal)) if (nf.StartsWith("<i>", StringComparison.Ordinal))
{ {
nf = nf.Remove(0, 3); nf = nf.Remove(0, 3);