Minor fix for OCR unknown word (line split)

This commit is contained in:
niksedk 2016-08-03 13:31:33 +02:00
parent d5eeea8a4e
commit 1026af5cfc
2 changed files with 3 additions and 3 deletions

View File

@@ -88,7 +88,7 @@ namespace Nikse.SubtitleEdit.Forms
{
if (word != null && richTextBoxParagraph.Text.Contains(word))
{
const string ExpectedWordBoundaryChars = " <>-\"”“[]'`´¶()♪¿¡.…—!?,:;/\r\n";
const string ExpectedWordBoundaryChars = " <>-\"”“«»[]'`´¶()♪¿¡.…—!?,:;/\r\n";
for (int i = 0; i < richTextBoxParagraph.Text.Length; i++)
{
if (richTextBoxParagraph.Text.Substring(i).StartsWith(word))

View File

@@ -959,7 +959,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
}
}
string[] words = tempLine.Replace("</i>", string.Empty).Split((Environment.NewLine + " ¡¿,.!?:;()[]{}+-£\"”“#&%…—♪").ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
string[] words = tempLine.Replace("</i>", string.Empty).Split((Environment.NewLine + " ¡¿,.!?:;()[]{}+-£\"”“«»#&%…—♪").ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
for (int i = 0; i < words.Length; i++)
{
string word = words[i].TrimStart('\'');
@@ -1382,7 +1382,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
return 0;
int wordsNotFound = 0;
var words = HtmlUtil.RemoveOpenCloseTags(line, HtmlUtil.TagItalic).Split((Environment.NewLine + " ¡¿,.!?:;()[]{}+-$£\"#&%…“”").ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
var words = HtmlUtil.RemoveOpenCloseTags(line, HtmlUtil.TagItalic).Split((Environment.NewLine + " ¡¿,.!?:;()[]{}+-$£\"#&%…“”«»").ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
for (int i = 0; i < words.Length; i++)
{
string word = words[i];