From 1026af5cfc87cd121e016f8d59dc25ba7feb1177 Mon Sep 17 00:00:00 2001 From: niksedk Date: Wed, 3 Aug 2016 13:31:33 +0200 Subject: [PATCH] Minor fix for OCR unknown word (line split) --- src/Forms/OCRSpellCheck.cs | 2 +- src/Logic/OCR/OcrFixEngine.cs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Forms/OCRSpellCheck.cs b/src/Forms/OCRSpellCheck.cs index 3df63d48a..119a7df0d 100644 --- a/src/Forms/OCRSpellCheck.cs +++ b/src/Forms/OCRSpellCheck.cs @@ -88,7 +88,7 @@ namespace Nikse.SubtitleEdit.Forms { if (word != null && richTextBoxParagraph.Text.Contains(word)) { - const string ExpectedWordBoundaryChars = " <>-\"”“[]'‘`´¶()♪¿¡.…—!?,:;/\r\n"; + const string ExpectedWordBoundaryChars = " <>-\"”“«»[]'‘`´¶()♪¿¡.…—!?,:;/\r\n"; for (int i = 0; i < richTextBoxParagraph.Text.Length; i++) { if (richTextBoxParagraph.Text.Substring(i).StartsWith(word)) diff --git a/src/Logic/OCR/OcrFixEngine.cs b/src/Logic/OCR/OcrFixEngine.cs index 0292b1462..0c40307a8 100644 --- a/src/Logic/OCR/OcrFixEngine.cs +++ b/src/Logic/OCR/OcrFixEngine.cs @@ -959,7 +959,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr } } - string[] words = tempLine.Replace("", string.Empty).Split((Environment.NewLine + " ¡¿,.!?:;()[]{}+-£\"”“#&%…—♪").ToCharArray(), StringSplitOptions.RemoveEmptyEntries); + string[] words = tempLine.Replace("", string.Empty).Split((Environment.NewLine + " ¡¿,.!?:;()[]{}+-£\"”“«»#&%…—♪").ToCharArray(), StringSplitOptions.RemoveEmptyEntries); for (int i = 0; i < words.Length; i++) { string word = words[i].TrimStart('\''); @@ -1382,7 +1382,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr return 0; int wordsNotFound = 0; - var words = HtmlUtil.RemoveOpenCloseTags(line, HtmlUtil.TagItalic).Split((Environment.NewLine + " ¡¿,.!?:;()[]{}+-$£\"#&%…“”").ToCharArray(), StringSplitOptions.RemoveEmptyEntries); + var words = HtmlUtil.RemoveOpenCloseTags(line, HtmlUtil.TagItalic).Split((Environment.NewLine + " ¡¿,.!?:;()[]{}+-$£\"#&%…“”«»").ToCharArray(), StringSplitOptions.RemoveEmptyEntries); for (int i = 0; i < words.Length; i++) { string word = words[i];