Minor fix for OCR

space after "-" or "'" for nOCR/BIC + update dictionaries
This commit is contained in:
Nikolaj Olsson 2020-06-17 18:25:31 +02:00
parent c45c6cc08a
commit 64589b90c3
3 changed files with 11 additions and 6 deletions

View File

@@ -1322,6 +1322,7 @@
<Word from="let'sjust" to="let's just" />
<Word from="Let'sjust" to="Let's just" />
<Word from="Lf" to="If" />
<Word from="lf" to="If" />
<Word from="lfeelonelung" to="I feel one lung" />
<Word from="lfhe's" to="If he's" />
<Word from="lfl" to="if I" />

Binary file not shown.

View File

@@ -1,8 +1,8 @@
using System;
using Nikse.SubtitleEdit.Forms.Ocr;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Nikse.SubtitleEdit.Forms.Ocr;
namespace Nikse.SubtitleEdit.Logic.Ocr
{
@@ -57,7 +57,7 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
sbWord = new StringBuilder();
italicCount = 0;
}
else if (m.Text != " ")
else
{
sbWord.Append(m.Text);
if (m.Italic)
@@ -81,7 +81,9 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
sb.Append("</i>");
}
var text = sb.ToString().Trim();
text = text.Replace("<i>-</i>", "-")
text = text
.Replace("<i>-</i>", "-")
.Replace("</i>-<i>", "-")
.Replace("<i>s</i>", "s")
.Replace("</i>s<i>", "s")
.Replace("<i>!</i>", "!")
@@ -89,9 +91,11 @@ namespace Nikse.SubtitleEdit.Logic.Ocr
.Replace("<i>?</i>", "?")
.Replace("</i>?<i>", "?")
.Replace("<i>'</i>", "'")
.Replace("<i>''</i>", "'")
.Replace("</i>'<i>", "'")
.Replace("</i>''<i>", "'")
.Replace("<i>''</i>", "''")
.Replace("</i>''<i>", "''")
.Replace("<i>\"</i>", "\"")
.Replace("</i>\"<i>", "\"")
.Replace("<i>:</i>", ":")
.Replace("</i>:<i>", ":")
.Replace("<i>.</i>", ".")