mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2025-01-31 21:11:39 +01:00
Updated OcrFixReplaceList.cs
Added other common Latin ligatures present in Unicode. Also added the acute accent, which I've often seen used instead of the apostrophe, either as an OCR error or because people mistake it for the curly apostrophe. Closes #1961
This commit is contained in:
parent
8aba3f8f17
commit
74c5c0a29e
@ -281,10 +281,18 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
|
|||||||
{
|
{
|
||||||
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
|
if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
|
||||||
{
|
{
|
||||||
|
// common Latin ligatures from legacy encodings;
|
||||||
|
// Unicode includes them only for compatibility and discourages their use
|
||||||
|
word = word.Replace("ﬀ", "ff");
|
||||||
word = word.Replace("ﬁ", "fi");
|
word = word.Replace("ﬁ", "fi");
|
||||||
|
word = word.Replace("ﬂ", "fl");
|
||||||
|
word = word.Replace("ﬃ", "ffi");
|
||||||
|
word = word.Replace("ﬄ", "ffl");
|
||||||
|
|
||||||
word = word.Replace('ν', 'v'); // first 'v' is U+03BD GREEK SMALL LETTER NU
|
word = word.Replace('ν', 'v'); // first 'v' is U+03BD GREEK SMALL LETTER NU
|
||||||
word = word.Replace('’', '\'');
|
word = word.Replace('’', '\'');
|
||||||
word = word.Replace('`', '\'');
|
word = word.Replace('`', '\'');
|
||||||
|
word = word.Replace('´', '\'');
|
||||||
word = word.Replace('‘', '\'');
|
word = word.Replace('‘', '\'');
|
||||||
word = word.Replace('—', '-');
|
word = word.Replace('—', '-');
|
||||||
while(word.Contains("--"))
|
while(word.Contains("--"))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user