Update English OCR fix list (minor)

This commit is contained in:
Nikolaj Olsson 2020-04-24 12:21:48 +02:00
parent a3e42a4026
commit eb21f3af76

View File

@@ -3184,6 +3184,7 @@
<Beginning from="I-i " to="I-I " /> <Beginning from="I-i " to="I-I " />
<Beginning from="H m, " to="Hm, " /> <Beginning from="H m, " to="Hm, " />
<Beginning from="Im sorry" to="I'm sorry" /> <Beginning from="Im sorry" to="I'm sorry" />
<Beginning from="lce " to="Ice " />
</BeginLines> </BeginLines>
<EndLines> <EndLines>
<Ending from=", sin" to=", sir." /> <Ending from=", sin" to=", sir." />
@@ -3204,6 +3205,6 @@
<RegEx find="^_\.\.(\p{L})" replaceWith="...$1" /> <RegEx find="^_\.\.(\p{L})" replaceWith="...$1" />
<RegEx find=" l([!?\.])" replaceWith=" I$1" /> <RegEx find=" l([!?\.])" replaceWith=" I$1" />
<RegEx find="\b\|\b" replaceWith="I" /> <RegEx find="\b\|\b" replaceWith="I" />
<RegEx find="\b1 (know|will|almost|didn't|get|got|have|apologize|paid|like|think|would|hope|shall|chose|choose|won|am|was|don't|just|start|run|saw|said|believe|try|ever|need|certainly|can't|anticipated|did|can|rang|heard|gave|came|decided|should|took|wanted|read|thought|was|still|do|love|want|overstepped|accept|authorized|owe|understand|made|guess|bumped|wasn't|mean|admire|had|spent|told|see|walk|were|help|definitely|could|say|take|brought|assume|proposed|realized|loved|base|left|change|changed|rule|feel|date|dated|imagine|went|kind|couldn't|wouldn't|work|care|make|lost)+\b" replaceWith="I $1" /> <RegEx find="\b1 (know|will|almost|didn't|get|got|have|apologize|paid|like|think|would|hope|shall|chose|choose|won|am|was|don't|just|start|run|saw|said|believe|try|ever|need|certainly|can't|anticipated|did|can|rang|heard|gave|came|decided|should|took|wanted|read|thought|was|still|do|love|want|overstepped|accept|authorized|owe|understand|made|guess|bumped|wasn't|mean|admire|had|spent|told|see|walk|were|help|definitely|could|say|take|brought|assume|proposed|realized|loved|base|left|change|changed|rule|feel|date|dated|imagine|went|kind|couldn't|wouldn't|work|care|make|lost)+\b" replaceWith="I $1" />
</RegularExpressions> </RegularExpressions>
</OCRFixReplaceList> </OCRFixReplaceList>