Merge pull request #2714 from diomed/patch-1

Update srp_OCRFixReplaceList.xml
This commit is contained in:
Nikolaj Olsson 2018-01-22 07:37:34 +01:00 committed by GitHub
commit a192907d24
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -13,16 +13,32 @@
<Word from="Hey" to="Hej" />
<Word from="hey" to="hej" />
<Word from="htjeo" to="htio" />
<Word from="Hočeš" to="Hoćeš" />
<Word from="hočeš" to="hoćeš" />
<Word from="iči" to="ići" />
<Word from="jel" to="je l'" />
<Word from="Jel" to="Je l'" />
<Word from="nedaj" to="ne daj" />
<Word from="Nedaj" to="Ne daj" />
<Word from="nemogu" to="ne mogu" />
<Word from="Nemogu" to="Ne mogu" />
<Word from="nemora" to="ne mora" />
<Word from="Nemora" to="Ne mora" />
<Word from="nemoraš" to="ne moraš" />
<Word from="Nemoraš" to="Ne moraš" />
<Word from="predamnom" to="preda mnom" />
<Word from="Predamnom" to="Preda mnom" />
<Word from="Rješit" to="Riješit" />
<Word from="samnom" to="sa mnom" />
<Word from="Samnom" to="Sa mnom" />
<Word from="smjeo" to="smio" />
<Word from="uopče" to="uopće" />
<Word from="Uopče" to="Uopće" />
<Word from="umijesto" to="umjesto" />
<Word from="Umijesto" to="Umjesto" />
<Word from="uvjek" to="uvijek" />
<Word from="Uvjek" to="Uvijek" />
<Word from="valda" to="valjda" />
<Word from="zamnom" to="za mnom" />
<Word from="Zamnom" to="Za mnom" />
<Word from="želila" to="željela" />
</WholeWords>
<PartialWordsAlways />
@ -65,11 +81,7 @@
<LinePart from="jeli ti " to="je li ti" />
<LinePart from="Jeli to " to="Je li to" />
<LinePart from="Nebrini" to="Ne brini" />
<LinePart from="nedaj" to="ne daj" />
<LinePart from="ne ću" to="neću" />
<LinePart from="Nemogu" to="Ne mogu" />
<LinePart from="ne mogu" to="ne mogu" />
<LinePart from="Nemoraš" to="Ne moraš" />
<LinePart from="od kako" to="otkako" />
<LinePart from="Si dobro" to="Jesi li dobro" />
<LinePart from="Svo vreme" to="Sve vrijeme" />
@ -81,6 +93,16 @@
<EndLines />
<WholeLines />
<RegularExpressions>
<!-- Mojibake repair: each rule below maps the character pair produced when a UTF-8 encoded letter is decoded as Windows-1250 back to the intended letter (for example, the bytes C4 8D of "č" read as "ÄŤ"). -->
<!-- The find values for ć, š and Ć were previously identical to their replaceWith values, so those three rules never changed anything; they now hold the actual mis-decoded pairs. -->
<RegEx find="ÄŤ" replaceWith="č" />
<RegEx find="Ä‡" replaceWith="ć" />
<RegEx find="Ä‘" replaceWith="đ" />
<RegEx find="Ĺľ" replaceWith="ž" />
<RegEx find="Ĺˇ" replaceWith="š" />
<RegEx find="ÄŚ" replaceWith="Č" />
<RegEx find="Ä†" replaceWith="Ć" />
<!-- NOTE(review): the second byte of "Đ" (0x90) has no printable Windows-1250 mapping, so only "Ä" is visible here. Presumably rules run in document order, which is why this bare "Ä" rule sits after every other "Ä"-prefixed rule; confirm before reordering. -->
<RegEx find="Ä" replaceWith="Đ" />
<RegEx find="Ĺ " replaceWith="Š" />
<RegEx find="Ĺ˝" replaceWith="Ž" />
<RegEx find="đž" replaceWith="dž" />
<RegEx find="ajsmiješnij" replaceWith="ajsmješnij" />
<!-- Capitalises "božič"/"božić" (with an optional case ending captured as $1) and normalises the final consonant to "ć". -->
<RegEx find="boži[čć]([aeiu]|em|ima)?\b" replaceWith="Božić$1" />
@ -90,42 +112,37 @@
<!-- Spelling and orthography fixes written as regular expressions. In replaceWith, $1 and $2 presumably insert the text matched by the first and second capture group of find (regex substitution syntax), which carries the original letter case or word ending over into the result. -->
<!-- NOTE(review): several rules in this run overlap (a capture-group variant next to a bare-substring variant of the same fix), and the two rules for "zahtijeva" and "zahtjeva" rewrite in opposite directions. This looks like the old and new lines of a diff shown together; confirm which of each pair is current. -->
<RegEx find="([gG])-đo +(?=[A-ZČĐŠŽ])" replaceWith="$1gđo " />
<RegEx find="gdina +(?=[A-ZČĐŠŽ])" replaceWith="g. " />
<RegEx find=" gosp +" replaceWith=" g. " />
<RegEx find="([hH])oč" replaceWith="$1oć" />
<RegEx find="Jel si sigur" replaceWith="Jesi li sigur" />
<RegEx find="Jel' si sigur" replaceWith="Jesi li sigur" />
<RegEx find="\b([jJ])el\?" replaceWith="$1e l'?" />
<RegEx find="\bJel'" replaceWith="Je l'" />
<!-- Moves the period from after the word to in front of the digit, e.g. "kalibar. 45" becomes "kalibar .45". -->
<RegEx find="([kK]alib(?:ar|r[aeui]))\. *([0-9])" replaceWith="$1 .$2" />
<RegEx find="([mM])jenjati" replaceWith="$1ijenjati" />
<RegEx find="([mM])oguč" replaceWith="$1oguć" />
<RegEx find="([mM])jenja(?!č)" replaceWith="$1ijenja" />
<RegEx find="oguč" replaceWith="oguć" />
<RegEx find="\b([nN])ebih?" replaceWith="$1e bi" />
<RegEx find="\b([nN])eč([ue]š?|emo|ete)\b" replaceWith="$1eć$2" />
<RegEx find="\b([nN])emože(mo|š|te)?\b" replaceWith="$1e može$2" />
<RegEx find="emo[zž]e" replaceWith="e može" />
<RegEx find="\b([nN])ezna([šm]o?|t[ei]|ju|jući|vši)?\b" replaceWith="$1e zna$2" />
<RegEx find="najcijenjen" replaceWith="najcjenjen" />
<RegEx find="N[jJ]u Jork" replaceWith="Njujork" />
<RegEx find="([oO])d([kp])" replaceWith="$1t$2" />
<RegEx find="([oO])ružij([aeu])" replaceWith="$1ružj$2" />
<RegEx find="ružij" replaceWith="ružj" />
<RegEx find="([oO])sječa" replaceWith="$1sjeća" />
<RegEx find="([pPdD])onje([lt])" replaceWith="$1onije$2" />
<RegEx find="([pP])objedi([mšto])" replaceWith="$1obijedi$2" />
<RegEx find="redamnom" replaceWith="reda mnom" />
<RegEx find="redpostav" replaceWith="retpostav" />
<RegEx find="([pP])rimjeti" replaceWith="$1rimijeti" />
<RegEx find="ed([ph])" replaceWith="et$1" />
<RegEx find="rimjeti" replaceWith="rimijeti" />
<RegEx find="([pP])romjeni([mštol])" replaceWith="$1romijeni$2" />
<RegEx find="([rR])azumijeć" replaceWith="$1azumjeć" />
<RegEx find="azumijeć" replaceWith="azumjeć" />
<RegEx find="rascjepljen" replaceWith="rascijepljen" />
<RegEx find="redhodn" replaceWith="rethodn" />
<RegEx find="rimjenjen" replaceWith="rimijenjen" />
<RegEx find="([^d])rješit" replaceWith="$1riješit" />
<RegEx find="([sSzZ])amnom" replaceWith="$1a mnom" />
<RegEx find="([sS])lijede[čć]([aeiu]|e[mg])" replaceWith="$1ljedeć$2" />
<RegEx find="([sS])mješno" replaceWith="$1miješno" />
<RegEx find="([uU])mijesto" replaceWith="$1mjesto" />
<RegEx find="([uU])spijeh" replaceWith="$1spjeh" />
<RegEx find="spijeh" replaceWith="spjeh" />
<RegEx find="([uU])spiješ(an|n[aeiou]|no[mgj])" replaceWith="$1spješ$2" />
<RegEx find="([uU])vjek" replaceWith="$1vijek" />
<RegEx find="\b([vV])eč([aeiou])" replaceWith="$1eć$2" />
<RegEx find="([zZ])ahtijeva" replaceWith="$1ahtjeva" />
<RegEx find="\b([vV])eč([aiu]|[ei][mg]|ih|ima|in[iu]|uom|o[mj])?\b" replaceWith="$1eć$2" />
<RegEx find="([zZ])ahtjeva([ojlmšt])" replaceWith="$1ahtijeva$2" />
<!-- Drops the period between "kao"/"sao" and a following colon. -->
<RegEx find="([ks]ao)\.:" replaceWith="$1:" />
<!-- Replaces a capital "I" followed by "j" with "lj" when the pair sits between two lowercase letters (lookbehind and lookahead leave the neighbours untouched). -->
<RegEx find="(?&lt;=[a-zčđšž])Ij(?=[a-zčđšž])" replaceWith="lj" />
@ -231,4 +248,4 @@
<!-- <RegEx find="^\.{3}([a-zčđšž&quot;&lt;])" replaceWith="$1" /> -->
<!-- <RegEx find=" +([.?!])" replaceWith="$1" /> -->
</RegularExpressions>
</OCRFixReplaceList>