Update hrv_OCRFixReplaceList.xml

Closes #1877
This commit is contained in:
Kruno H 2016-08-02 11:05:42 +02:00 committed by Waldi Ravens
parent a36657bc20
commit 7bc6953e1e

View File

@ -38,6 +38,7 @@
<Word from="bregu" to="brijegu" />
<Word from="ceo" to="cijeli" />
<Word from="Ceo" to="Cijeli" />
<Word from="cepa" to="cijepa" />
<Word from="čaršav" to="plahta" />
<Word from="čaršave" to="plahte" />
<Word from="čaršavima" to="plahtama" />
@ -152,10 +153,14 @@
<Word from="kijim" to="kojim" />
<Word from="Ko" to="Tko" />
<Word from="komandni" to="zapovjedni" />
<Word from="kombinuju" to="kombiniraju" />
<Word from="kompanija" to="tvrtka" />
<Word from="koa" to="kao" />
<Word from="koaj" to="koja" />
<Word from="Kontroliši" to="Kontroliraj" />
<Word from="kontroliši" to="kontroliraj" />
<Word from="korpu" to="košaru" />
<Word from="kritikuju" to="kritiziraju" />
<Word from="krstom" to="križem" />
<Word from="križom" to="križem" />
<Word from="kupatilo" to="kupaona" />
@ -410,6 +415,7 @@
<Word from="štagod" to="što god" />
<Word from="šta" to="što" />
<Word from="štp" to="što" />
<Word from="tablu" to="ploču" />
<Word from="takođe" to="također" />
<Word from="Takođe" to="Također" />
<Word from="tečnost" to="tekućina" />
@ -417,6 +423,9 @@
<Word from="točak" to="kotač" />
<Word from="Točak" to="Kotač" />
<Word from="trabam" to="trebam" />
<Word from="trkanje" to="utrkivanje" />
<Word from="trkanja" to="utrkivanja" />
<Word from="trkao" to="utrkivao" />
<Word from="trpeo" to="trpio" />
<Word from="tugi" to="tuzi" />
<Word from="tvrtci" to="tvrtki" />
@ -443,6 +452,7 @@
<Word from="u sred" to="usred" />
<Word from="usredsrede" to="usredotoče" />
<Word from="uticao" to="utjecao" />
<Word from="uticati" to="utjecati" />
<Word from="uveče" to="navečer" />
<Word from="uvijet" to="uvjet" />
<Word from="uvo" to="uho" />
@ -657,7 +667,7 @@
<RegEx find="ajsmiješnij" replaceWith="ajsmješnij" />
<RegEx find="([aA])kcion" replaceWith="$1kcijsk" />
<RegEx find="\b[aA]las([kc])" replaceWith="Aljas$1" />
<RegEx find="nijum" replaceWith="nij" />
<RegEx find="([dn])ijum" replaceWith="$1ij" />
<RegEx find="ngažov" replaceWith="ngažir" />
<RegEx find="armij" replaceWith="vojsk" />
<RegEx find="Armij" replaceWith="Vojsk" />
@ -689,7 +699,7 @@
<RegEx find="([bB])lj?ed([aeiouj])" replaceWith="$1lijed$2" />
<RegEx find="ogastv" replaceWith="ogatstv" />
<RegEx find="([bBVv])ole(n|l[aieo]|ti)\b" replaceWith="$1olje$2" />
<RegEx find="([bvmBVM])oleo" replaceWith="$1olio" />
<RegEx find="([dbvmBVM])oleo" replaceWith="$1olio" />
<RegEx find="([bB])ožij" replaceWith="$1ožj" />
<!-- Capitalise the noun "Božić" (optionally with a case ending) when it is written with a lower-case "b" and/or with "č" instead of "ć". -->
<RegEx find="boži[čć]([aeiu]|em|ima)?\b" replaceWith="Božić$1" />
<!-- Lower-case the adjective "Božićn…" (also fixing "č" to "ć"), except where the lookbehind sees the start of the text or sentence-ending punctuation, optionally followed by a closing quote, plus whitespace. -->
<RegEx find="(?&lt;!\A|[.!?][&quot;”’]?\s+)\bBoži[čć]n([aeiou]|om|im)\b" replaceWith="božićn$1" />
@ -829,7 +839,7 @@
<RegEx find="Hirur" replaceWith="Kirur" />
<RegEx find="hleb" replaceWith="kruh" />
<RegEx find="Hleb" replaceWith="Kruh" />
<RegEx find="([hH])oče(š|mo|te)?\b" replaceWith="$1oće$2" />
<RegEx find="\b([hH])oče" replaceWith="$1oće" />
<RegEx find="\bHolands" replaceWith="Nizozems" />
<RegEx find="\bNizuzem" replaceWith="Nizozem" />
<RegEx find="holesterol" replaceWith="kolesterol" />
@ -846,14 +856,14 @@
<RegEx find="([hH])tj?eo" replaceWith="$1tio" />
<RegEx find="([hH])te([lt])" replaceWith="$1tje$2" />
<RegEx find="orisa" replaceWith="orira" />
<RegEx find="\b([iI])mać([eu]|eš|emo|ete)\b" replaceWith="$1mat ć$2" />
<RegEx find="([iI])nformacij?on" replaceWith="$1nformacijsk" />
<RegEx find="\b([iI])mać" replaceWith="$1mat ć" />
<RegEx find="nformacij?on" replaceWith="nformacijsk" />
<RegEx find="nostranstv" replaceWith="nozemstv" />
<RegEx find="([iI])nsistirać" replaceWith="$1nzistirat ć" />
<RegEx find="([iI])nstikt" replaceWith="$1nstinkt" />
<RegEx find="nsistirać" replaceWith="nzistirat ć" />
<RegEx find="nstikt" replaceWith="nstinkt" />
<RegEx find="interesantn" replaceWith="zanimljiv" />
<RegEx find="Interesantn" replaceWith="Zanimljiv" />
<RegEx find="([iI])nteresova" replaceWith="$1nteresira" />
<RegEx find="teresova" replaceWith="teresira" />
<RegEx find="vjuis" replaceWith="vjuir" />
<RegEx find="vjuiše" replaceWith="vjuira" />
<RegEx find="vjuišu" replaceWith="vjuiraju" />
@ -863,7 +873,7 @@
<RegEx find="([iI])spovj?ed" replaceWith="$1spovijed" />
<RegEx find="([iI])zbe([gć])" replaceWith="$1zbje$2" />
<RegEx find="sčez" replaceWith="ščez" />
<RegEx find="([iI])spresj?ecan" replaceWith="$1spresijecan" />
<!-- "ispresecan" / "ispresjecan" becomes "ispresijecan". The leading "s" is part of the match, so the replacement has to write it back. -->
<RegEx find="spresj?ecan" replaceWith="spresijecan" />
<RegEx find="([iI])spri[čć]ać([eu])" replaceWith="$1spričat ć$2" />
<RegEx find="italijan" replaceWith="talijan" />
<RegEx find="Italijan" replaceWith="Talijan" />
@ -871,12 +881,12 @@
<RegEx find="([iI])znj?eć" replaceWith="$1znijet ć" />
<RegEx find="znj?el" replaceWith="znijel" />
<RegEx find="zolova" replaceWith="zolira" />
<RegEx find="([iI])zgladne" replaceWith="$1zgladnje" />
<RegEx find="zgladne" replaceWith="zgladnje" />
<RegEx find="([iI])zume([ltv])" replaceWith="$1zumje$2" />
<!-- "izneveri…" becomes "iznevjeri…". The "z" is part of the match, so the replacement has to write it back; without it the result is "inevjeri…". -->
<RegEx find="([iI])zneveri" replaceWith="$1znevjeri" />
<RegEx find="zneveri" replaceWith="znevjeri" />
<RegEx find="([iI])zvesn" replaceWith="$1zvjesn" />
<RegEx find="([iI])zvine([mš])" replaceWith="$1spriča$2" />
<RegEx find="([iI])zvinjava([mšojlt]) se" replaceWith="$1spričava$2 se" />
<RegEx find="zvinjava([mšojlt]) se" replaceWith="spričava$1 se" />
<RegEx find="([iI])zvin[iu]([lo])" replaceWith="$1spriča$2" />
<RegEx find="\b([jJ])agnje" replaceWith="$1anje" />
<RegEx find="jereti" replaceWith="hereti" />
@ -913,7 +923,6 @@
<!-- do not take out of RegEx -->
<RegEx find="komanduj" replaceWith="naređuj" />
<RegEx find="inuje" replaceWith="inira" />
<RegEx find="binuju" replaceWith="biniraju" />
<RegEx find="binova" replaceWith="binira" />
<!-- do not touch!!! - this one changes differently (komentira) -->
<RegEx find="mentariše" replaceWith="mentira" />
@ -924,7 +933,7 @@
<!-- kompromitova -->
<RegEx find="omitova" replaceWith="omitira" />
<RegEx find="komšijsk" replaceWith="susjedn" />
<RegEx find="([kK])onfor" replaceWith="$1omfor" />
<RegEx find="onfor" replaceWith="omfor" />
<RegEx find="konkurs" replaceWith="natječaj" />
<RegEx find="Konkurs" replaceWith="Natječaj" />
<RegEx find="kuris" replaceWith="kurir" />
@ -935,7 +944,7 @@
<RegEx find="([kK])orj?en" replaceWith="$1orijen" />
<RegEx find="\b([kK])orp([aei])" replaceWith="$1ošar$2" />
<RegEx find="ritikuje" replaceWith="ritizira" />
<RegEx find="([kK])rofn" replaceWith="$1rafn" />
<RegEx find="rofn" replaceWith="rafn" />
<RegEx find="rompir" replaceWith="rumpir" />
<RegEx find="\b([kK])rst([au]|ića?)?\b" replaceWith="$1riž$2" />
<RegEx find="\b([kK])rstov(e|ima)\b" replaceWith="$1rižev$2" />
@ -964,6 +973,7 @@
<RegEx find="\bličn([aeiou]|im|o[mgj])" replaceWith="osobn$1" />
<RegEx find="\bLičn([aeiou]|im|o[mgj])" replaceWith="Osobn$1" />
<RegEx find="([lL])obanj" replaceWith="$1ubanj" />
<!-- Split the one-word future tense of verbs in "-loviti" ("loviću", "ulovićeš", "plovićemo") into infinitive + "ću/ćeš/će/ćemo/ćete". A future-tense ending followed by a word boundary is required so that surnames such as "Pavlović" are not rewritten. -->
<RegEx find="([lL])ović(u|eš?|emo|ete)\b" replaceWith="$1ovit ć$2" />
<RegEx find="\b([lL])jep([aeiou]|om|oj|ima)\b" replaceWith="$1ijep$2" />
<RegEx find="([lL])uda(k|k[aeu]|kom|ci|čk[aeiou]|čkom|kinj[aeiou])\b" replaceWith="$1uđa$2" />
<RegEx find="([lL]u|[Pp]re|[sS]vi)deo" replaceWith="$1dio" />
@ -992,7 +1002,7 @@
<RegEx find="([mM])lj?ečn" replaceWith="$1liječn" />
<RegEx find="leven" replaceWith="ljeven" />
<RegEx find="([mM])oč([iun])" replaceWith="$1oć$2" />
<RegEx find="([mM])oguč" replaceWith="$1oguć" />
<RegEx find="oguč" replaceWith="oguć" />
<RegEx find="([mpsMPS])olić" replaceWith="$1olit ć" />
<RegEx find="([mM])orać([eu])" replaceWith="$1orat ć$2" />
<RegEx find="otivisa" replaceWith="otivira" />
@ -1005,10 +1015,8 @@
<RegEx find="muzičk" replaceWith="glazben" />
<RegEx find="Muzičk" replaceWith="Glazben" />
<RegEx find="ajcijenjen" replaceWith="ajcjenjen" />
<RegEx find="\b([nN])amer([aeiou]|om|n[ao]|no[mgj]|nim|a[mv]a|ava[mš]|avam?o|avate|avaju|aval[aeio])\b" replaceWith="$1amjer$2" />
<RegEx find="\b([nN])amj?en([aeiou])" replaceWith="$1amjen$2" />
<RegEx find="\b([nN])amj?eni([mštol])" replaceWith="$1amijeni$2" />
<RegEx find="([nN])amj?enjen" replaceWith="$1amijenjen" />
<RegEx find="([nN]a|[Ss])mešta" replaceWith="$1mješta" />
<RegEx find="([nN])ane([lt])" replaceWith="$1anje$2" />
<RegEx find="([nN])amesti" replaceWith="$1amjesti" />
@ -1028,7 +1036,6 @@
<RegEx find="(?&lt;!j)emačk" replaceWith="jemačk" />
<RegEx find="[nN]j?em(a?)c([aeiu])" replaceWith="Nijem$1c$2" />
<RegEx find="emo[zž]e" replaceWith="e može" />
<RegEx find="([nN])enamer(n[ao]|no[mgj]|nim)\b" replaceWith="$1enamjer$2" />
<RegEx find="eprijat([an])" replaceWith="eugod$1" />
<RegEx find="epobediv" replaceWith="epobjediv" />
<RegEx find="erj?ešen" replaceWith="eriješen" />
@ -1214,6 +1221,7 @@
<RegEx find="eligijoz" replaceWith="eligioz" />
<RegEx find="eskira" replaceWith="iskira" />
<RegEx find="\b([rR])i?j?ešava" replaceWith="$1ješava" />
<RegEx find="(?&lt;!k)amj?enjen" replaceWith="amijenjen" />
<RegEx find="rimjenjen" replaceWith="rimijenjen" />
<RegEx find="([rR])izik(ova|uje)" replaceWith="$1iskira" />
<RegEx find="([^d])rješit" replaceWith="$1riješit" />
@ -1292,7 +1300,7 @@
<RegEx find="([sS])re[cčć](a?)n" replaceWith="$1ret$2n" />
<RegEx find="\b([sS])rj?ed([au]|om|ama)\b" replaceWith="$1rijed$2" />
<RegEx find="\b([sS])ta[čć]([eu])" replaceWith="$1tat ć$2" />
<RegEx find="([sS])tavr" replaceWith="$1tvar" />
<RegEx find="tavr" replaceWith="tvar" />
<RegEx find="\b([sS])tj?en([aeu])" replaceWith="$1tijen$2" />
<RegEx find="stomak" replaceWith="trbuh" />
<RegEx find="Stomak" replaceWith="Trbuh" />
@ -1310,8 +1318,8 @@
<RegEx find="used" replaceWith="usjed" />
<RegEx find="\b([sS])uv([aeiou])\b" replaceWith="$1uh$2" />
<RegEx find="([sS])uvo([mgj])\b" replaceWith="$1uho$2" />
<RegEx find="suštin([eio])" replaceWith="biti" />
<RegEx find="Suštin([eio])" replaceWith="Biti" />
<RegEx find="suštin[eio]" replaceWith="biti" />
<RegEx find="Suštin[eio]" replaceWith="Biti" />
<RegEx find="vedo([čkc])" replaceWith="vjedo$1" />
<RegEx find="([sS])vesn" replaceWith="$1vjesn" />
<RegEx find="\b([sS])vjest\b" replaceWith="$1vijest" />
@ -1321,8 +1329,8 @@
<RegEx find="([sS])vež([aeiu]|[io]m|oj|in[aeiou]|inom)?\b" replaceWith="$1vjež$2" />
<!-- "svet" / "svjet" (optionally + "a", "u", "om") becomes "svijet…", unless the following word starts with a capital letter or with "vod", "stvar(i)", "čov(j)ek" or "pism". NOTE(review): the lookahead presumably protects "svet" used in the sense of "holy"; confirm. -->
<RegEx find="\b([sS])vj?et([au]|om)?\b(?!\s+([A-ZČĐŠŽ]|vod|stvari?|čovj?ek|pism))" replaceWith="$1vijet$2" />
<RegEx find="([sS])vi?j?etsk" replaceWith="$1vjetsk" />
<RegEx find="šagarep" replaceWith="mrkv" />
<RegEx find="Šagarep" replaceWith="Mrkv" />
<RegEx find="šar?garep" replaceWith="mrkv" />
<RegEx find="Šar?garep" replaceWith="Mrkv" />
<RegEx find="([šŠ])ečer" replaceWith="$1ećer" />
<RegEx find="([šŠ])olj" replaceWith="$1alic" />
<RegEx find="\bŠpanij([aeou])" replaceWith="Španjolsk$1" />
@ -1331,8 +1339,8 @@
<RegEx find="Štab" replaceWith="Stožer" />
<RegEx find="štamparsk" replaceWith="tiskovn" />
<RegEx find="Štamparsk" replaceWith="Tiskovn" />
<RegEx find="takmičenj([aeiu])" replaceWith="natjecanj$1" />
<RegEx find="Takmičenj([aeiu])" replaceWith="Natjecanj$1" />
<RegEx find="takmičenj" replaceWith="natjecanj" />
<RegEx find="Takmičenj" replaceWith="Natjecanj" />
<RegEx find="\b([tT])ač(an|n[aeoiu]|no[mgj]|nima|nij[aeiu]|nij[ei]m|nijoj|k[aeiou]|kama)" replaceWith="$1oč$2" />
<RegEx find="\b([nN])etač([an])" replaceWith="$1etoč$2" />
<RegEx find="talas([ei])" replaceWith="valov$1" />
@ -1346,16 +1354,15 @@
<RegEx find="([^\Wi])terati\b" replaceWith="$1tjerati" />
<RegEx find="([tT])erać" replaceWith="$1jerat ć" />
<RegEx find="([tT])e?rba" replaceWith="$1reba" />
<RegEx find="tester([aeiou])" replaceWith="pil$1" />
<RegEx find="Tester([aeiou])" replaceWith="Pil$1" />
<RegEx find="tester" replaceWith="pil" />
<RegEx find="Tester" replaceWith="Pil" />
<RegEx find="\b([tT])j?esn([aeiou])" replaceWith="$1ijesn$2" />
<RegEx find="točkov([aei])" replaceWith="kotač$1" />
<RegEx find="\b([tT])okom(?!\s+ri?j?eke)" replaceWith="$1ijekom" />
<RegEx find="\b([tT])oleris" replaceWith="$1olerir" />
<RegEx find="([tT])oplot" replaceWith="$1oplin" />
<RegEx find="([tT])raći" replaceWith="$1rati" />
<RegEx find="([tT])rajać" replaceWith="$1rajat ć" />
<RegEx find="([tT])ražić" replaceWith="$1ražit ć" />
<RegEx find="([tT])ra(ja|ži)ć" replaceWith="$1ra$2t ć" />
<RegEx find="([Tt])rj?eza" replaceWith="$1rijeza" />
<RegEx find="\b([tT])rpe([lt])" replaceWith="$1rpje$2" />
<RegEx find="([Pp])retrpe([lt])" replaceWith="$1retrpje$2" />
@ -1468,9 +1475,9 @@
<RegEx find="([vV])rtel([aeio])" replaceWith="$1rtjel$2" />
<RegEx find="([zZ])ahtjeva([ojlmšt])" replaceWith="$1ahtijeva$2" />
<RegEx find="([zZ])ahtev([aeioun])?" replaceWith="$1ahtjev$2" />
<RegEx find="([zZ])amen([aeiou])" replaceWith="$1amjen$2" />
<RegEx find="([zZ])amen" replaceWith="$1amjen" />
<RegEx find="(?&lt;!k)amenj(uj|iv)" replaceWith="amjenj$1" />
<RegEx find="\b([zZ])amer([aei]|[ai]š|[ai]mo|[ai]te|il[aie]|io)\b" replaceWith="$1amjer$2" />
<RegEx find="\b([zZnN]a[mv])er" replaceWith="$1jer" />
<RegEx find="([zZ])amj?eni([šmo]|mo|t[mš]|l[aeio]|še)?\b" replaceWith="$1amijeni$2" />
<RegEx find="\b([zZ])ane([lt])" replaceWith="$1anje$2" />
<RegEx find="([zZ])ave([rs])([aeuo])" replaceWith="$1avje$2$3" />
@ -1494,7 +1501,7 @@
<RegEx find="žur[ck]" replaceWith="zabav" />
<RegEx find="Žur[ck]" replaceWith="Zabav" />
<RegEx find="([sSzZ])amnom" replaceWith="$1a mnom" />
<RegEx find="([sSnNPpzZ])(r?[aoi])z([vn])ać" replaceWith="$1$2z$3at ć" />
<RegEx find="([sSnNpPzZ]r?[aoi]z[vn])ać" replaceWith="$1at ć" />
<!-- months [\b must come after jun] -->
<RegEx find="januar([au]|ima)" replaceWith="siječnj$1" />
<RegEx find="([sS])ječnj([au]|ima)" replaceWith="$1iječnj$2" />
@ -1512,8 +1519,7 @@
<!-- experimental -->
<RegEx find="đž" replaceWith="dž" />
<RegEx find="(?&lt;![ml])a([blcrnz])ić" replaceWith="a$1it ć" />
<RegEx find="a(jm|mn)ić" replaceWith="a$1it ć" />
<RegEx find="a[mn]tić" replaceWith="amtit ć" />
<!-- Split the one-word future tense of verbs in "-ajmiti", "-amniti" and "-amtiti" (e.g. "unajmiću", "zapamtiću") into infinitive + "ć…". "mt" is its own alternative because "a(jm|mn)(t?)ić" can never match "amtić". -->
<RegEx find="a(jm|mn|mt)ić" replaceWith="a$1it ć" />
<RegEx find="alolet" replaceWith="aloljet" />
<RegEx find="apoved" replaceWith="apovjed" />
<RegEx find="apovj?est" replaceWith="apovijed" />
@ -1544,6 +1550,7 @@
<RegEx find="finisa" replaceWith="finira" />
<RegEx find="fi(sa|še)" replaceWith="fira" />
<RegEx find="frov" replaceWith="frir" />
<RegEx find="ick([eoiu])" replaceWith="ičk$1" />
<RegEx find="([ai])nić" replaceWith="$1nit ć" />
<RegEx find="(?&lt;!\b[Oo]zlo|\b[Ii]sp(rip)?ov|i)jeđen" replaceWith="ijeđen" />
<RegEx find="(?&lt;!hva)lisati" replaceWith="lirati" />