Update bih_OCRFixReplaceList.xml

This commit is contained in:
May Kittens Devour Your Soul 2018-02-03 17:49:57 +01:00 committed by GitHub
parent 64a3c85b5e
commit b4c2933e81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -25,9 +25,6 @@
<Word from="broncane" to="brončane" />
<Word from="broncanu" to="brončanu" />
<Word from="budes" to="budeš" />
<Word from="buducnost" to="budućnost" />
<Word from="buducnosti" to="budućnosti" />
<Word from="buducnoscu" to="budućnošću" />
<Word from="Cak" to="Čak" />
<Word from="cak" to="čak" />
<Word from="Cao" to="Ćao" />
@ -103,7 +100,6 @@
<Word from="covjecnost" to="čovječnost" />
<Word from="necovjecnost" to="nečovječnost" />
<Word from="necovjecnosti" to="nečovječnosti" />
<Word from="necovjecnoscu" to="nečovječnošću" />
<Word from="cerka" to="kći" />
<Word from="Cerka" to="Kći" />
<Word from="cerke" to="kćeri" />
@ -141,11 +137,10 @@
<Word from="cuvali" to="čuvali" />
<Word from="cuveni" to="čuveni" />
<!--d-->
<Word from="dace" to="dat će" />
<Word from="davo" to="vrag" />
<Word from="definise" to="definira" />
<Word from="definisi" to="definiraj" />
<Word from="delimicno" to="djelomično" />
<Word from="delimicni" to="djelomični" />
<Word from="Desava" to="Događa" />
<Word from="desava" to="događa" />
<Word from="disi" to="diši" />
@ -218,6 +213,7 @@
<Word from="Hoces" to="Hoćeš" />
<Word from="hoces" to="hoćeš" />
<Word from="hodas" to="hodaš" />
<!--i-->
<Word from="Ici" to="Ići" />
<Word from="ici" to="ići" />
<Word from="iceg" to="ičeg" />
@ -230,6 +226,8 @@
<Word from="iduce" to="iduće" />
<Word from="iducem" to="idućem" />
<Word from="iduceg" to="idućeg" />
<Word from="inace" to="inače" />
<Word from="Inace" to="Inače" />
<Word from="isao" to="išao" />
<Word from="isla" to="išla" />
<Word from="istjece" to="istječe" />
@ -462,8 +460,6 @@
<Word from="odlucim" to="odlučim" />
<Word from="okrece" to="okreće" />
<Word from="ogrtac" to="ogrtač" />
<Word from="opcih" to="općih" />
<Word from="opcim" to="općim" />
<Word from="ostra" to="oštra" />
<Word from="ostre" to="oštre" />
<Word from="ostri" to="oštri" />
@ -473,7 +469,6 @@
<Word from="otidi" to="otiđi" />
<Word from="otidji" to="otiđi" />
<Word from="otisao" to="otišao" />
<Word from="otputujes" to="otputuješ" />
<Word from="oziljak" to="ožiljak" />
<!--p-->
<Word from="palaca" to="palača" />
@ -773,6 +768,9 @@
<Word from="trajace" to="trajat će" />
<Word from="Trajace" to="Trajat će" />
<Word from="trci" to="trči" />
<Word from="trce" to="trče" />
<Word from="Trce" to="Trče" />
<Word from="trcim" to="trčim" />
<Word from="trcimo" to="trčimo" />
<Word from="treci" to="treći" />
@ -858,7 +856,6 @@
<Word from="viden" to="viđen" />
<Word from="Viden" to="Viđen" />
<Word from="vishe" to="više" />
<Word from="vjerujes" to="vjeruješ" />
<Word from="vjencati" to="vjenčati" />
<Word from="vjencali" to="vjenčali" />
<Word from="voce" to="voće" />
@ -975,7 +972,7 @@
<RegEx find="([bB])oric" replaceWith="$1orit ć" />
<RegEx find="([bB])ozij([aeiu]|om|ima)?\b" replaceWith="$1ožj$2" />
<RegEx find="[bB]o[zž]ic([aeiun]|em|ima)?\b" replaceWith="Božić$1" />
<RegEx find="([bB])udu[cč]" replaceWith="$1uduć" />
<RegEx find="udu[cč]" replaceWith="uduć" />
<RegEx find="([cCsS])vj?ec([aeiou]|[oe]m|ama)\b" replaceWith="$1vijeć$2" />
<RegEx find="[Cc]acka" replaceWith="čačka" />
<RegEx find="[cC]amac" replaceWith="čamac" />
@ -1019,6 +1016,7 @@
<RegEx find="([Dd])eck" replaceWith="$1ečk" />
<RegEx find="([dD])j?evojcic" replaceWith="$1jevojčic" />
<RegEx find="\b([dD])elic" replaceWith="$1jelić" />
<!-- "delimic…" -> "djelomič…" (e.g. delimicno -> djelomično); must run before the generic "icn" -> "ičn" rule -->
<RegEx find="\b([dD])elimi[cč]" replaceWith="$1jelomič" />
<RegEx find="\b([dD])j?elis\b" replaceWith="$1ijeliš" />
<RegEx find="\b([dD])ecic([aeiou]|om)\b" replaceWith="$1ječic$2" />
<RegEx find="([dD])j?eca([kc])" replaceWith="$1ječa$2" />
@ -1032,29 +1030,28 @@
<RegEx find="drzim" replaceWith="držim" />
<RegEx find="([dD])o[bc]ic([eu])" replaceWith="$1oći ć$2" />
<RegEx find="\b([dDpP])o(d?)nj?ec([eu])" replaceWith="$1o$2nijet ć$3" />
<RegEx find="([dDpP])ovesc([eu])" replaceWith="$1ovest ć$2" />
<RegEx find="ovesc([eu])" replaceWith="ovest ć$1" />
<RegEx find="\bd[jz]ep" replaceWith="džep" />
<RegEx find="\bD[jz]ep" replaceWith="Džep" />
<RegEx find="([dD])osao" replaceWith="$1ošao" />
<!-- "posao" and "pošao" are different words, so that pair cannot be handled by the script at all -->
<RegEx find="([dD])osl([aio])\b" replaceWith="$1ošl$2" />
<RegEx find="([dD])rza([volt])" replaceWith="$1rža$2" />
<RegEx find="([dD])rza(?!k)" replaceWith="$1rža" />
<RegEx find="([dD])us([aeiou])" replaceWith="$1uš$2" />
<RegEx find="dzigeric" replaceWith="jetr" />
<RegEx find="Dzigeric" replaceWith="Jetr" />
<RegEx find="([dD])zinovsk" replaceWith="$1ivovsk" />
<RegEx find="([dD])zinov" replaceWith="$1ivov" />
<RegEx find="gipcan" replaceWith="gipćan" />
<RegEx find="entise" replaceWith="entira" />
<RegEx find="onise" replaceWith="onira" />
<RegEx find="([gG])dj(?!e)" replaceWith="$1đ" />
<RegEx find="([gG])ledac" replaceWith="$1ledat ć" />
<RegEx find="([gG])oruc" replaceWith="$1oruć" />
<RegEx find="\b([gG])radj?([ae])n" replaceWith="$1rađ$2n" />
<RegEx find="(?&lt;![pP])radj" replaceWith="rađ" />
<RegEx find="\b([gG])rijesn" replaceWith="$1rješn" />
<RegEx find="([gG])rj?esi([smotl])" replaceWith="$1riješi$2" />
<RegEx find="([gG])reska" replaceWith="$1reška" />
<RegEx find="([hH])oce(š|mo|te)?\b" replaceWith="$1oće$2" />
<RegEx find="([hH])o[cć]es" replaceWith="$1oćeš" />
<RegEx find="([hH])o[cč]e([sš]|mo|te)?\b" replaceWith="$1oće$2" />
<RegEx find="hri?scan" replaceWith="kršćan" />
<RegEx find="Hri?scan" replaceWith="Kršćan" />
<RegEx find="hronicn" replaceWith="kroničn" />
@ -1064,7 +1061,6 @@
<RegEx find="gnori[sš]e[sš]" replaceWith="gnoriraš" />
<RegEx find="\b([iI])mac([eu]|es|emo|ete)\b" replaceWith="$1mat ć$2" />
<RegEx find="\b([iI])mas" replaceWith="$1maš" />
<RegEx find="([iI])nace" replaceWith="$1nače" />
<RegEx find="nsistirac" replaceWith="nzistirat ć" />
<RegEx find="([iI])zadj([ei])" replaceWith="$1zađ$2" />
<RegEx find="zbec" replaceWith="zbjeć" />
@ -1120,7 +1116,7 @@
<RegEx find="([mM])j?esa([jmns]|n[aio]|no[mgj]|nima?|mo|ju|njem|nju|l[aeio]|t[ei])\b" replaceWith="$1iješa$2" />
<RegEx find="([mM])lj?ecn" replaceWith="$1liječn" />
<RegEx find="\b([mMNn])oc([iuna])" replaceWith="$1oć$2" />
<RegEx find="([mM])oguc" replaceWith="$1oguć" />
<RegEx find="oguc" replaceWith="oguć" />
<RegEx find="([mM])oras" replaceWith="$1oraš" />
<RegEx find="([mM])orac([eu])" replaceWith="$1orat ć$2" />
<RegEx find="otivise([mst])" replaceWith="otivira$1" />
@ -1128,17 +1124,17 @@
<RegEx find="([mMbB])oze" replaceWith="$1ože" />
<RegEx find="([mM])o[zž]es" replaceWith="$1ožeš" />
<RegEx find="([mM])racn" replaceWith="$1račn" />
<RegEx find="([mM])rznj([aeiuo])" replaceWith="$1ržnj$2" />
<RegEx find="([mM])rzec([eu])" replaceWith="$1rzit ć$2" />
<RegEx find="([mM])rznj" replaceWith="$1ržnj" />
<RegEx find="rzec" replaceWith="rzit ć" />
<RegEx find="([mM])uci([mstol])" replaceWith="$1uči$2" />
<RegEx find="([mM])uskar" replaceWith="$1uškar" />
<RegEx find="uskar" replaceWith="uškar" />
<RegEx find="uvas" replaceWith="otaš" />
<RegEx find="muzick" replaceWith="glazben" />
<RegEx find="Muzick" replaceWith="Glazben" />
<RegEx find="([nNZz])acel" replaceWith="$1ačel" />
<RegEx find="([nN])acin" replaceWith="$1ačin" />
<RegEx find="(?&lt;![iI])([nN])acic" replaceWith="$1aći ć" />
<RegEx find="([nN])adji" replaceWith="$1ađi" />
<RegEx find="adji" replaceWith="ađi" />
<RegEx find="aocit" replaceWith="aočit" />
<RegEx find="avas" replaceWith="avaš" />
<RegEx find="amj?enis" replaceWith="amijeniš" />
@ -1178,17 +1174,18 @@
<RegEx find="\b([oO])cev" replaceWith="$1čev" />
<RegEx find="([oO])cek" replaceWith="$1ček" />
<RegEx find="\b([oO])ci" replaceWith="$1či" />
<RegEx find="([oO])dbacen" replaceWith="$1dbačen" />
<RegEx find="dbacen" replaceWith="dbačen" />
<!-- odlican -> odličan, odlucan -> odlučan; $2 carries the matched vowel (i/u) through -->
<RegEx find="([oO])dl([iu])can" replaceWith="$1dl$2čan" />
<RegEx find="\b([oO])dj?ec([aeiou]|om)" replaceWith="$1djeć$2" />
<RegEx find="\b((?:[oO]|[pP]o|[rR]a|[sS]a)d?)sec" replaceWith="$1sjeć" />
<RegEx find="([oO])dvesc([eu])" replaceWith="$1dvest ć$2" />
<RegEx find="ogadj?a" replaceWith="ogađa" />
<RegEx find="dvesc([eu])" replaceWith="dvest ć$1" />
<RegEx find="gadj?a" replaceWith="gađa" />
<RegEx find="onizav" replaceWith="onižav" />
<RegEx find="\b([oO])pci(?!j)" replaceWith="$1pći" />
<RegEx find="rosti[cć]" replaceWith="rostit ć" />
<RegEx find="ruzj" replaceWith="ružj" />
<RegEx find="([oO])sj?e[cč]a" replaceWith="$1sjeća" />
<RegEx find="([oO])slobodić([eu])" replaceWith="$1slobodit ć$2" />
<RegEx find="slobodić([eu])" replaceWith="slobodit ć$1" />
<RegEx find="([oO])sta[čć]([eu])" replaceWith="$1stat ć$2" />
<RegEx find="([oO])svez" replaceWith="$1svjež" />
<RegEx find="([oO])tkri[cč]" replaceWith="$1tkrić" />
@ -1227,7 +1224,7 @@
<RegEx find="([ao])vrj?edis" replaceWith="$1vrijediš" />
<RegEx find="pozoris([tn](?:[aeu]|[eo]m|ima?)?)\b" replaceWith="kazališ$1" />
<RegEx find="Pozoris([tn](?:[aeu]|[eo]m|ima?)?)\b" replaceWith="Kazališ$1" />
<RegEx find="([pP])ozuri" replaceWith="$1ožuri" />
<RegEx find="ozuri" replaceWith="ožuri" />
<RegEx find="redjas" replaceWith="rijaš" />
<RegEx find="redlaz" replaceWith="redlaž" />
<RegEx find="(?&lt;![Šš])([pP])rica" replaceWith="$1riča" />
@ -1330,7 +1327,7 @@
<RegEx find="([sS])tize" replaceWith="$1tiže" />
<RegEx find="([sS])uoci" replaceWith="$1uoči" />
<!-- Word-initial "suti" -> "šuti"; split by case because S -> Š cannot be expressed through a capture group -->
<RegEx find="\bsuti" replaceWith="šuti" />
<RegEx find="\bSuti" replaceWith="Šuti" />
<RegEx find="resut" replaceWith="rešut" />
<RegEx find="resu([ct])" replaceWith="rešu$1" />
<RegEx find="([RVrvg])odja" replaceWith="$1ođa" />
<RegEx find="([sS])veći" replaceWith="$1vijeći" />
<RegEx find="takmicenj" replaceWith="natjecanj" />
@ -1340,7 +1337,6 @@
<RegEx find="\b([tT])ice" replaceWith="$1iče" />
<RegEx find="\b([tTSs])rcan" replaceWith="$1rčan" />
<RegEx find="\b([tT])re[čc]in" replaceWith="$1rećin" />
<RegEx find="\b([tT])rce" replaceWith="$1rče" />
<RegEx find="\b([tTDd])rzis" replaceWith="$1ržiš" />
<RegEx find="\b([tT])isin" replaceWith="$1išin" />
<RegEx find="\b([tT])is([aeiu]|om|e[mg])\b" replaceWith="$1iš$2" />
@ -1409,8 +1405,9 @@
<RegEx find="[zZ]ivo([mgjt])" replaceWith="živo$1" />
<RegEx find="[zZ]lj?ezd([aeiou])" replaceWith="žlijezd$1" />
<RegEx find="\b([zZ])locin" replaceWith="$1ločin" />
<RegEx find="([Zz])naci" replaceWith="$1nači" />
<RegEx find="znajes" replaceWith="znaješ" />
<RegEx find="dajes" replaceWith="daješ" />
<!-- Case-specific rules so a capitalised "Zrtv…" keeps its capital; the optional "r" also matches the spelling "ztv" -->
<RegEx find="zr?tv" replaceWith="žrtv" />
<RegEx find="Zr?tv" replaceWith="Žrtv" />
<RegEx find="zur[ck]" replaceWith="zabav" />
<RegEx find="Zur[ck]" replaceWith="Zabav" />
<RegEx find="zvanicn" replaceWith="služben" />
@ -1420,27 +1417,26 @@
<!-- yxy experimental -->
<RegEx find="bracun" replaceWith="bračun" />
<RegEx find="cupa" replaceWith="čupa" />
<RegEx find="zluduj" replaceWith="zluđuj" />
<RegEx find="zopacen" replaceWith="zopačen" />
<RegEx find="dajes" replaceWith="daješ" />
<RegEx find="rosj?ecn" replaceWith="rosječn" />
<RegEx find="([Rr])oden" replaceWith="$1ođen" />
<RegEx find="jacanj" replaceWith="jačanj" />
<RegEx find="zvecark" replaceWith="zvečark" />
<RegEx find="locest" replaceWith="ločest" />
<RegEx find="ticn" replaceWith="tičn" />
<RegEx find="icn" replaceWith="ičn" />
<RegEx find="zidem" replaceWith="ziđem" />
<RegEx find="zludu" replaceWith="zluđu" />
<RegEx find="zopacen" replaceWith="zopačen" />
<RegEx find="eskuc" replaceWith="eskuć" />
<RegEx find="([aez])vrsi" replaceWith="$1vrši" />
<RegEx find="acma" replaceWith="ačma" />
<RegEx find="aces" replaceWith="aćeš" />
<RegEx find="agicn" replaceWith="agičn" />
<RegEx find="ajvec" replaceWith="ajveć" />
<RegEx find="amcen" replaceWith="amčen" />
<RegEx find="antic([nk])" replaceWith="antič$1" />
<RegEx find="aredu?j" replaceWith="aređ" />
<RegEx find="arezlj" replaceWith="arežlj" />
<RegEx find="aruci" replaceWith="aruči" />
<RegEx find="asticn" replaceWith="astičn" />
<RegEx find="avlac" replaceWith="avlač" />
<RegEx find="azoc" replaceWith="azoč" />
<RegEx find="bacen" replaceWith="bačen" />
@ -1451,11 +1447,10 @@
<RegEx find="cemo" replaceWith="ćemo" />
<RegEx find="ucestvuj" replaceWith="sudjeluj" />
<RegEx find="cinje" replaceWith="činje" />
<RegEx find="[Cc]injenicn" replaceWith="činjeničn" />
<RegEx find="[Cc]injen" replaceWith="činjen" />
<RegEx find="cisc" replaceWith="čišć" />
<RegEx find="Cisc" replaceWith="Čišć" />
<RegEx find="dacn" replaceWith="dačn" />
<RegEx find="dace" replaceWith="dat će" />
<RegEx find="daces" replaceWith="dat ćeš" />
<RegEx find="dinacn" replaceWith="dinačn" />
<RegEx find="dlucn" replaceWith="dlučn" />
@ -1475,7 +1470,7 @@
<RegEx find="erisu" replaceWith="eriraju" />
<RegEx find="esavin" replaceWith="ešavin" />
<RegEx find="estace[sš]" replaceWith="estat ćeš" />
<RegEx find="eticn" replaceWith="etičn" />
<RegEx find="ezoc" replaceWith="ezoč" />
<RegEx find="fise" replaceWith="fira" />
<RegEx find="frick" replaceWith="fričk" />
<!-- also fixes iz/nad/gledaš -->
@ -1486,7 +1481,6 @@
<RegEx find="gusi" replaceWith="guši" />
<RegEx find="(?&lt;!\b[oO]zlo|\b[iI]sp(rip)?ov|i)jedjen" replaceWith="ijeđen" />
<RegEx find="hic(k|en)" replaceWith="hić$1" />
<RegEx find="hoticn" replaceWith="hotičn" />
<!-- Longer pattern first: assuming rules run in file order, "hvaca" would otherwise turn "hvacas" into "hvaćas" before the "hvacas" rule could match -->
<RegEx find="hvacas" replaceWith="hvaćaš" />
<RegEx find="hvaca" replaceWith="hvaća" />
<RegEx find="hva([lt])is" replaceWith="hva$1iš" />
@ -1507,14 +1501,13 @@
<RegEx find="jasnic([eu])" replaceWith="jasnit ć$1" />
<RegEx find="juci" replaceWith="jući" />
<RegEx find="kaslja" replaceWith="kašlja" />
<RegEx find="krsi" replaceWith="krši" />
<RegEx find="kusava" replaceWith="kušava" />
<RegEx find="lican" replaceWith="ličan" />
<RegEx find="lacen" replaceWith="lačen" />
<RegEx find="l([ae])dj" replaceWith="l$1đ" />
<RegEx find="licn" replaceWith="ličn" />
<RegEx find="mj?ecen" replaceWith="mjećen" />
<RegEx find="([mv])edje" replaceWith="$1eđe" />
<RegEx find="micn" replaceWith="mičn" />
<RegEx find="miruce" replaceWith="miruće" />
<RegEx find="mec([eu])" replaceWith="meć$1" />
<RegEx find="mrsav" replaceWith="mršav" />
@ -1524,7 +1517,7 @@
<RegEx find="(?&lt;!sit)nisem" replaceWith="niram" />
<RegEx find="(?&lt;!sit)nises" replaceWith="niraš" />
<RegEx find="([rn])adj?en" replaceWith="$1ađen" />
<RegEx find="nisemo" replaceWith="niramo" />
<RegEx find="nise" replaceWith="nira" />
<RegEx find="nisten" replaceWith="ništen" />
<RegEx find="(?&lt;![Ee]t|[Gg]e|[Rr]i|[Tt]am)noc" replaceWith="noć" />
<RegEx find="oceju" replaceWith="oće" />
@ -1534,6 +1527,7 @@
<RegEx find="nosc" replaceWith="nošć" />
<RegEx find="sta([jn])es" replaceWith="sta$1eš" />
<RegEx find="osvece" replaceWith="osveće" />
<RegEx find="oti[cć]n" replaceWith="otičn" />
<RegEx find="ozen" replaceWith="ožen" />
<RegEx find="pecific" replaceWith="pecifič" />
<RegEx find="plase" replaceWith="plaše" />
@ -1571,7 +1565,7 @@
<RegEx find="tumaci" replaceWith="tumači" />
<RegEx find="\b([DdtTRrNn])uzn" replaceWith="$1užn" />
<RegEx find="\b([dtDTRr])uzan" replaceWith="$1užan" />
<RegEx find="tvrdju" replaceWith="tvrđu" />
<RegEx find="vrdj" replaceWith="vrđ" />
<RegEx find="tvrden" replaceWith="tvrđen" />
<RegEx find="\bujes\b" replaceWith="uješ" />
<RegEx find="\bzes\b" replaceWith="žeš" />
@ -1580,6 +1574,7 @@
<RegEx find="(?&lt;![Kk]lj)učiće" replaceWith="učit će" />
<RegEx find="udju" replaceWith="uđu" />
<RegEx find="([Uu])nisti" replaceWith="$1ništi" />
<RegEx find="nistav" replaceWith="ništav" />
<!-- "busenje" is an exception but "bušenje" is not, so the more common word prevails -->
<RegEx find="(?&lt;![jk])us[cč]" replaceWith="ušć" />
<RegEx find="usenj" replaceWith="ušenj" />
@ -1592,10 +1587,8 @@
<RegEx find="(?&lt;!p)rsten" replaceWith="ršten" />
<RegEx find="vruc" replaceWith="vruć" />
<RegEx find="vuce" replaceWith="vuče" />
<RegEx find="([Sso])naci" replaceWith="$1naći" />
<RegEx find="\b([Sso])naci" replaceWith="$1naći" />
<RegEx find="edjen" replaceWith="eđen" />
<RegEx find="([Zz])naci" replaceWith="$1nači" />
<!-- Case-specific rules so a capitalised "Zrtv…" keeps its capital; the optional "r" also matches the spelling "ztv" -->
<RegEx find="zr?tv" replaceWith="žrtv" />
<RegEx find="Zr?tv" replaceWith="Žrtv" />
<RegEx find="ziljk" replaceWith="žiljk" />
<RegEx find="zitk" replaceWith="žitk" />
<RegEx find="zgajivac" replaceWith="zgajivač" />
@ -1608,5 +1601,6 @@
<RegEx find="zurb" replaceWith="žurb" />
<RegEx find="Zurb" replaceWith="Žurb" />
<RegEx find="zvucen" replaceWith="zvučen" />
<!-- <RegEx find=" " replaceWith=" " /> -->
</RegularExpressions>
</OCRFixReplaceList>