Merge pull request #7921 from diomed/main

Croatian OCR
This commit is contained in:
Nikolaj Olsson 2024-02-17 07:47:14 +01:00 committed by GitHub
commit be4cc84431
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -63,6 +63,8 @@
<Word from="bekstva" to="bijega" />
<Word from="bekstvo" to="bijeg" />
<Word from="bekstvu" to="bijegu" />
<Word from="belački" to="bjelački" />
<Word from="belačko" to="bjelačko" />
<Word from="belče" to="bijelče" />
<Word from="begstvu" to="bijegu" />
<!-- <Word from="beo" to="bijel | bio" /> ne može jer su dvije verzije! -->
@ -119,6 +121,8 @@
<Word from="Boksujem" to="Boksam" />
<Word from="boksuješ" to="boksaš" />
<Word from="Boksuješ" to="Boksaš" />
<Word from="boleće" to="boljet će" />
<Word from="Boleće" to="Boljet će" />
<Word from="bolesan" to="bolestan" />
<Word from="Bolesan" to="Bolestan" />
<Word from="bolila" to="boljela" />
@ -439,7 +443,6 @@
<Word from="ejst" to="jest" />
<Word from="en" to="ne" />
<Word from="enki" to="neki" />
<Word from="emitovao" to="emitirao" />
<Word from="emitovali" to="emitirali" />
<Word from="Emitovali" to="Emitirali" />
<Word from="emitovati" to="emitirati" />
@ -611,6 +614,7 @@
<Word from="igraćeš" to="igrat ćeš" />
<Word from="ilustrujete" to="ilustrirate" />
<Word from="ilustruju" to="ilustriraju" />
<Word from="inaće" to="inače" />
<Word from="inačr" to="inače" />
<Word from="inspirisao" to="inspirirao" />
<Word from="Inspirisao" to="Inspirirao" />
@ -1364,6 +1368,7 @@
<Word from="odelenje" to="odjel" />
<Word from="odelenju" to="odjelu" />
<Word from="odeljak" to="odjeljak" />
<Word from="odeljci" to="odjeljci" />
<Word from="odeljena" to="odijeljena" />
<Word from="odeljenje" to="odjel" />
<Word from="Odeljenje" to="Odjel" />
@ -1403,6 +1408,9 @@
<Word from="Odnjeti" to="Odnijeti" />
<Word from="odoliti" to="odoljeti" />
<Word from="odoleti" to="odoljeti" />
<Word from="odoleva" to="odolijeva" />
<Word from="odolevam" to="odolijevam" />
<Word from="odolevaš" to="odolijevaš" />
<Word from="odneti" to="odnijeti" />
<Word from="odrešene" to="odriješene" />
<Word from="Odseo" to="Odsjeo" />
@ -1544,6 +1552,7 @@
<Word from="pena" to="pjena" />
<Word from="penom" to="pjenom" />
<Word from="penu" to="pjenu" />
<Word from="penušavo" to="pjenušavo" />
<Word from="penušavu" to="pjenušavu" />
<Word from="pene" to="pjene" />
<Word from="penzionerski" to="umirovljenički" />
@ -1651,6 +1660,7 @@
<Word from="policja" to="policija" />
<Word from="poludili" to="poludjeli" />
<Word from="Polna" to="Spolna" />
<Word from="pomć" to="pomoć" />
<Word from="pomaći" to="pomaknuti" />
<Word from="pomaknim" to="pomaknem" />
<Word from="pomaknio" to="pomaknuo" />
@ -1787,10 +1797,12 @@
<Word from="preseći" to="presjeći" />
<Word from="Preseći" to="Presjeći" />
<Word from="prestonica" to="prijestolnica" />
<Word from="prestola" to="prijestolja" />
<Word from="prestupnik" to="prijestupnik" />
<Word from="preti" to="prijeti" />
<Word from="prete" to="prijete" />
<Word from="Prete" to="Prijete" />
<Word from="preteće" to="prijeteće" />
<Word from="pretvorni" to="prijetvorni" />
<Word from="pretvorno" to="prijetvorno" />
<Word from="prjeti" to="prijeti" />
@ -1820,6 +1832,8 @@
<Word from="neprimećena" to="neprimijećena" />
<!-- Imperfective forms take the short "je", consistent with the "primećujemo" entry that maps to "primjećujemo"; the duplicated "primećivalo" row was dropped. -->
<Word from="primećivala" to="primjećivala" />
<Word from="primećivalo" to="primjećivalo" />
<Word from="Primećivalo" to="Primjećivalo" />
<Word from="primećujemo" to="primjećujemo" />
<Word from="prineo" to="prinio" />
<Word from="Prineo" to="Prinio" />
@ -2068,9 +2082,6 @@
<Word from="samosvest" to="samosvijest" />
<Word from="samnom" to="sa mnom" />
<Word from="Samnom" to="Sa mnom" />
<Word from="sa nama" to="s nama" />
<Word from="samp" to="samo" />
<Word from="sanke" to="sanjke" />
<Word from="saobraćaja" to="prometa" />
@ -2875,6 +2886,7 @@
<Word from="zaćutala" to="zašutjela" />
<Word from="zaćutao" to="zašutio" />
<Word from="zado" to="zato" />
<Word from="zago" to="zato" />
<Word from="zagreva" to="zagrijava" />
<Word from="Zagreva" to="Zagrijava" />
<Word from="zagrevanja" to="zagrijavanja" />
@ -3033,6 +3045,7 @@
<Word from="Brejdi" to="Brady" />
<Word from="Bridžit" to="Bridget" />
<Word from="Bridžet" to="Bridget" />
<Word from="Britni" to="Britney" />
<Word from="Brižit" to="Brigitte" />
<Word from="Bruks" to="Brooks" />
<Word from="Buč" to="Butch" />
@ -3185,6 +3198,7 @@
<Word from="Ilejn" to="Elaine" />
<Word from="Isak" to="Isaac" />
<Word from="Ist" to="East" />
<Word from="Iston" to="Easton" />
<Word from="Istvud" to="Eastwood" />
<Word from="Johni" to="Johnny" />
<Word from="Judžin" to="Eugene" />
@ -3219,7 +3233,8 @@
<Word from="Koltrejn" to="Coltraine" />
<Word from="Koni" to="Connie" />
<Word from="Konor" to="Connor" />
<Word from="Kortni" to="Courtney" />
<Word from="Koralajn" to="Coraline" />
<Word from="Kortni" to="Courtney" />
<Word from="Krejg" to="Craig" />
<Word from="Krejga" to="Craiga" />
<Word from="Krejgu" to="Craigu" />
@ -3941,6 +3956,8 @@
<LinePart from="moraš da daš" to="moraš dati" />
<LinePart from="Moraš da ideš" to="Moraš ići" />
<LinePart from="moraš da ideš" to="moraš ići" />
<LinePart from="Moraš da odeš" to="Moraš otići" />
<LinePart from="moraš da odeš" to="moraš otići" />
<LinePart from="moram da bacim" to="moram baciti" />
<LinePart from="Moram da nađem" to="Moram naći" />
<LinePart from="Moramo da pokušamo" to="Moramo pokušati" />
@ -4078,7 +4095,10 @@
<Beginning from="l " to="I " />
<Beginning from="- l " to="- I " />
</BeginLines>
<EndLines />
<EndLines>
<Ending from=".»." to="»." />
<Ending from="itr" to="iti" />
</EndLines>
<RegularExpressions>
<!-- deklinacije imenica i konjugacije glagola -->
<RegEx find="([0-9])-ogodišnj" replaceWith="$1-godišnj" />
@ -4089,6 +4109,7 @@
<RegEx find="Advokat(?![si])" replaceWith="Odvjetnik" />
<RegEx find="\badvokatsk" replaceWith="odvjetničk" />
<RegEx find="Advokatsk" replaceWith="Odvjetničk" />
<RegEx find="pađav" replaceWith="pašav" />
<RegEx find="ajsmiješnij" replaceWith="ajsmješnij" />
<RegEx find="([aA])kcion(?!ar)" replaceWith="$1kcijsk" />
<RegEx find="Akcionar" replaceWith="Dioničar" />
@ -4230,6 +4251,8 @@
<RegEx find="([dD])ospe(?=[mš]|te\b)" replaceWith="$1ospije" />
<RegEx find="dostaja[čćc]" replaceWith="dostajat ć" />
<RegEx find="oveš[čćc]" replaceWith="ovest ć" />
<RegEx find="djl" replaceWith="djel" />
<RegEx find="\bdrig" replaceWith="drug" />
<RegEx find="([dD])rj?ema" replaceWith="$1rijema" />
<RegEx find="drugaric" replaceWith="prijateljic" />
<RegEx find="Drugaric" replaceWith="Prijateljic" />
@ -4339,6 +4362,7 @@
<RegEx find="sqn" replaceWith="san" />
<RegEx find="italijan" replaceWith="talijan" />
<RegEx find="Italijan" replaceWith="Talijan" />
<RegEx find="itovao" replaceWith="itirao" />
<RegEx find="\b([iI])zmen" replaceWith="$1zmjen" />
<RegEx find="([iI])znj?eć" replaceWith="$1znijet ć" />
<RegEx find="znj?el" replaceWith="znijel" />
@ -4579,6 +4603,7 @@
<RegEx find="([pP])eva" replaceWith="$1jeva" />
<RegEx find="\bpijac" replaceWith="tržnic" />
<RegEx find="Pijac" replaceWith="Tržnic" />
<RegEx find="ploatiš" replaceWith="ploatiraj" />
<RegEx find="([pP])(ita|lati|obrinu|okaza|oveza|riča)ć" replaceWith="$1$2t ć" />
<RegEx find="([pP])luč([an])" replaceWith="$1luć$2" />
<RegEx find="\b([pP])obj?edi([mštol])" replaceWith="$1obijedi$2" />
@ -4621,6 +4646,7 @@
<RegEx find="([pP])os(l?)e([dt])" replaceWith="$1os$2je$3" />
<RegEx find="([pP])osmatra" replaceWith="$1romatra" />
<RegEx find="([pP])ose([td])" replaceWith="$1osje$2" />
<!-- The specific "potc(j)enj" rule runs before the generic "potc" rule; otherwise lowercase "potcenj" is first rewritten to "podcenj" and never gains the "j". Assumes rules are applied in file order. -->
<RegEx find="([pP])otcj?enj" replaceWith="$1odcjenj" />
<RegEx find="potc" replaceWith="podc" />
<RegEx find="otce" replaceWith="odcj" />
<RegEx find="([pP])oter" replaceWith="$1otjer" />
@ -5005,9 +5031,11 @@
<RegEx find="đž" replaceWith="dž" />
<RegEx find="đz" replaceWith="dž" />
<RegEx find="đt" replaceWith="št" />
<RegEx find="gf" replaceWith="gd" />
<RegEx find="nw" replaceWith="ne" />
<RegEx find="zž" replaceWith="ž" />
<RegEx find="žz" replaceWith="žu" />
<RegEx find="žž" replaceWith="ž" />
<RegEx find="([mnrtzpsdfghjkcvb])ls" replaceWith="$1is" />
<RegEx find="(?&lt;![Hh])abs" replaceWith="aps" />
<RegEx find="acel" replaceWith="acjel" />
@ -5075,6 +5103,7 @@
<RegEx find="coje" replaceWith="cije" />
<RegEx find="([čć])eđ" replaceWith="$1eš" />
<RegEx find="ćnot" replaceWith="ćnost" />
<RegEx find="ćw" replaceWith="će" />
<RegEx find="čmos" replaceWith="čnos" />
<RegEx find="dcz" replaceWith="doz" />
<RegEx find="diba" replaceWith="dina" />
@ -5136,7 +5165,6 @@
<RegEx find="frov" replaceWith="frir" />
<RegEx find="ftam" replaceWith="ftalm" />
<RegEx find="gačuj" replaceWith="gaćuj" />
<RegEx find="gf" replaceWith="gd" />
<RegEx find="gicn" replaceWith="gičn" />
<RegEx find="glda" replaceWith="gleda" />
<RegEx find="gof" replaceWith="god" />
@ -5155,9 +5183,11 @@
<RegEx find="(?&lt;![MH])ick([eoiu])" replaceWith="ičk$1" />
<RegEx find="\bicr" replaceWith="ice" />
<RegEx find="idev" replaceWith="idjev" />
<RegEx find="idk" replaceWith="isk" />
<RegEx find="ifej" replaceWith="idej" />
<RegEx find="ijd" replaceWith="idj" />
<RegEx find="ijo([absdegknptz])" replaceWith="io$1" />
<RegEx find="ihsk" replaceWith="ijsk" />
<RegEx find="([ai])nić" replaceWith="$1nit ć" />
<RegEx find="(?&lt;!\b[Oo]zlo|\b[Ii]sp(rip)?ov|i)jeđen" replaceWith="ijeđen" />
<RegEx find="(?&lt;!hva)lisa([olt])" replaceWith="lira$1" />
@ -5205,6 +5235,7 @@
<RegEx find="lepš" replaceWith="ljepš" />
<RegEx find="lešt" replaceWith="lješt" />
<RegEx find="levać" replaceWith="lijevat ć" />
<RegEx find="lhav" replaceWith="ljav" />
<RegEx find="liget" replaceWith="ligent" />
<RegEx find="linovan" replaceWith="liniran" />
<RegEx find="lkm" replaceWith="lim" />
@ -5238,10 +5269,12 @@
<RegEx find="msč" replaceWith="mač" />
<RegEx find="([Mm])sl" replaceWith="$1isl" />
<RegEx find="mtić" replaceWith="mtit ć" />
<RegEx find="mzn" replaceWith="mun" />
<RegEx find="mqm" replaceWith="mam" />
<RegEx find="mwn" replaceWith="men" />
<RegEx find="napredi" replaceWith="naprijedi" />
<RegEx find="napt" replaceWith="napr" />
<RegEx find="(?&lt;!a)ndm" replaceWith="nem" />
<RegEx find="([^\s]+)nesl" replaceWith="$1nijel" />
<RegEx find="nešen" replaceWith="nesen" />
<RegEx find="nimp" replaceWith="nimo" />
@ -5328,6 +5361,7 @@
<RegEx find="pisho" replaceWith="psiho" />
<RegEx find="pišačk" replaceWith="pišalin" />
<RegEx find="plst" replaceWith="plat" />
<RegEx find="pnd" replaceWith="ond" />
<RegEx find="pord" replaceWith="prod" />
<RegEx find="pqn" replaceWith="pon" />
<RegEx find="prouk" replaceWith="poruk" />
@ -5335,6 +5369,7 @@
<RegEx find="ptv" replaceWith="otv" />
<RegEx find="pwn" replaceWith="pen" />
<RegEx find="ralijs" replaceWith="rals" />
<RegEx find="(?&lt;!v)rck" replaceWith="rek" />
<RegEx find="resjedav" replaceWith="redsjedav" />
<RegEx find="kratij" replaceWith="kracij" />
<RegEx find="(?&lt;!h)rašće" replaceWith="rast će" />
@ -5354,6 +5389,7 @@
<RegEx find="pulis" replaceWith="pulir" />
<RegEx find="pva" replaceWith="ova" />
<RegEx find="pzn" replaceWith="pozn" />
<RegEx find="rp([bd])" replaceWith="ro$1" />
<RegEx find="rdk" replaceWith="rek" />
<RegEx find="riaj" replaceWith="rija" />
<RegEx find="rićemo" replaceWith="rit ćemo" />
@ -5519,6 +5555,7 @@
<RegEx find="(?&lt;!d)vof" replaceWith="vog" />
<RegEx find="(?&lt;!i)vp" replaceWith="vo" />
<RegEx find="vqn" replaceWith="van" />
<RegEx find="vwg" replaceWith="veg" />
<!-- mijenja u korist češće riječi -->
<RegEx find="([vV])rača" replaceWith="$1raća" />
<RegEx find="vrijedić" replaceWith="vrijedit ć" />
@ -5556,6 +5593,12 @@
<RegEx find="žalj?ev" replaceWith="žalijev" />
<RegEx find="([žzt])ivać" replaceWith="$1ivat ć" />
<RegEx find="žive([lot])" replaceWith="živi$1" />
<RegEx find="ćšu" replaceWith="šću" />
<RegEx find="kjn" replaceWith="kin" />
<RegEx find="othe" replaceWith="otje" />
<RegEx find="stp" replaceWith="sto" />
<!-- osobna imena/prezimena i imena gradova/država itd. -->
<RegEx find="A[fv]ghanistan" replaceWith="Afganistan" />
<RegEx find="Ajdah" replaceWith="Idah" />
@ -5585,7 +5628,7 @@
<RegEx find="Čarls" replaceWith="Charles" />
<RegEx find="Čendler" replaceWith="Chandler" />
<RegEx find="Č[eu]rčil" replaceWith="Churchill" />
<!-- A captured "Č"/"č" cannot be turned into "C"/"c" through "$1" (that produced "Čhester"), so each case gets its own rule. The upper-case rule also covers "Česterton"; the lower-case one handles names such as "Mančester". -->
<RegEx find="Čester" replaceWith="Chester" />
<RegEx find="čester" replaceWith="chester" />
<RegEx find="Čikag" replaceWith="Chicag" />
<RegEx find="Dajan" replaceWith="Dian" />
<RegEx find="Dankan" replaceWith="Duncan" />
@ -5690,6 +5733,7 @@
<!-- Longer pattern first: "Mekdonald" would otherwise consume the prefix and leave "McDonalds" without the apostrophe. Assumes rules are applied in file order. -->
<RegEx find="Mekdonalds" replaceWith="McDonald's" />
<RegEx find="Mekdonald" replaceWith="McDonald" />
<RegEx find="Memfis" replaceWith="Memphis" />
<RegEx find="Mendi" replaceWith="Mandy" />
<RegEx find="Menhetn" replaceWith="Manhattan" />
<RegEx find="Metju" replaceWith="Matthew" />
<RegEx find="Mexic" replaceWith="Meksik" />
@ -5760,21 +5804,17 @@
<RegEx find="Viskonsin" replaceWith="Wisconsin" />
<RegEx find="Volter" replaceWith="Walter" />
<RegEx find="Vots" replaceWith="Wats" />
<RegEx find="ćšu" replaceWith="šću" />
<RegEx find="kjn" replaceWith="kin" />
<RegEx find="othe" replaceWith="otje" />
<RegEx find="stp" replaceWith="sto" />
<!-- Removes a comma next to the conjunction "ili" while keeping the space between words; \b leaves words that merely end in "ili" (e.g. "bili,") untouched. -->
<RegEx find=", ili\b" replaceWith=" ili" />
<RegEx find="\bili," replaceWith="ili" />
<RegEx find="\b\|\b" replaceWith="I" />
<!--
<RegEx find="([Ss])a (?!\s*[szž])" replaceWith="$1" />
<RegEx find="s ([szž])" replaceWith="sa $1" />
-->
<RegEx find="da li se (\w+)" replaceWith="$1 li se" />
<!-- Shortens only the standalone preposition "sa" to "s"; the trailing space in the pattern keeps words that start with "sa" (sam, sad, samo) intact. "sa" is kept before s/z/š/ž and in "sa mnom". -->
<RegEx find=" ([Ss])a (?![szšžSZŠŽ]|mnom\b)" replaceWith=" $1 " />
<!--<RegEx find="s ([szž])" replaceWith="sa $1" />-->
<!-- 10kg » 10 kg | 20cm » 20 cm | 44dag » 44 dag -->
<RegEx find="\b(\d+)([a-z]{2,4})\b" replaceWith="$1 $2" />
<!-- 34m » 34 m -->