Merge pull request #1699 from diomed/patch-2

Update hrv_OCRFixReplaceList.xml
This commit is contained in:
Nikolaj Olsson 2016-04-16 12:38:36 +02:00
commit b315ed5722

View File

@ -28,6 +28,7 @@
<!-- Word entries: each one pairs a `from` form with the `to` form that replaces it. -->
<Word from="boleo" to="bolio" />
<Word from="bolesan" to="bolestan" />
<Word from="braon" to="smeđa" />
<Word from="bregu" to="brijegu" />
<!-- Lower-case and capitalised forms are listed as separate entries. -->
<Word from="ceo" to="cijeli" />
<Word from="Ceo" to="Cijeli" />
<Word from="čas" to="sat" />
@ -86,20 +87,33 @@
<!-- Word entries: each one pairs a `from` form with the `to` form that replaces it. -->
<Word from="Grečen" to="Gretchen" />
<Word from="greše" to="griješe" />
<Word from="greški" to="grešci" />
<!-- Historij* / Istorij*: every inflected form is enumerated on its own line;
     the H-prefixed and H-less spellings share the same replacement.
     Only the -e / -i endings have lower-case entries here. -->
<Word from="Historija" to="Povijest" />
<Word from="Historiju" to="Povijest" />
<Word from="Historije" to="Povijesti" />
<Word from="Historiji" to="Povijesti" />
<Word from="Istorija" to="Povijest" />
<Word from="Istoriju" to="Povijest" />
<Word from="Istorije" to="Povijesti" />
<Word from="Istoriji" to="Povijesti" />
<Word from="historije" to="povijesti" />
<Word from="historiji" to="povijesti" />
<Word from="istorije" to="povijesti" />
<Word from="istoriji" to="povijesti" />
<Word from="Hju" to="Hugh" />
<Word from="hoču" to="hoću" />
<Word from="Hoču" to="Hoću" />
<Word from="Hjuston" to="Houston" />
<Word from="iči" to="ići" />
<Word from="Iči" to="Ići" />
<Word from="iko" to="itko" />
<Word from="ignoriši" to="ignoriraj" />
<Word from="ignorišu" to="ignoriraju" />
<Word from="interesuje" to="zanima" />
<Word from="Interesuje" to="Zanima" />
<!-- ivica: four inflected forms, each mapped to the matching form of "rub". -->
<Word from="ivica" to="rub" />
<Word from="ivice" to="ruba" />
<Word from="ivici" to="rubu" />
<Word from="ivicu" to="rub" />
<!-- NOTE(review): the next five entries (hoču, Hoču, Hjuston, ignoriši, ignorišu)
     repeat entries that already appear earlier in this span with identical
     from/to values. This looks like the old and new position of moved lines;
     confirm that only one copy of each remains in the file. -->
<Word from="hoču" to="hoću" />
<Word from="Hoču" to="Hoću" />
<Word from="Hjuston" to="Houston" />
<Word from="ignoriši" to="ignoriraj" />
<Word from="ignorišu" to="ignoriraju" />
<Word from="isuviše" to="previše" />
<!-- The `from` value here spans three space-separated words and is joined into one. -->
<Word from="i te kako" to="itekako" />
<Word from="izvinim" to="ispričam" />
@ -214,6 +228,7 @@
<!-- Word entries: each one pairs a `from` form with the `to` form that replaces it. -->
<Word from="odprilike" to="otprilike" />
<Word from="odupreti" to="oduprijeti" />
<Word from="Ohajo" to="Ohio" />
<!-- organizovao / organizuju are listed as individual inflected forms. -->
<Word from="organizovao" to="organizirao" />
<Word from="organizuju" to="organiziraju" />
<Word from="ostrvima" to="otocima" />
<Word from="osete" to="osjete" />
@ -301,12 +316,14 @@
<!-- Word entries: each one pairs a `from` form with the `to` form that replaces it. -->
<Word from="sekiramo" to="brinemo" />
<Word from="sekirate" to="brinete" />
<Word from="sekirajte" to="brinite" />
<Word from="seme" to="sjeme" />
<!-- sertan, siguan, sma and smao differ from their targets only by a swapped
     or missing letter. -->
<Word from="sertan" to="sretan" />
<Word from="siguan" to="siguran" />
<Word from="sija" to="sja" />
<Word from="sirće" to="ocat" />
<Word from="sirćetu" to="octu" />
<!-- Lower-case and capitalised forms are listed as separate entries. -->
<Word from="seo" to="sjeo" />
<Word from="Seo" to="Sjeo" />
<Word from="sem" to="osim" />
<Word from="sma" to="sam" />
<Word from="smao" to="samo" />
@ -349,6 +366,7 @@
<!-- Word entries: each one pairs a `from` form with the `to` form that replaces it. -->
<Word from="svestan" to="svjestan" />
<Word from="šolja" to="šalica" />
<Word from="šolju" to="šalicu" />
<!-- NOTE(review): a RegEx entry with the same text pair (Španiji / Španjolskoj)
     also appears in this view; confirm only one of the two is kept. -->
<Word from="Španiji" to="Španjolskoj" />
<Word from="Šta" to="Što" />
<!-- The replacement here is two words separated by a space. -->
<Word from="štagod" to="što god" />
<Word from="šta" to="što" />
@ -395,6 +413,7 @@
<!-- Word entries: each one pairs a `from` form with the `to` form that replaces it.
     Lower-case and capitalised forms are listed as separate entries. -->
<Word from="veoma" to="vrlo" />
<Word from="Veoma" to="Vrlo" />
<Word from="vereni" to="zaručeni" />
<!-- "vlo" is "vrlo" with the r missing. -->
<Word from="vlo" to="vrlo" />
<Word from="voliti" to="voljeti" />
<Word from="voleo" to="volio" />
<Word from="Voleo" to="Volio" />
@ -480,6 +499,10 @@
<!-- Word entries: each one pairs a `from` form with the `to` form that replaces it. -->
<Word from="udeo" to="udio" />
<Word from="uspeo" to="uspio" />
<Word from="Uspeo" to="Uspio" />
<!-- zased*: four inflected forms, each gaining a "j" (zased to zasjed). -->
<Word from="zaseda" to="zasjeda" />
<Word from="zasede" to="zasjede" />
<Word from="zasedi" to="zasjedi" />
<Word from="zasedu" to="zasjedu" />
<!-- month names -->
<Word from="januar" to="siječanj" />
<Word from="februar" to="veljača" />
@ -631,6 +654,7 @@
<!-- RegEx entries: `find` is a regular expression and `replaceWith` its replacement.
     $1 / $2 in `replaceWith` refer to the capture groups of `find`; capturing
     the first letter as ([dD]) carries the matched case into the output. -->
<RegEx find="\b([dD])j?el([ei])\b" replaceWith="$1ijel$2" />
<RegEx find="([dD])elikvent" replaceWith="$1elinkvent" />
<RegEx find="([dD])eluj([eu])" replaceWith="$1jeluj$2" />
<!-- The diskut* rules swap the whole stem, so the initial letter changes (d to r)
     and cannot be carried over from a capture group; the capitalised form
     therefore has its own entry. -->
<RegEx find="diskutova" replaceWith="raspravlja" />
<RegEx find="diskutuje" replaceWith="raspravlja" />
<RegEx find="Diskutuje" replaceWith="Raspravlja" />
<RegEx find="\b([dD])j?ete\b" replaceWith="$1ijete" />
@ -647,7 +671,6 @@
<!-- RegEx entries: `find` is a regular expression and `replaceWith` its replacement;
     $1 / $2 refer to the capture groups of `find`. -->
<!-- The (?!ć) lookahead keeps this rule off "desić", which the next rule handles
     by splitting it into two words ("...it ć"). -->
<RegEx find="\b([dD])esi(?!ć)" replaceWith="$1ogodi" />
<RegEx find="\b([dD])esić" replaceWith="$1ogodit ć" />
<RegEx find="([dD])evi([čc])" replaceWith="$1jevi$2" />
<RegEx find="([Dd])ijagnostikova" replaceWith="$1ijagnosticira" />
<!-- The optional "j" lets the rule match both "del..." and "djel..." spellings. -->
<RegEx find="\b([dD])j?eljenj([aeu]|em|ima)\b" replaceWith="$1ijeljenj$2" />
<RegEx find="\b([dD])ijec([aeiou]|om)\b" replaceWith="$1jec$2" />
<RegEx find="\b([dD])ragocen([aeiou])" replaceWith="$1ragocjen$2" />
@ -716,12 +739,8 @@
<!-- RegEx entries: `find` is a regular expression and `replaceWith` its replacement;
     $1 refers to the first capture group of `find`. -->
<RegEx find="Haos" replaceWith="Kaos" />
<!-- Lower-case and capitalised stems are separate rules because the replaced
     first letter (h to k) is not captured. -->
<RegEx find="hemi([jč])" replaceWith="kemi$1" />
<RegEx find="Hemi([jč])" replaceWith="Kemi$1" />
<!-- (h)istorij* rules: "h?" makes the leading h optional in the lower-case
     patterns; [au] endings map to "povijest", [ei] endings to "povijesti". -->
<RegEx find="\bh?istorij[au]" replaceWith="povijest" />
<RegEx find="Historij[au]" replaceWith="Povijest" />
<RegEx find="Istorij[au]" replaceWith="Povijest" />
<RegEx find="\bh?istorij[ei]" replaceWith="povijesti" />
<RegEx find="Historij[ei]" replaceWith="Povijesti" />
<RegEx find="Istorij[ei]" replaceWith="Povijesti" />
<!-- NOTE(review): the next rule is character-for-character identical to the first
     (h)istorij[au] rule above. This looks like the old and new position of a
     moved line; confirm only one copy remains in the file. -->
<RegEx find="\bh?istorij[au]" replaceWith="povijest" />
<RegEx find="\bh?istorijsk" replaceWith="povijesn" />
<RegEx find="\bHistorijsk" replaceWith="Povijesn" />
<RegEx find="\bIstorijsk" replaceWith="Povijesn" />
@ -818,7 +837,6 @@
<!-- RegEx entries: `find` is a regular expression and `replaceWith` its replacement;
     $1 refers to the first capture group of `find`. -->
<RegEx find="komitet" replaceWith="odbor" />
<RegEx find="Komitet" replaceWith="Odbor" />
<RegEx find="([kK])ompjuter" replaceWith="$1ompjutor" />
<RegEx find="([kK])omplikova" replaceWith="$1omplicira" />
<!-- NOTE(review): the pattern below ends in "j" but the replacement ends in "a",
     so any text following the match is appended after "...cira"; confirm the
     result is intended for all suffixes. -->
<RegEx find="([kK])omplikuj" replaceWith="$1omplicira" />
<RegEx find="([kK])ompromitova" replaceWith="$1ompromitira" />
<RegEx find="komšijsk" replaceWith="susjedn" />
@ -928,6 +946,7 @@
<!-- RegEx entries: `find` is a regular expression and `replaceWith` its replacement;
     $1 / $2 refer to the capture groups of `find`. -->
<RegEx find="([nN])emačk" replaceWith="$1jemačk" />
<!-- The first letter is matched as [nN] but not captured: the replacement always
     starts with a capital "N", whatever case was matched. -->
<RegEx find="[nN]j?em(a?)c([aeiu])" replaceWith="Nijem$1c$2" />
<!-- Inserts a space: "nemože" becomes "ne može". -->
<RegEx find="([nN])emože" replaceWith="$1e može" />
<!-- The trailing \b limits the match to the listed endings
     (na, no, nom, nog, noj, nim). -->
<RegEx find="([nN])enamer(n[ao]|no[mgj]|nim)\b" replaceWith="$1enamjer$2" />
<RegEx find="([nN])eprijatn" replaceWith="$1eugodn" />
<RegEx find="([nN])epobediv" replaceWith="$1epobjediv" />
<RegEx find="([nN])erj?ešen" replaceWith="$1eriješen" />
@ -967,8 +986,6 @@
<!-- RegEx entries: `find` is a regular expression and `replaceWith` its replacement;
     $1 / $2 refer to the capture groups of `find`. -->
<!-- Splits the fused form into two words: "...stić" + e/u becomes "...stit ć" + e/u. -->
<RegEx find="([oO])prostić([eu])" replaceWith="$1prostit ć$2" />
<RegEx find="([oO])psedn" replaceWith="$1psjedn" />
<RegEx find="([oO])pšt([aeiu]|[eoi]m)" replaceWith="$1pć$2" />
<RegEx find="([oO])rganizuje" replaceWith="$1rganizira" />
<RegEx find="([oO])rganizov" replaceWith="$1rganizir" />
<RegEx find="([oO])ružij([aeu])" replaceWith="$1ružj$2" />
<!-- The second group is optional, so the rule also matches when none of the
     listed letters follows "seti". -->
<RegEx find="([oO])seti([lošmt])?" replaceWith="$1sjeti$2" />
<RegEx find="([oO])setljiv" replaceWith="$1sjetljiv" />
@ -980,7 +997,6 @@
<!-- RegEx entries: `find` is a regular expression and `replaceWith` its replacement;
     $1 / $2 refer to the capture groups of `find`. -->
<!-- Stems whose first letter changes in the replacement have separate lower-case
     and capitalised rules. -->
<RegEx find="otadžbin" replaceWith="domovin" />
<RegEx find="Otadžbin" replaceWith="Domovin" />
<RegEx find="\b([oO])tera" replaceWith="$1tjera" />
<!-- Splits the fused form into two words ("...rit ć" + e/u). -->
<RegEx find="\b([oO])tvorić([eu])" replaceWith="$1tvorit ć$2" />
<RegEx find="ovaploćenj([aeiu])" replaceWith="utjelovljenj$1" />
<RegEx find="Ovaploćenj([aeiu])" replaceWith="Utjelovljenj$1" />
<RegEx find="([Oo])vlašćen" replaceWith="$1vlašten" />
@ -1344,6 +1360,7 @@
<!-- RegEx entries: `find` is a regular expression and `replaceWith` its replacement;
     $1 / $2 refer to the capture groups of `find`. -->
<RegEx find="\b([vV])ešt" replaceWith="$1ješt" />
<RegEx find="\b([vV])j?ešću\b" replaceWith="$1iješću" />
<RegEx find="([vV])et(ar|r[aeuo])" replaceWith="$1jet$2" />
<RegEx find="([vV])ever" replaceWith="$1jever" />
<RegEx find="([vV])ežb" replaceWith="$1ježb" />
<RegEx find="([vV])ide([hlt])" replaceWith="$1idje$2" />
<!-- [eu] must stay!!! -->
@ -1355,7 +1372,7 @@
<!-- RegEx entries: `find` is a regular expression and `replaceWith` its replacement;
     $1 / $2 refer to the capture groups of `find`. -->
<RegEx find="\b([vV])odk([aeiou])" replaceWith="$1otk$2" />
<RegEx find="([vV])ole([ntl])" replaceWith="$1olje$2" />
<RegEx find="([vV])olj?eć" replaceWith="$1oljet ć" />
<!-- NOTE(review): the next two rules share the same stem and replacement. The
     second accepts every suffix the first does (a, u, om, ovi) and additionally
     "ove" and "ovima". They look like the old and new version of one line;
     confirm only the intended one remains in the file. -->
<RegEx find="\b([vV])oz([au]|om|ovi)?\b" replaceWith="$1lak$2" />
<RegEx find="\b([vV])oz([au]|om|ov[ie]|ovima)?\b" replaceWith="$1lak$2" />
<!-- Both of these split a fused form into two words ("...it ć" + e/u). -->
<RegEx find="\b([vV])ozi[čć]([eu])" replaceWith="$1ozit ć$2" />
<RegEx find="\b([vV])ratić([eu])" replaceWith="$1ratit ć$2" />
<RegEx find="\b([vV])rj?edi([mštl])?\b" replaceWith="$1rijedi$2" />
@ -1416,13 +1433,14 @@
<!-- RegEx entries keyed on word fragments rather than whole stems: most patterns
     here have no \b anchor, so they match anywhere the fragment occurs.
     `&lt;` is the XML escape for "<", so "(?&lt;!...)" is a negative lookbehind. -->
<RegEx find="anić" replaceWith="anit ć" />
<!-- Skipped when "avić" is preceded by "gl", "N" or "n". -->
<RegEx find="(?&lt;!gl|[Nn])avić" replaceWith="avit ć" />
<RegEx find="\bdral" replaceWith="deral" />
<RegEx find="dranj" replaceWith="deranj" />
<RegEx find="dsek" replaceWith="dsjek" />
<RegEx find="ebać" replaceWith="ebat ć" />
<!-- Skipped when "edać" is preceded by N, n, J or j. -->
<RegEx find="(?&lt;![NnJj])edać" replaceWith="edat ć" />
<RegEx find="eizbež" replaceWith="eizbjež" />
<RegEx find="efinišu" replaceWith="efiniraju" />
<RegEx find="erišu" replaceWith="eriraju" />
<!-- NOTE(review): the "f" alternative of the second rule below produces exactly
     what the first rule produces, so the first is redundant if both are present.
     They look like the old and new version of one line; confirm. -->
<RegEx find="fikova([nlot])" replaceWith="ficira$1" />
<RegEx find="(mpl|st|f)ikova([nlot])" replaceWith="$1icira$2" />
<RegEx find="fikuj" replaceWith="ficir" />
<RegEx find="fisa" replaceWith="fira" />
<RegEx find="fiše" replaceWith="fira" />
@ -1430,8 +1448,10 @@
<!-- RegEx entries keyed on word fragments. `&lt;` is the XML escape for "<",
     so "(?&lt;!...)" is a negative lookbehind; $1 refers to a capture group. -->
<!-- The lookbehind lists the prefixes after which "jeđen" is left unchanged,
     including a preceding "i" (already "ijeđen"). -->
<RegEx find="(?&lt;!\b[Oo]zlo|\b[Ii]sp(rip)?ov|i)jeđen" replaceWith="ijeđen" />
<!-- Left unchanged when preceded by "hva". -->
<RegEx find="(?&lt;!hva)lisati" replaceWith="lirati" />
<RegEx find="jći" replaceWith="jući" />
<RegEx find="([kKPp])ratić" replaceWith="$1ratit ć" />
<RegEx find="koriš[čć]a" replaceWith="korišta" />
<!-- NOTE(review): the "l" alternative of the second rule below produces exactly
     what the first rule produces, so the first is redundant if both are present.
     They look like the old and new version of one line; confirm. -->
<RegEx find="lizovan" replaceWith="liziran" />
<RegEx find="([nl])izovan" replaceWith="$1iziran" />
<!-- Left unchanged when preceded by "Ob" / "ob". -->
<RegEx find="(?&lt;![Oo]b)([nl])izuje" replaceWith="$1izira" />
<RegEx find="luparenj" replaceWith="lupiranj" />
<RegEx find="mjenim" replaceWith="mijenim" />
<RegEx find="nisa([nlot])" replaceWith="nira$1" />
@ -1445,7 +1465,8 @@
<!-- RegEx entries keyed on word fragments; $1 refers to the first capture group. -->
<RegEx find="ntis" replaceWith="ntir" />
<RegEx find="ntiše" replaceWith="ntira" />
<RegEx find="ntišu" replaceWith="ntiraju" />
<!-- NOTE(review): this rule and the "o[cčć]aren" rule two lines below differ only
     in that the later one also accepts a plain "c"; everything this one matches
     the later one matches too, with the same output. They look like the old and
     new version of one line; confirm. -->
<RegEx find="o[čć]aren" replaceWith="očaran" />
<RegEx find="([Oo])besi" replaceWith="$1bjesi" />
<RegEx find="o[cčć]aren" replaceWith="očaran" />
<RegEx find="oćeju" replaceWith="oće" />
<RegEx find="odpisa" replaceWith="otpisa" />
<!-- \b restricts this to word-initial "pominj", which then gains a leading "s". -->
<RegEx find="\bpominj" replaceWith="spominj" />
@ -1457,17 +1478,19 @@
<!-- for "preduzeti" / "preduzetnik": the rule below rewrites the fragment "reduz" to "oduz" -->
<RegEx find="reduz" replaceWith="oduz" />
<!-- The optional "j" lets the rule match both "relep" and "reljep". -->
<RegEx find="relj?ep" replaceWith="relijep" />
<RegEx find="rimed" replaceWith="rimjed" />
<RegEx find="pulis" replaceWith="pulir" />
<RegEx find="r([mv])isan" replaceWith="r$1iran" />
<!-- Splits the fused form into two words. -->
<RegEx find="svetić" replaceWith="svetit ć" />
<RegEx find="tćeš" replaceWith="t ćeš" />
<!-- NOTE(review): the two "tede" rules overlap. The first fires only before "l" or
     "t"; the second fires before anything except "o" or "ć", which includes "l"
     and "t" and gives the same output there. They look like the old and new
     version of one line; confirm only the intended one remains. -->
<RegEx find="tede([lt])" replaceWith="tedje$1" />
<RegEx find="tede(?![oć])" replaceWith="tedje" />
<RegEx find="tešn" replaceWith="tješn" />
<RegEx find="tisanj" replaceWith="tiranj" />
<RegEx find="tiviše" replaceWith="tivira" />
<RegEx find="trisa" replaceWith="trira" />
<RegEx find="triše" replaceWith="trira" />
<RegEx find="triši" replaceWith="triraj" />
<!-- Both of these split a fused form into two words. -->
<RegEx find="ubić" replaceWith="ubit ć" />
<RegEx find="ucać" replaceWith="ucat ć" />
<RegEx find="udj([ai])" replaceWith="uđ$1" />
<RegEx find="\bugao" replaceWith="kut" />
@ -1478,6 +1501,7 @@
<!-- RegEx entries keyed on word fragments. `&lt;` is the XML escape for "<",
     so "(?&lt;!...)" is a negative lookbehind; $1 refers to a capture group. -->
<RegEx find="veštava" replaceWith="vještava" />
<RegEx find="visa([lt])" replaceWith="vira$1" />
<RegEx find="([vV])jeov" replaceWith="$1jerov" />
<!-- Left unchanged when "vorić" is preceded by Č, č, Ć or ć. -->
<RegEx find="(?&lt;![ČčĆć])vorić" replaceWith="vorit ć" />
<!-- changes in favour of the more frequent word -->
<RegEx find="([vV])rača" replaceWith="$1raća" />
<!-- Left unchanged when preceded by "obra". -->
<RegEx find="(?&lt;!obra)zovati" replaceWith="zirati" />
@ -1549,6 +1573,7 @@
<!-- RegEx entries for proper names: each maps a phonetic spelling in `find`
     to the spelling given in `replaceWith`. -->
<RegEx find="Los Anđeles" replaceWith="Los Angeles" />
<RegEx find="Majami" replaceWith="Miami" />
<RegEx find="Majkl" replaceWith="Michael" />
<RegEx find="Marfi" replaceWith="Murphy" />
<RegEx find="Memfis" replaceWith="Memphis" />
<!-- The optional case ending (u / om) is captured and re-attached after "Maine";
     \b keeps the rule from matching inside a longer word. -->
<RegEx find="Mejn(u|om)?\b" replaceWith="Maine$1" />
<RegEx find="Metju" replaceWith="Matthew" />
@ -1573,7 +1598,6 @@
<!-- RegEx entries, mostly proper names: each maps the spelling in `find`
     to the spelling given in `replaceWith`; $1 refers to the first capture group. -->
<RegEx find="Stiven" replaceWith="Stephen" />
<RegEx find="Stjuart" replaceWith="Stuart" />
<RegEx find="Sančez" replaceWith="Sanchez" />
<!-- NOTE(review): a Word entry with the same text pair (Španiji / Španjolskoj)
     also appears in this view; confirm only one of the two is kept. -->
<RegEx find="Španiji" replaceWith="Španjolskoj" />
<!-- The captured first letter (š or Š) is carried into the replacement. -->
<RegEx find="([šŠ])vetsk" replaceWith="$1vedsk" />
<RegEx find="([šŠ])vajcarsk" replaceWith="$1vicarsk" />
<RegEx find="Tajms" replaceWith="Times" />