dictionaries: automated XML upkeep

This commit is contained in:
Waldi Ravens 2019-05-10 00:22:06 +02:00
parent 361f315df0
commit 575d52c61c
10 changed files with 2028 additions and 2034 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,11 @@
<NoBreakAfterList>
<Item>Dr.</Item>
<Item>Dr</Item>
<Item>Hr.</Item>
<Item>Mr.</Item>
<Item>Mr</Item>
<Item>Mrs.</Item>
<Item>Ms.</Item>
<Item>Frk.</Item>
<Item>Dr.</Item>
<Item>Frk.</Item>
<Item>Fru</Item>
<Item>Hr.</Item>
<Item>Mr</Item>
<Item>Mr.</Item>
<Item>Mrs.</Item>
<Item>Ms.</Item>
</NoBreakAfterList>

View File

@ -1,7 +1,7 @@
<NoBreakAfterList>
<Item>Dr</Item>
<Item>Dr.</Item>
<Item>Mr.</Item>
<Item>Mrs.</Item>
<Item>Ms.</Item>
<Item>Dr</Item>
</NoBreakAfterList>

View File

@ -24,6 +24,7 @@
<word>arsenic</word>
<word>arses</word>
<word>astatine</word>
<word>astrometry</word>
<word>attosecond</word>
<word>autotrophs</word>
<word>axe</word>
@ -107,6 +108,7 @@
<word>endeth</word>
<word>episiotomy</word>
<word>erbium</word>
<word>esophaguses</word>
<word>euro</word>
<word>europium</word>
<word>euros</word>
@ -205,6 +207,7 @@
<word>mercury</word>
<word>mesenteric</word>
<word>metres</word>
<word>micrografting</word>
<word>might've</word>
<word>millennia</word>
<word>mmm</word>
@ -212,6 +215,7 @@
<word>mojo</word>
<word>molybdenum</word>
<word>moonlet</word>
<word>multispectral</word>
<word>must've</word>
<word>nah</word>
<word>namaste</word>
@ -260,6 +264,8 @@
<word>purée</word>
<word>radium</word>
<word>radon</word>
<word>reacclimate</word>
<word>reacclimating</word>
<word>realise</word>
<word>realised</word>
<word>rebar</word>
@ -382,11 +388,4 @@
<word>yukina</word>
<word>zinc</word>
<word>zirconium</word>
<word>astrometry</word>
<word>esophaguses</word>
<word>micrografting</word>
<word>multispectral</word>
<word>reacclimate</word>
<word>reacclimating</word>
<word>skank</word>
</words>

View File

@ -2953,7 +2953,6 @@
<LinePart from=" hasn 't " to=" hasn't " />
<LinePart from=" you ' re " to=" you're " />
<LinePart from=" You ' re " to=" You're " />
<LinePart from=" that' s " to=" that's " />
<LinePart from="1 2th " to="12th " />
<LinePart from="-| " to="- I " />
<LinePart from=" | " to=" I " />
@ -3123,7 +3122,6 @@
<Beginning from="She' s " to="She's " />
<Beginning from="O kay, " to="Okay, " />
<Beginning from="l didn't" to="I didn't" />
<Beginning from="l don't" to="I don't" />
<Beginning from="-1 " to="- I" />
<Beginning from="I'II " to="I'll " />
<Beginning from="Do l " to="Do I " />
@ -3180,6 +3178,6 @@
<RegEx find="^-1t (\p{L})" replaceWith="- It $1" />
<RegEx find="^-_\.(\p{L})" replaceWith="- ...$1" />
<RegEx find="^_\.\.(\p{L})" replaceWith="...$1" />
<!-- <RegEx find="(\p{L}{2,})&quot;s " replaceWith="$1's " /> -->
<!-- <RegEx find="(\p{L}{2,})&quot;s " replaceWith="$1's " /> -->
</RegularExpressions>
</OCRFixReplaceList>

View File

@ -1,28 +1,28 @@
<NoBreakAfterList>
<Item>bl</Item>
<Item>bl.</Item>
<Item>dipl</Item>
<Item>dipl.</Item>
<Item>Dr.</Item>
<Item>Dr</Item>
<Item>dr.</Item>
<Item>dr</Item>
<Item>g.</Item>
<Item>g</Item>
<Item>god.</Item>
<Item>gosp</Item>
<Item>gosp.</Item>
<Item>gđa</Item>
<Item>u</Item>
<Item>gđica</Item>
<Item>gđice</Item>
<Item>gđici</Item>
<Item>gđicu</Item>
<Item>ing.</Item>
<Item>mr.</Item>
<Item>prof</Item>
<Item>prof.</Item>
<Item>sv.</Item>
<Item>sv</Item>
<Item>vlč.</Item>
</NoBreakAfterList>
<NoBreakAfterList>
<Item>bl</Item>
<Item>bl.</Item>
<Item>dipl</Item>
<Item>dipl.</Item>
<Item>dr</Item>
<Item>Dr</Item>
<Item>dr.</Item>
<Item>Dr.</Item>
<Item>g</Item>
<Item>g.</Item>
<Item>gđa</Item>
<Item>gđica</Item>
<Item>gđice</Item>
<Item>gđici</Item>
<Item>gđicu</Item>
<Item>gđu</Item>
<Item>god.</Item>
<Item>gosp</Item>
<Item>gosp.</Item>
<Item>ing.</Item>
<Item>mr.</Item>
<Item>prof</Item>
<Item>prof.</Item>
<Item>sv</Item>
<Item>sv.</Item>
<Item>vlč.</Item>
</NoBreakAfterList>

View File

@ -50,7 +50,7 @@
<Word from="bekstvo" to="bijeg" />
<Word from="bekstvu" to="bijegu" />
<Word from="begstvu" to="bijegu" />
<!--<Word from="beo" to="bijel | bio" /> ne može jer su dvije verzije! -->
<!--<Word from="beo" to="bijel | bio" /> ne može jer su dvije verzije! -->
<Word from="bes" to="bijes" />
<Word from="besa" to="bijesa" />
<Word from="besan" to="bijesan" />
@ -62,12 +62,12 @@
<Word from="biju" to="tuku" />
<Word from="bimso" to="bismo" />
<Word from="blijeđi" to="bljeđi" />
<Word from="biblija" to="Biblija" />
<Word from="biblije" to="Biblije" />
<Word from="bibliji" to="Bibliji" />
<Word from="bibliju" to="Bibliju" />
<Word from="biblijom" to="Biblijom" />
<Word from="biblijama" to="Biblijama" />
<Word from="biblija" to="Biblija" />
<Word from="biblije" to="Biblije" />
<Word from="bibliji" to="Bibliji" />
<Word from="bibliju" to="Bibliju" />
<Word from="biblijom" to="Biblijom" />
<Word from="biblijama" to="Biblijama" />
<Word from="bioje" to="bio je" />
<Word from="bi smo" to="bismo" />
<Word from="bi ste" to="biste" />
@ -506,7 +506,7 @@
<Word from="kevo" to="majko" />
<Word from="Kevo" to="Majko" />
<Word from="kevu" to="majku" />
<Word from="Kevu" to="Majku" />
<Word from="Kevu" to="Majku" />
<Word from="kevom" to="majkom" />
<Word from="Kevom" to="Majkom" />
<Word from="kidnapovan" to="otet" />
@ -552,7 +552,7 @@
<Word from="koverti" to="kuverti" />
<Word from="kovertu" to="kuvertu" />
<Word from="kritikuju" to="kritiziraju" />
<Word from="Krst" to="Križ" />
<Word from="Krst" to="Križ" />
<Word from="krst" to="križ" />
<Word from="krsta" to="križa" />
<Word from="krstaši" to="križari" />
@ -664,7 +664,7 @@
<Word from="ljepi" to="lijepi" />
<Word from="ljepo" to="lijepo" />
<Word from="ljepu" to="lijepu" />
<Word from="ljepim" to="lijepim" />
<Word from="ljepim" to="lijepim" />
<Word from="ljepima" to="lijepima" />
<Word from="ljepoj" to="lijepoj" />
<Word from="ljepom" to="lijepom" />
@ -682,9 +682,9 @@
<Word from="ljetnje" to="ljetne" />
<Word from="magnezijuma" to="magnezija" />
<Word from="magnezijumu" to="magneziju" />
<Word from="maja" to="svibnja"/>
<Word from="maju" to="svibnju"/>
<Word from="majem" to="svibnjem"/>
<Word from="maja" to="svibnja" />
<Word from="maju" to="svibnju" />
<Word from="majem" to="svibnjem" />
<Word from="majek" to="majke" />
<Word from="majca" to="majica" />
<Word from="majce" to="majice" />
@ -695,9 +695,9 @@
<Word from="maloprije" to="malo prije" />
<Word from="manifestuje" to="manifestira" />
<Word from="manifestuju" to="manifestiraju" />
<Word from="maram" to="moram"/>
<Word from="maram" to="moram" />
<Word from="marta" to="ožujka" />
<Word from="martu" to="ožujku"/>
<Word from="martu" to="ožujku" />
<Word from="martom" to="ožujkom" />
<Word from="matori" to="stari" />
<Word from="mehur" to="mjehur" />
@ -922,7 +922,7 @@
<Word from="osete" to="osjete" />
<Word from="ostrvo" to="otok" />
<Word from="Ostrvo" to="Otok" />
<Word from="ouno" to="puno" />
<Word from="ouno" to="puno" />
<Word from="osveženja" to="osvježenja" />
<Word from="osveženje" to="osvježenje" />
<Word from="osveženjem" to="osvježenjem" />
@ -1036,7 +1036,7 @@
<Word from="pomeraju" to="miču" />
<Word from="pomerala" to="micala" />
<Word from="pomjerala" to="pomicala" />
<Word from="pomjeraju" to="pomiču" />
<Word from="pomjeraju" to="pomiču" />
<Word from="pomeranja" to="pomicanja" />
<Word from="pomerati" to="micati" />
<Word from="pomjerati" to="pomicati" />
@ -1462,7 +1462,7 @@
<Word from="stače" to="stat će" />
<Word from="staču" to="stat ću" />
<Word from="Stače" to="Stat će" />
<Word from="Staču" to="Stat ću" />
<Word from="Staču" to="Stat ću" />
<Word from="starao" to="brinuo" />
<Word from="starati" to="brinuti" />
<Word from="Starati" to="Brinuti" />
@ -1904,7 +1904,6 @@
<Word from="Živeo" to="Živio" />
<Word from="živeo" to="živio" />
<Word from="žmureo" to="žmirio" />
<!-- imena -->
<Word from="Abi" to="Abby" />
<Word from="Alis" to="Alice" />
@ -2258,7 +2257,7 @@
<LinePart from="hoćeš da kažeš" to="želiš reći" />
<LinePart from="hoće da kaže" to="želi reći" />
<LinePart from="hoću da živim" to="želim živjeti" />
<LinePart from="Izvini se" to="Ispričaj se" />
<LinePart from="Izvini se" to="Ispričaj se" />
<LinePart from="izvini se" to="ispričaj se" />
<LinePart from="Izvinite me" to="Ispričajte me" />
<LinePart from="izvinite me" to="ispričajte me" />
@ -2641,7 +2640,7 @@
<RegEx find="komanduj" replaceWith="naređuj" />
<RegEx find="inuje" replaceWith="inira" />
<RegEx find="binova" replaceWith="binira" />
<RegEx find="\b([kKvV])olen" replaceWith="$1oljen" />
<RegEx find="\b([kKvV])olen" replaceWith="$1oljen" />
<RegEx find="komitet" replaceWith="odbor" />
<RegEx find="Komitet" replaceWith="Odbor" />
<RegEx find="plikuj" replaceWith="plicira" />
@ -3213,7 +3212,7 @@
<RegEx find="š" replaceWith="š" />
<RegEx find="Å¡" replaceWith="š" />
<RegEx find="ÄŚ" replaceWith="Č" />
<RegEx find="ÄŒ" replaceWith="Č" />
<RegEx find="ÄŒ" replaceWith="Č" />
<RegEx find="Ć" replaceWith="Ć" />
<RegEx find="Ä" replaceWith="Đ" />
<RegEx find="Ĺ " replaceWith="Š" />
@ -3357,7 +3356,7 @@
<RegEx find="mpra" replaceWith="mora" />
<RegEx find="mtić" replaceWith="mtit ć" />
<RegEx find="napredi" replaceWith="naprijedi" />
<RegEx find="([^\s]+)nesl" replaceWith="$1nijel" />
<RegEx find="([^\s]+)nesl" replaceWith="$1nijel" />
<RegEx find="ngažuj" replaceWith="ngažiraj" />
<RegEx find="nho" replaceWith="nko" />
<RegEx find="nisa([nlot])" replaceWith="nira$1" />
@ -3381,7 +3380,8 @@
<RegEx find="obed" replaceWith="objed" />
<RegEx find="obem" replaceWith="oblem" />
<RegEx find="([oO])besi" replaceWith="$1bjesi" />
<RegEx find="([Oo])bs" replaceWith="$1ps" /><!-- Jobs je problem! -->
<!-- Jobs je problem! -->
<RegEx find="([Oo])bs" replaceWith="$1ps" />
<RegEx find="o[cčć]aren" replaceWith="očaran" />
<RegEx find="([oO])d([kp])" replaceWith="$1t$2" />
<RegEx find="oćeju" replaceWith="oće" />
@ -3525,7 +3525,6 @@
<RegEx find="žalj?ev" replaceWith="žalijev" />
<RegEx find="živać" replaceWith="živat ć" />
<RegEx find="žive([lot])" replaceWith="živi$1" />
<!-- osobna imena/prezimena i imena gradova/država itd. -->
<RegEx find="Afghanistan" replaceWith="Afganistan" />
<RegEx find="Ajdah" replaceWith="Idah" />
@ -3772,4 +3771,4 @@
<!-- Skraćenice bez razmaka -->
<RegEx find="d\. o\.o\." replaceWith="d.o.o." />
</RegularExpressions>
</OCRFixReplaceList>
</OCRFixReplaceList>

View File

@ -945,7 +945,6 @@
<RegEx find="\Bo(log[ao]s?\b)" replaceWith="ó$1" />
<RegEx find="\Ba(log[ao]s?\b)" replaceWith="á$1" />
<RegEx find="\Bi(log[ao]s?\b)" replaceWith="í$1" />
<RegEx find="\bIes\b" replaceWith="les" />
<RegEx find="\bIos\b" replaceWith="los" />
</RegularExpressions>

View File

@ -1,28 +1,28 @@
<NoBreakAfterList>
<Item>bl</Item>
<Item>bl.</Item>
<Item>dipl</Item>
<Item>dipl.</Item>
<Item>Dr.</Item>
<Item>Dr</Item>
<Item>dr.</Item>
<Item>dr</Item>
<Item>g.</Item>
<Item>g</Item>
<Item>god.</Item>
<Item>gosp</Item>
<Item>gosp.</Item>
<Item>gđa</Item>
<Item>u</Item>
<Item>gđica</Item>
<Item>gđice</Item>
<Item>gđici</Item>
<Item>gđicu</Item>
<Item>ing.</Item>
<Item>mr.</Item>
<Item>prof</Item>
<Item>prof.</Item>
<Item>sv.</Item>
<Item>sv</Item>
<Item>vlč.</Item>
</NoBreakAfterList>
<NoBreakAfterList>
<Item>bl</Item>
<Item>bl.</Item>
<Item>dipl</Item>
<Item>dipl.</Item>
<Item>dr</Item>
<Item>Dr</Item>
<Item>dr.</Item>
<Item>Dr.</Item>
<Item>g</Item>
<Item>g.</Item>
<Item>gđa</Item>
<Item>gđica</Item>
<Item>gđice</Item>
<Item>gđici</Item>
<Item>gđicu</Item>
<Item>gđu</Item>
<Item>god.</Item>
<Item>gosp</Item>
<Item>gosp.</Item>
<Item>ing.</Item>
<Item>mr.</Item>
<Item>prof</Item>
<Item>prof.</Item>
<Item>sv</Item>
<Item>sv.</Item>
<Item>vlč.</Item>
</NoBreakAfterList>

View File

@ -111,7 +111,7 @@
<RegEx find="š" replaceWith="š" />
<RegEx find="Å¡" replaceWith="š" />
<RegEx find="ÄŚ" replaceWith="Č" />
<RegEx find="ÄŒ" replaceWith="Č" />
<RegEx find="ÄŒ" replaceWith="Č" />
<RegEx find="Ć" replaceWith="Ć" />
<RegEx find="Ĺ " replaceWith="Š" />
<RegEx find="Å " replaceWith="Š" />
@ -265,4 +265,4 @@
<!-- <RegEx find="^\.{3}([a-zčđšž&quot;&lt;])" replaceWith="$1" /> -->
<!-- <RegEx find=" +([.?!])" replaceWith="$1" /> -->
</RegularExpressions>
</OCRFixReplaceList>
</OCRFixReplaceList>