dictionaries: automated XML upkeep

This commit is contained in:
Waldi Ravens 2019-05-26 03:23:51 +02:00
parent dcc28ab676
commit 0469c7f59f
27 changed files with 140 additions and 128 deletions

View File

@ -1,6 +1,9 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- This list contains names/words with specific casing - and specific to Danish only --> <!-- This list contains names with specific casing - and specific to Danish only -->
<names> <names>
<blacklist>
<name>Rabat</name>
</blacklist>
<name>Aquarium</name> <name>Aquarium</name>
<name>Armfeldt</name> <name>Armfeldt</name>
<name>Asgård</name> <name>Asgård</name>
@ -85,7 +88,4 @@
<name>Wharton</name> <name>Wharton</name>
<name>Wylie</name> <name>Wylie</name>
<name>Yardley</name> <name>Yardley</name>
<blacklist>
<name>Rabat</name>
</blacklist>
</names> </names>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="Haner" to="Han er" /> <Word from="Haner" to="Han er" />
<Word from="JaveL" to="Javel" /> <Word from="JaveL" to="Javel" />
@ -633,10 +634,10 @@
<WordPart from="Ã" to="Å" /> <WordPart from="Ã" to="Å" />
<WordPart from="í" to="i" /> <WordPart from="í" to="i" />
</PartialWords> </PartialWords>
<PartialLines /> <WholeLines />
<PartialLinesAlways /> <PartialLinesAlways />
<PartialLines />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions /> <RegularExpressions />
</OCRFixReplaceList> </OCRFixReplaceList>

View File

@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- This list contains names/words with specific casing - and specific to German only --> <!-- This list contains names with specific casing - and specific to German only -->
<names> <names>
<blacklist />
<name>Abelard</name> <name>Abelard</name>
<name>Ada</name> <name>Ada</name>
<name>Adal</name> <name>Adal</name>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="/a" to="Ja" /> <Word from="/a" to="Ja" />
<Word from="/ch" to="Ich" /> <Word from="/ch" to="Ich" />
@ -7047,12 +7048,12 @@
<WordPart from="ﬁ" to="fi" /> <WordPart from="ﬁ" to="fi" />
<WordPart from="ﬂ" to="fl" /> <WordPart from="ﬂ" to="fl" />
</PartialWords> </PartialWords>
<WholeLines />
<PartialLinesAlways />
<PartialLines> <PartialLines>
<!-- Partial lines - but whole words only --> <!-- Partial lines - but whole words only -->
</PartialLines> </PartialLines>
<PartialLinesAlways />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions /> <RegularExpressions />
</OCRFixReplaceList> </OCRFixReplaceList>

View File

@ -1,6 +1,11 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- This list contains names/words with specific casing - and specific to English only --> <!-- This list contains names with specific casing - and specific to English only -->
<names> <names>
<blacklist>
<name>Bill</name>
<name>Black</name>
<name>Male</name>
</blacklist>
<name>Aarav</name> <name>Aarav</name>
<name>Abbey</name> <name>Abbey</name>
<name>Abbie</name> <name>Abbie</name>
@ -1570,9 +1575,4 @@
<name>Zion</name> <name>Zion</name>
<name>Zoie</name> <name>Zoie</name>
<name>Zuri</name> <name>Zuri</name>
<blacklist>
<name>Male</name>
<name>Bill</name>
<name>Black</name>
</blacklist>
</names> </names>

View File

@ -2694,6 +2694,26 @@
<WordPart from=")'" to="y" /> <WordPart from=")'" to="y" />
<WordPart from="a" to="d" /> <WordPart from="a" to="d" />
</PartialWords> </PartialWords>
<WholeLines>
<!-- Whole lines - including -" etc -->
<Line from="H ey." to="Hey." />
<Line from="He)-" to="Hey." />
<Line from="N0." to="No." />
<Line from="-N0." to="-No." />
<Line from="Noll" to="No!!" />
<Line from="(G ROANS)" to="(GROANS)" />
<Line from="[G ROANS]" to="[GROANS]" />
<Line from="(M EOWS)" to="(MEOWS)" />
<Line from="[M EOWS]" to="[MEOWS]" />
<Line from="Uaughs]" to="[laughs]" />
<Line from="[chitte rs]" to="[chitters]" />
<Line from="Hil it!" to="Hit it!" />
<Line from="&lt;i&gt;Hil it!&lt;/i&gt;" to="&lt;i&gt;Hit it!&lt;/i&gt;" />
<Line from="ISIGHS]" to="[SIGHS]" />
</WholeLines>
<PartialLinesAlways>
<LinePart from="forbest act" to="for best act" />
</PartialLinesAlways>
<PartialLines> <PartialLines>
<LinePart from=" /be " to=" I be " /> <LinePart from=" /be " to=" I be " />
<LinePart from=" aren '1'" to=" aren't" /> <LinePart from=" aren '1'" to=" aren't" />
@ -3014,9 +3034,6 @@
<LinePart from="you' re" to="you're" /> <LinePart from="you' re" to="you're" />
<LinePart from="You' ve " to="You've " /> <LinePart from="You' ve " to="You've " />
</PartialLines> </PartialLines>
<PartialLinesAlways>
<LinePart from="forbest act" to="for best act" />
</PartialLinesAlways>
<BeginLines> <BeginLines>
<Beginning from="lgot it" to="I got it" /> <Beginning from="lgot it" to="I got it" />
<Beginning from="Don,t " to="Don't " /> <Beginning from="Don,t " to="Don't " />
@ -3164,23 +3181,6 @@
<Ending from=" i..." to=" I..." /> <Ending from=" i..." to=" I..." />
<Ending from=" L." to=" I." /> <Ending from=" L." to=" I." />
</EndLines> </EndLines>
<WholeLines>
<!-- Whole lines - including -" etc -->
<Line from="H ey." to="Hey." />
<Line from="He)-" to="Hey." />
<Line from="N0." to="No." />
<Line from="-N0." to="-No." />
<Line from="Noll" to="No!!" />
<Line from="(G ROANS)" to="(GROANS)" />
<Line from="[G ROANS]" to="[GROANS]" />
<Line from="(M EOWS)" to="(MEOWS)" />
<Line from="[M EOWS]" to="[MEOWS]" />
<Line from="Uaughs]" to="[laughs]" />
<Line from="[chitte rs]" to="[chitters]" />
<Line from="Hil it!" to="Hit it!" />
<Line from="&lt;i&gt;Hil it!&lt;/i&gt;" to="&lt;i&gt;Hit it!&lt;/i&gt;" />
<Line from="ISIGHS]" to="[SIGHS]" />
</WholeLines>
<RegularExpressions> <RegularExpressions>
<RegEx find="([a-z]) Won't " replaceWith="$1 won't " /> <RegEx find="([a-z]) Won't " replaceWith="$1 won't " />
<RegEx find=" L([,\r\n :;!?]+)" replaceWith=" I$1" /> <RegEx find=" L([,\r\n :;!?]+)" replaceWith=" I$1" />

View File

@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- This list contains names/words with specific casing - and specific to Spanish only --> <!-- This list contains names with specific casing - and specific to Spanish only -->
<names> <names>
<blacklist />
<name>Aang</name> <name>Aang</name>
<name>Aarón</name> <name>Aarón</name>
<name>Abdulabri</name> <name>Abdulabri</name>
@ -1009,7 +1010,4 @@
<name>Zoroastro</name> <name>Zoroastro</name>
<name>Zuko</name> <name>Zuko</name>
<name>Zulú</name> <name>Zulú</name>
<blacklist>
<name></name>
</blacklist>
</names> </names>

View File

@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- This list contains names/words with specific casing - and specific to Finnish only --> <!-- This list contains names with specific casing - and specific to Finnish only -->
<names> <names>
<blacklist />
<name>AA</name> <name>AA</name>
<name>Abbie</name> <name>Abbie</name>
<name>Abbylla</name> <name>Abbylla</name>
@ -3072,7 +3073,4 @@
<name>Zoye</name> <name>Zoye</name>
<name>Zürichissa</name> <name>Zürichissa</name>
<name>Åkessonin</name> <name>Åkessonin</name>
<blacklist>
<name></name>
</blacklist>
</names> </names>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="kellojo" to="kello jo" /> <Word from="kellojo" to="kello jo" />
<Word from="onjo" to="on jo" /> <Word from="onjo" to="on jo" />
@ -987,10 +988,6 @@
</WholeWords> </WholeWords>
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords /> <PartialWords />
<PartialLines />
<PartialLinesAlways />
<BeginLines />
<EndLines />
<WholeLines> <WholeLines>
<Line from="Katsokaa pa." to="Katsokaapa." /> <Line from="Katsokaa pa." to="Katsokaapa." />
<Line from="Mik!&#xD;&#xA;&quot;&quot;e“9iräı" to="Mik!&#xD;&#xA;-Hengitä!" /> <Line from="Mik!&#xD;&#xA;&quot;&quot;e“9iräı" to="Mik!&#xD;&#xA;-Hengitä!" />
@ -1028,5 +1025,9 @@
<Line from="Haluan kertoa jotai n" to="Haluan kertoa jotain" /> <Line from="Haluan kertoa jotai n" to="Haluan kertoa jotain" />
<Line from="I-Ialuatte" to="Haluatte" /> <Line from="I-Ialuatte" to="Haluatte" />
</WholeLines> </WholeLines>
<PartialLinesAlways />
<PartialLines />
<BeginLines />
<EndLines />
<RegularExpressions /> <RegularExpressions />
</OCRFixReplaceList> </OCRFixReplaceList>

View File

@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- This list contains names/words with specific casing - and specific to French only --> <!-- This list contains names with specific casing - and specific to French only -->
<names> <names>
<blacklist />
<name>Abdon</name> <name>Abdon</name>
<name>Abdonie</name> <name>Abdonie</name>
<name>Abdonise</name> <name>Abdonise</name>
@ -808,7 +809,4 @@
<name>Zéphir</name> <name>Zéphir</name>
<name>Zéphirin</name> <name>Zéphirin</name>
<name>Zoé</name> <name>Zoé</name>
<blacklist>
<name></name>
</blacklist>
</names> </names>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="@immatriculation" to="d'immatriculation" /> <Word from="@immatriculation" to="d'immatriculation" />
<Word from="acquer" to="acquér" /> <Word from="acquer" to="acquér" />
@ -246,13 +247,6 @@
</WholeWords> </WholeWords>
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords /> <PartialWords />
<PartialLines>
<LinePart from=" I'" to=" l'" />
<LinePart from=" |'" to=" l'" />
</PartialLines>
<PartialLinesAlways />
<BeginLines />
<EndLines />
<WholeLines> <WholeLines>
<Line from="&quot;D'ac:c:ord.&quot;" to="&quot;D'accord.&quot;" /> <Line from="&quot;D'ac:c:ord.&quot;" to="&quot;D'accord.&quot;" />
<Line from="“i QUÎ gagne, qui perd," to="ni qui gagne, qui perd," /> <Line from="“i QUÎ gagne, qui perd," to="ni qui gagne, qui perd," />
@ -266,5 +260,12 @@
<Line from="Peter H u nt." to="Peter Hunt." /> <Line from="Peter H u nt." to="Peter Hunt." />
<Line from="&quot;C'est bien mieux dans Peau. &#xD;&#xA; &#xD;&#xA; On peut sfléclabousser, faire du bruit.&quot;" to="&quot;C'est bien mieux dans l'eau. &#xD;&#xA; &#xD;&#xA; On peut s'éclabousser, faire du bruit.&quot;" /> <Line from="&quot;C'est bien mieux dans Peau. &#xD;&#xA; &#xD;&#xA; On peut sfléclabousser, faire du bruit.&quot;" to="&quot;C'est bien mieux dans l'eau. &#xD;&#xA; &#xD;&#xA; On peut s'éclabousser, faire du bruit.&quot;" />
</WholeLines> </WholeLines>
<PartialLinesAlways />
<PartialLines>
<LinePart from=" I'" to=" l'" />
<LinePart from=" |'" to=" l'" />
</PartialLines>
<BeginLines />
<EndLines />
<RegularExpressions /> <RegularExpressions />
</OCRFixReplaceList> </OCRFixReplaceList>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="andele" to="anđele" /> <Word from="andele" to="anđele" />
<Word from="andeli" to="anđeli" /> <Word from="andeli" to="anđeli" />
@ -1218,6 +1219,8 @@
</WholeWords> </WholeWords>
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords /> <PartialWords />
<WholeLines />
<PartialLinesAlways />
<PartialLines> <PartialLines>
<LinePart from="da nadjem" to="naći" /> <LinePart from="da nadjem" to="naći" />
<LinePart from="da nadjes" to="naći" /> <LinePart from="da nadjes" to="naći" />
@ -1255,10 +1258,8 @@
<LinePart from="znas sto" to="znaš što" /> <LinePart from="znas sto" to="znaš što" />
<LinePart from="znaš sto" to="znaš što" /> <LinePart from="znaš sto" to="znaš što" />
</PartialLines> </PartialLines>
<PartialLinesAlways />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions> <RegularExpressions>
<RegEx find="adas(?!v)" replaceWith="adaš" /> <RegEx find="adas(?!v)" replaceWith="adaš" />
<RegEx find="(?&lt;![Pp]r|[Nn])adje(?!(v|n(e|u[olt]))\b)" replaceWith="ađe" /> <RegEx find="(?&lt;![Pp]r|[Nn])adje(?!(v|n(e|u[olt]))\b)" replaceWith="ađe" />
@ -1895,7 +1896,7 @@
<RegEx find="(?&lt;![Kk]lj)učiće" replaceWith="učit će" /> <RegEx find="(?&lt;![Kk]lj)učiće" replaceWith="učit će" />
<RegEx find="udiva" replaceWith="uđiva" /> <RegEx find="udiva" replaceWith="uđiva" />
<RegEx find="udj([aiu])" replaceWith="uđ$1" /> <RegEx find="udj([aiu])" replaceWith="uđ$1" />
<!--ne može više zbog oportunisti i komunisti --> <!-- ne može više zbog oportunisti i komunisti -->
<RegEx find="\b([Uu])nisti" replaceWith="$1ništi" /> <RegEx find="\b([Uu])nisti" replaceWith="$1ništi" />
<RegEx find="nistav" replaceWith="ništav" /> <RegEx find="nistav" replaceWith="ništav" />
<RegEx find="ujuc" replaceWith="ujuć" /> <RegEx find="ujuc" replaceWith="ujuć" />
@ -1932,4 +1933,4 @@
<RegEx find="Zurb" replaceWith="Žurb" /> <RegEx find="Zurb" replaceWith="Žurb" />
<RegEx find="zvucen" replaceWith="zvučen" /> <RegEx find="zvucen" replaceWith="zvučen" />
</RegularExpressions> </RegularExpressions>
</OCRFixReplaceList> </OCRFixReplaceList>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="()d" to="Od" /> <Word from="()d" to="Od" />
<Word from="advokati" to="odvjetnici" /> <Word from="advokati" to="odvjetnici" />
@ -50,7 +51,7 @@
<Word from="bekstvo" to="bijeg" /> <Word from="bekstvo" to="bijeg" />
<Word from="bekstvu" to="bijegu" /> <Word from="bekstvu" to="bijegu" />
<Word from="begstvu" to="bijegu" /> <Word from="begstvu" to="bijegu" />
<!--<Word from="beo" to="bijel | bio" /> ne može jer su dvije verzije! --> <!-- <Word from="beo" to="bijel | bio" /> ne može jer su dvije verzije! -->
<Word from="bes" to="bijes" /> <Word from="bes" to="bijes" />
<Word from="besa" to="bijesa" /> <Word from="besa" to="bijesa" />
<Word from="besan" to="bijesan" /> <Word from="besan" to="bijesan" />
@ -2071,7 +2072,7 @@
<Word from="Kolu" to="Coleu" /> <Word from="Kolu" to="Coleu" />
<Word from="Kolins" to="Collins" /> <Word from="Kolins" to="Collins" />
<Word from="Koni" to="Connie" /> <Word from="Koni" to="Connie" />
<Word from="Konor" to="Connor"/> <Word from="Konor" to="Connor" />
<Word from="Kortni" to="Courtney" /> <Word from="Kortni" to="Courtney" />
<Word from="Krejg" to="Craig" /> <Word from="Krejg" to="Craig" />
<Word from="Krejga" to="Craiga" /> <Word from="Krejga" to="Craiga" />
@ -2228,6 +2229,8 @@
</WholeWords> </WholeWords>
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords /> <PartialWords />
<WholeLines />
<PartialLinesAlways />
<PartialLines> <PartialLines>
<LinePart from="Ako ej" to="Ako je" /> <LinePart from="Ako ej" to="Ako je" />
<LinePart from="ako ej" to="ako je" /> <LinePart from="ako ej" to="ako je" />
@ -2397,10 +2400,8 @@
<LinePart from="Želi da zna" to="Želi znati" /> <LinePart from="Želi da zna" to="Želi znati" />
<LinePart from="želi da zna" to="želi znati" /> <LinePart from="želi da zna" to="želi znati" />
</PartialLines> </PartialLines>
<PartialLinesAlways />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions> <RegularExpressions>
<!-- deklinacije imenica i konjugacije glagola --> <!-- deklinacije imenica i konjugacije glagola -->
<RegEx find="([0-9])-ogodišnj" replaceWith="$1-godišnj" /> <RegEx find="([0-9])-ogodišnj" replaceWith="$1-godišnj" />
@ -3216,7 +3217,7 @@
<RegEx find="žur[ck]" replaceWith="zabav" /> <RegEx find="žur[ck]" replaceWith="zabav" />
<RegEx find="Žur[ck]" replaceWith="Zabav" /> <RegEx find="Žur[ck]" replaceWith="Zabav" />
<RegEx find="([sSnNpPzZ]r?[aoi]z[vn])ać" replaceWith="$1at ć" /> <RegEx find="([sSnNpPzZ]r?[aoi]z[vn])ać" replaceWith="$1at ć" />
<!-- mjeseci [\b mora biti nakon jun]--> <!-- mjeseci [\b mora biti nakon jun] -->
<RegEx find="([Ss])j?ečnj([au]|ima)" replaceWith="$1iječnj$2" /> <RegEx find="([Ss])j?ečnj([au]|ima)" replaceWith="$1iječnj$2" />
<RegEx find="\ba[uv]gust" replaceWith="kolovoz" /> <RegEx find="\ba[uv]gust" replaceWith="kolovoz" />
<RegEx find="septembr" replaceWith="rujn" /> <RegEx find="septembr" replaceWith="rujn" />
@ -3316,7 +3317,7 @@
<RegEx find="efiniši" replaceWith="efiniraj" /> <RegEx find="efiniši" replaceWith="efiniraj" />
<RegEx find="efinišu" replaceWith="efiniraju" /> <RegEx find="efinišu" replaceWith="efiniraju" />
<RegEx find="empton" replaceWith="ampton" /> <RegEx find="empton" replaceWith="ampton" />
<!-- bjegunac--> <!-- bjegunac -->
<RegEx find="(?&lt;!j)egun" replaceWith="jegun" /> <RegEx find="(?&lt;!j)egun" replaceWith="jegun" />
<RegEx find="(?&lt;!j)elokup" replaceWith="jelokup" /> <RegEx find="(?&lt;!j)elokup" replaceWith="jelokup" />
<RegEx find="enlj" replaceWith="enj" /> <RegEx find="enlj" replaceWith="enj" />
@ -3442,7 +3443,7 @@
<RegEx find="posel" replaceWith="posjel" /> <RegEx find="posel" replaceWith="posjel" />
<RegEx find="produkova" replaceWith="producira" /> <RegEx find="produkova" replaceWith="producira" />
<RegEx find="\bpominj" replaceWith="spominj" /> <RegEx find="\bpominj" replaceWith="spominj" />
<!-- ignoriše / koncentriše /operiše /toleriše /--> <!-- ignoriše / koncentriše /operiše /toleriše / -->
<RegEx find="([te]|ku|pi|no)riše" replaceWith="$1rira" /> <RegEx find="([te]|ku|pi|no)riše" replaceWith="$1rira" />
<RegEx find="(?&lt;![Pp]r|[Nn])adje(?!(v|n(e|u[olt]))\b)" replaceWith="ađe" /> <RegEx find="(?&lt;![Pp]r|[Nn])adje(?!(v|n(e|u[olt]))\b)" replaceWith="ađe" />
<RegEx find="par nedj?elja" replaceWith="par tjedana" /> <RegEx find="par nedj?elja" replaceWith="par tjedana" />

View File

@ -1,12 +1,13 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords /> <WholeWords />
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords /> <PartialWords />
<PartialLines /> <WholeLines />
<PartialLinesAlways /> <PartialLinesAlways />
<PartialLines />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions> <RegularExpressions>
<!-- nagy I-l javítások --> <!-- nagy I-l javítások -->
<RegEx find="([\x41-\x5a\x61-\x7a\xc1-\xfc])II" replaceWith="$1ll" /> <RegEx find="([\x41-\x5a\x61-\x7a\xc1-\xfc])II" replaceWith="$1ll" />

View File

@ -5,6 +5,7 @@ This file is case sensitive.
This file is generated/updated by Multi Translator This file is generated/updated by Multi Translator
--> -->
<names> <names>
<blacklist />
<name>1A</name> <name>1A</name>
<name>2 Chainz</name> <name>2 Chainz</name>
<name>2 Pac</name> <name>2 Pac</name>
@ -2653,8 +2654,8 @@ This file is generated/updated by Multi Translator
<name>Iceland</name> <name>Iceland</name>
<name>Icelander</name> <name>Icelander</name>
<name>Icelandic</name> <name>Icelandic</name>
<name>I'd</name>
<name>ID</name> <name>ID</name>
<name>I'd</name>
<name>Idaho</name> <name>Idaho</name>
<name>Idris</name> <name>Idris</name>
<name>Idris Elba</name> <name>Idris Elba</name>

View File

@ -9,8 +9,8 @@
<word>fotograafje</word> <word>fotograafje</word>
<word>gemaar</word> <word>gemaar</word>
<word>gps</word> <word>gps</word>
<word>hielenlikkerij</word>
<word>hielenlikkerij</word> <word>hielenlikkerij</word>
<word>hielenlikkerij</word>
<word>hostessen</word> <word>hostessen</word>
<word>inbak</word> <word>inbak</word>
<word>insignificante</word> <word>insignificante</word>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="aandachtmag" to="aandacht mag" /> <Word from="aandachtmag" to="aandacht mag" />
<Word from="agrariers" to="agrariërs" /> <Word from="agrariers" to="agrariërs" />
@ -110,12 +111,16 @@
<Word from="zonderjou" to="zonder jou" /> <Word from="zonderjou" to="zonder jou" />
</WholeWords> </WholeWords>
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords /> <PartialWords>
<PartialLines /> <!-- Will be used to check words not in dictionary -->
<!-- If new word(s) exists in spelling dictionary, it is (they are) accepted -->
<WordPart from="ĳ" to="ij" />
</PartialWords>
<WholeLines />
<PartialLinesAlways /> <PartialLinesAlways />
<PartialLines />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions> <RegularExpressions>
<RegEx find="\blk(?=\p{Ll}{2})" replaceWith="Ik" /> <RegEx find="\blk(?=\p{Ll}{2})" replaceWith="Ik" />
<RegEx find="\bln(?=\p{Ll}{2})" replaceWith="In" /> <RegEx find="\bln(?=\p{Ll}{2})" replaceWith="In" />

View File

@ -1,6 +1,22 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- This list contains names/words with specific casing - and specific to Norwegian only --> <!-- This list contains names with specific casing - and specific to Norwegian only -->
<names> <names>
<blacklist>
<name>Ane</name>
<name>Ben</name>
<name>Bo</name>
<name>Dag</name>
<name>Fet</name>
<name>Fred</name>
<name>Gro</name>
<name>Hem</name>
<name>Jo</name>
<name>Per</name>
<name>Rune</name>
<name>Saga</name>
<name>Tom</name>
<name>Ål</name>
</blacklist>
<name>Aage</name> <name>Aage</name>
<name>Aagot</name> <name>Aagot</name>
<name>Aase</name> <name>Aase</name>
@ -2856,20 +2872,4 @@
<name>Åsne</name> <name>Åsne</name>
<name>Åsnes</name> <name>Åsnes</name>
<name>Åsta</name> <name>Åsta</name>
<blacklist>
<name>Ane</name>
<name>Ben</name>
<name>Bo</name>
<name>Dag</name>
<name>Fet</name>
<name>Fred</name>
<name>Gro</name>
<name>Hem</name>
<name>Jo</name>
<name>Per</name>
<name>Rune</name>
<name>Saga</name>
<name>Tom</name>
<name>Ål</name>
</blacklist>
</names> </names>

View File

@ -48,10 +48,10 @@
<WordPart from="Ã" to="Å" /> <WordPart from="Ã" to="Å" />
<WordPart from="í" to="i" /> <WordPart from="í" to="i" />
</PartialWords> </PartialWords>
<PartialLines /> <WholeLines />
<PartialLinesAlways /> <PartialLinesAlways />
<PartialLines />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions /> <RegularExpressions />
</OCRFixReplaceList> </OCRFixReplaceList>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords /> <WholeWords />
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords> <PartialWords>
@ -34,10 +35,10 @@
<WordPart from="Ã" to="Å" /> <WordPart from="Ã" to="Å" />
<WordPart from="í" to="i" /> <WordPart from="í" to="i" />
</PartialWords> </PartialWords>
<PartialLines /> <WholeLines />
<PartialLinesAlways /> <PartialLinesAlways />
<PartialLines />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions /> <RegularExpressions />
</OCRFixReplaceList> </OCRFixReplaceList>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="abitual" to="habitual" /> <Word from="abitual" to="habitual" />
<Word from="àcerca" to="acerca" /> <Word from="àcerca" to="acerca" />
@ -445,6 +446,8 @@
</WholeWords> </WholeWords>
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords /> <PartialWords />
<WholeLines />
<PartialLinesAlways />
<PartialLines> <PartialLines>
<LinePart from="IN 6-E" to="N 6 E" /> <LinePart from="IN 6-E" to="N 6 E" />
<LinePart from="in tegrar-se" to="integrar-se" /> <LinePart from="in tegrar-se" to="integrar-se" />
@ -474,10 +477,8 @@
<LinePart from="R egião" to="Região" /> <LinePart from="R egião" to="Região" />
<LinePart from="unsuficien temente" to="insuficientemente" /> <LinePart from="unsuficien temente" to="insuficientemente" />
</PartialLines> </PartialLines>
<PartialLinesAlways />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions> <RegularExpressions>
<!-- <RegEx find="\bi\b" replaceWith="I" /> just an example - do not use this regex --> <!-- <RegEx find="\bi\b" replaceWith="I" /> just an example - do not use this regex -->
<RegEx find="([0-9]) +º" replaceWith="$1º" /> <RegEx find="([0-9]) +º" replaceWith="$1º" />

View File

@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- This list contains names/words with specific casing - and specific to Portuguese only --> <!-- This list contains names with specific casing - and specific to Portuguese only -->
<names> <names>
<blacklist />
<name>Aarão</name> <name>Aarão</name>
<name>Abdénago</name> <name>Abdénago</name>
<name>Abedenego</name> <name>Abedenego</name>

View File

@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- This list contains names/words with specific casing - and specific to Russian only --> <!-- This list contains names with specific casing - and specific to Russian only -->
<names> <names>
<blacklist />
<name>Абакум</name> <name>Абакум</name>
<name>Абакумович</name> <name>Абакумович</name>
<name>Абакумовна</name> <name>Абакумовна</name>
@ -2891,7 +2892,4 @@
<name>Ярославна</name> <name>Ярославна</name>
<name>Ярославович</name> <name>Ярославович</name>
<name>Ярославовна</name> <name>Ярославовна</name>
<blacklist>
<name></name>
</blacklist>
</names> </names>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="НЄЙ" to="НЕЙ" /> <Word from="НЄЙ" to="НЕЙ" />
<Word from="ОРГЗНИЗМОБ" to="ОРГАНИЗМА" /> <Word from="ОРГЗНИЗМОБ" to="ОРГАНИЗМА" />
@ -248,10 +249,10 @@
<WordPart from="ШЗ" to="ША" /> <WordPart from="ШЗ" to="ША" />
<WordPart from="І\/І" to="М" /> <WordPart from="І\/І" to="М" />
</PartialWords> </PartialWords>
<PartialLines /> <WholeLines />
<PartialLinesAlways /> <PartialLinesAlways />
<PartialLines />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions /> <RegularExpressions />
</OCRFixReplaceList> </OCRFixReplaceList>

View File

@ -1,4 +1,5 @@
<OCRFixReplaceList> <?xml version="1.0" encoding="utf-8"?>
<OCRFixReplaceList>
<WholeWords> <WholeWords>
<!-- Abreviaturas simples --> <!-- Abreviaturas simples -->
<Word from="KBs" to="kB" /> <Word from="KBs" to="kB" />
@ -368,6 +369,11 @@
</WholeWords> </WholeWords>
<PartialWordsAlways /> <PartialWordsAlways />
<PartialWords /> <PartialWords />
<WholeLines>
<!-- Todas las líneas -->
<Line from="No" to="No." />
</WholeLines>
<PartialLinesAlways />
<PartialLines> <PartialLines>
<!-- Varios --> <!-- Varios -->
<LinePart from="de gratis" to="gratis" /> <LinePart from="de gratis" to="gratis" />
@ -710,15 +716,10 @@
<LinePart from="misterl" to="misteri" /> <LinePart from="misterl" to="misteri" />
<LinePart from="vivencl" to="vivenci" /> <LinePart from="vivencl" to="vivenci" />
</PartialLines> </PartialLines>
<PartialLinesAlways />
<BeginLines /> <BeginLines />
<EndLines> <EndLines>
<Ending from=".»." to="»." /> <Ending from=".»." to="»." />
</EndLines> </EndLines>
<WholeLines>
<!-- Todas las líneas -->
<Line from="No" to="No." />
</WholeLines>
<RegularExpressions> <RegularExpressions>
<!-- Abreviaturas compuestas --> <!-- Abreviaturas compuestas -->
<RegEx find="\b[Ss](r|ra|rta)\b\.?" replaceWith="S$1." /> <RegEx find="\b[Ss](r|ra|rta)\b\.?" replaceWith="S$1." />

View File

@ -1,4 +1,5 @@
<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] --> <?xml version="1.0" encoding="utf-8"?>
<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
<OCRFixReplaceList> <OCRFixReplaceList>
<WholeWords> <WholeWords>
<Word from="ču" to="ću" /> <Word from="ču" to="ću" />
@ -59,6 +60,8 @@
<WordPart from="IVl" to="M" /> <WordPart from="IVl" to="M" />
<WordPart from="lVl" to="M" /> <WordPart from="lVl" to="M" />
</PartialWords> </PartialWords>
<WholeLines />
<PartialLinesAlways />
<PartialLines> <PartialLines>
<LinePart from="bi smo" to="bismo" /> <LinePart from="bi smo" to="bismo" />
<LinePart from="dali je" to="da li je" /> <LinePart from="dali je" to="da li je" />
@ -97,10 +100,8 @@
<LinePart from="Svo vrijeme" to="Sve vrijeme" /> <LinePart from="Svo vrijeme" to="Sve vrijeme" />
<LinePart from="Cijelo vrijeme" to="Sve vrijeme" /> <LinePart from="Cijelo vrijeme" to="Sve vrijeme" />
</PartialLines> </PartialLines>
<PartialLinesAlways />
<BeginLines /> <BeginLines />
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions> <RegularExpressions>
<RegEx find="ÄŤ" replaceWith="č" /> <RegEx find="ÄŤ" replaceWith="č" />
<RegEx find="Ä" replaceWith="č" /> <RegEx find="Ä" replaceWith="č" />

View File

@ -440,13 +440,13 @@
<WordPart from="ejag" to="e jag" /> <WordPart from="ejag" to="e jag" />
<WordPart from="ärp" to="är p" /> <WordPart from="ärp" to="är p" />
</PartialWords> </PartialWords>
<PartialLines /> <WholeLines />
<PartialLinesAlways /> <PartialLinesAlways />
<PartialLines />
<BeginLines> <BeginLines>
<Beginning from="Ln " to="In " /> <Beginning from="Ln " to="In " />
<Beginning from="U ppfattat" to="Uppfattat" /> <Beginning from="U ppfattat" to="Uppfattat" />
</BeginLines> </BeginLines>
<EndLines /> <EndLines />
<WholeLines />
<RegularExpressions /> <RegularExpressions />
</OCRFixReplaceList> </OCRFixReplaceList>