dictionaries: automated XML upkeep

2024-11-21 18:52:36 +01:00 · 2019-05-26 03:23:51 +02:00 · 2019-05-26 03:23:51 +02:00 · 0469c7f59f
commit 0469c7f59f
parent dcc28ab676
27 changed files with 140 additions and 128 deletions
--- a/Dictionaries/da_names.xml
+++ b/Dictionaries/da_names.xml
@@ -1,6 +1,9 @@
 <?xml version="1.0" encoding="utf-8"?>
-<!-- This list contains names/words with specific casing - and specific to Danish only -->
+<!-- This list contains names with specific casing - and specific to Danish only -->
 <names>
+  <blacklist>
+    <name>Rabat</name>
+  </blacklist>
  <name>Aquarium</name>
  <name>Armfeldt</name>
  <name>Asgård</name>
@@ -85,7 +88,4 @@
  <name>Wharton</name>
  <name>Wylie</name>
  <name>Yardley</name>
-  <blacklist>
-    <name>Rabat</name>
-  </blacklist>
 </names>
--- a/Dictionaries/dan_OCRFixReplaceList.xml
+++ b/Dictionaries/dan_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <Word from="Haner" to="Han er" />
    <Word from="JaveL" to="Javel" />
@@ -633,10 +634,10 @@
    <WordPart from="Ã" to="Å" />
    <WordPart from="í" to="i" />
  </PartialWords>
-  <PartialLines />
+  <WholeLines />
  <PartialLinesAlways />
+  <PartialLines />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions />
 </OCRFixReplaceList>
--- a/Dictionaries/de_names.xml
+++ b/Dictionaries/de_names.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
-<!-- This list contains names/words with specific casing - and specific to German only -->
+<!-- This list contains names with specific casing - and specific to German only -->
 <names>
+  <blacklist />
  <name>Abelard</name>
  <name>Ada</name>
  <name>Adal</name>
--- a/Dictionaries/deu_OCRFixReplaceList.xml
+++ b/Dictionaries/deu_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <Word from="/a" to="Ja" />
    <Word from="/ch" to="Ich" />
@@ -7047,12 +7048,12 @@
    <WordPart from="ﬁ" to="fi" />
    <WordPart from="ﬂ" to="fl" />
  </PartialWords>
+  <WholeLines />
+  <PartialLinesAlways />
  <PartialLines>
    <!-- Partial lines - but whole words only -->
  </PartialLines>
-  <PartialLinesAlways />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions />
 </OCRFixReplaceList>
--- a/Dictionaries/en_names.xml
+++ b/Dictionaries/en_names.xml
@@ -1,6 +1,11 @@
 <?xml version="1.0" encoding="utf-8"?>
-<!-- This list contains names/words with specific casing - and specific to English only -->
+<!-- This list contains names with specific casing - and specific to English only -->
 <names>
+  <blacklist>
+    <name>Bill</name>
+    <name>Black</name>
+    <name>Male</name>
+  </blacklist>
  <name>Aarav</name>
  <name>Abbey</name>
  <name>Abbie</name>
@@ -1570,9 +1575,4 @@
  <name>Zion</name>
  <name>Zoie</name>
  <name>Zuri</name>
-  <blacklist>
-    <name>Male</name>
-    <name>Bill</name>
-    <name>Black</name>
-  </blacklist>
 </names>
--- a/Dictionaries/eng_OCRFixReplaceList.xml
+++ b/Dictionaries/eng_OCRFixReplaceList.xml
@@ -2694,6 +2694,26 @@
    <WordPart from=")'" to="y" />
    <WordPart from="a’" to="d" />
  </PartialWords>
+  <WholeLines>
+    <!-- Whole lines - including -" etc -->
+    <Line from="H ey." to="Hey." />
+    <Line from="He)’-" to="Hey." />
+    <Line from="N0." to="No." />
+    <Line from="-N0." to="-No." />
+    <Line from="Noll" to="No!!" />
+    <Line from="(G ROANS)" to="(GROANS)" />
+    <Line from="[G ROANS]" to="[GROANS]" />
+    <Line from="(M EOWS)" to="(MEOWS)" />
+    <Line from="[M EOWS]" to="[MEOWS]" />
+    <Line from="Uaughs]" to="[laughs]" />
+    <Line from="[chitte rs]" to="[chitters]" />
+    <Line from="Hil‘ it!" to="Hit it!" />
+    <Line from="&lt;i&gt;Hil‘ it!&lt;/i&gt;" to="&lt;i&gt;Hit it!&lt;/i&gt;" />
+    <Line from="ISIGHS]" to="[SIGHS]" />
+  </WholeLines>
+  <PartialLinesAlways>
+    <LinePart from="forbest act" to="for best act" />
+  </PartialLinesAlways>
  <PartialLines>
    <LinePart from=" /be " to=" I be " />
    <LinePart from=" aren '1'" to=" aren't" />
@@ -3014,9 +3034,6 @@
    <LinePart from="you' re" to="you're" />
    <LinePart from="You' ve " to="You've " />
  </PartialLines>
-  <PartialLinesAlways>
-    <LinePart from="forbest act" to="for best act" />
-  </PartialLinesAlways>
  <BeginLines>
    <Beginning from="lgot it" to="I got it" />
    <Beginning from="Don,t " to="Don't " />
@@ -3164,23 +3181,6 @@
    <Ending from=" i..." to=" I..." />
    <Ending from=" L." to=" I." />
  </EndLines>
-  <WholeLines>
-    <!-- Whole lines - including -" etc -->
-    <Line from="H ey." to="Hey." />
-    <Line from="He)’-" to="Hey." />
-    <Line from="N0." to="No." />
-    <Line from="-N0." to="-No." />
-    <Line from="Noll" to="No!!" />
-    <Line from="(G ROANS)" to="(GROANS)" />
-    <Line from="[G ROANS]" to="[GROANS]" />
-    <Line from="(M EOWS)" to="(MEOWS)" />
-    <Line from="[M EOWS]" to="[MEOWS]" />
-    <Line from="Uaughs]" to="[laughs]" />
-    <Line from="[chitte rs]" to="[chitters]" />
-    <Line from="Hil‘ it!" to="Hit it!" />
-    <Line from="&lt;i&gt;Hil‘ it!&lt;/i&gt;" to="&lt;i&gt;Hit it!&lt;/i&gt;" />
-    <Line from="ISIGHS]" to="[SIGHS]" />
-  </WholeLines>
  <RegularExpressions>
    <RegEx find="([a-z]) Won't " replaceWith="$1 won't " />
    <RegEx find=" L([,\r\n :;!?]+)" replaceWith=" I$1" />
--- a/Dictionaries/es_names.xml
+++ b/Dictionaries/es_names.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
-<!-- This list contains names/words with specific casing - and specific to Spanish only -->
+<!-- This list contains names with specific casing - and specific to Spanish only -->
 <names>
+  <blacklist />
  <name>Aang</name>
  <name>Aarón</name>
  <name>Abdulabri</name>
@@ -1009,7 +1010,4 @@
  <name>Zoroastro</name>
  <name>Zuko</name>
  <name>Zulú</name>
-  <blacklist>
-    <name></name>
-  </blacklist>
 </names>
--- a/Dictionaries/fi_names.xml
+++ b/Dictionaries/fi_names.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
-<!-- This list contains names/words with specific casing - and specific to Finnish only -->
+<!-- This list contains names with specific casing - and specific to Finnish only -->
 <names>
+  <blacklist />
  <name>AA</name>
  <name>Abbie</name>
  <name>Abbylla</name>
@@ -3072,7 +3073,4 @@
  <name>Zoye</name>
  <name>Zürichissa</name>
  <name>Åkessonin</name>
-  <blacklist>
-    <name></name>
-  </blacklist>
 </names>
--- a/Dictionaries/fin_OCRFixReplaceList.xml
+++ b/Dictionaries/fin_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <Word from="kellojo" to="kello jo" />
    <Word from="onjo" to="on jo" />
@@ -987,10 +988,6 @@
  </WholeWords>
  <PartialWordsAlways />
  <PartialWords />
-  <PartialLines />
-  <PartialLinesAlways />
-  <BeginLines />
-  <EndLines />
  <WholeLines>
    <Line from="Katsokaa pa." to="Katsokaapa." />
    <Line from="Mik!&#xD;&#xA;&quot;&quot;e“9iräı" to="Mik!&#xD;&#xA;-Hengitä!" />
@@ -1028,5 +1025,9 @@
    <Line from="Haluan kertoa jotai n" to="Haluan kertoa jotain" />
    <Line from="I-Ialuatte" to="Haluatte" />
  </WholeLines>
+  <PartialLinesAlways />
+  <PartialLines />
+  <BeginLines />
+  <EndLines />
  <RegularExpressions />
 </OCRFixReplaceList>
--- a/Dictionaries/fr_names.xml
+++ b/Dictionaries/fr_names.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
-<!-- This list contains names/words with specific casing - and specific to French only -->
+<!-- This list contains names with specific casing - and specific to French only -->
 <names>
+  <blacklist />
  <name>Abdon</name>
  <name>Abdonie</name>
  <name>Abdonise</name>
@@ -808,7 +809,4 @@
  <name>Zéphir</name>
  <name>Zéphirin</name>
  <name>Zoé</name>
-  <blacklist>
-    <name></name>
-  </blacklist>
 </names>
--- a/Dictionaries/fra_OCRFixReplaceList.xml
+++ b/Dictionaries/fra_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <Word from="@immatriculation" to="d'immatriculation" />
    <Word from="acquer" to="acquér" />
@@ -246,13 +247,6 @@
  </WholeWords>
  <PartialWordsAlways />
  <PartialWords />
-  <PartialLines>
-    <LinePart from=" I'" to=" l'" />
-    <LinePart from=" |'" to=" l'" />
-  </PartialLines>
-  <PartialLinesAlways />
-  <BeginLines />
-  <EndLines />
  <WholeLines>
    <Line from="&quot;D'ac:c:ord.&quot;" to="&quot;D'accord.&quot;" />
    <Line from="“i QUÎ gagne, qui perd," to="ni qui gagne, qui perd," />
@@ -266,5 +260,12 @@
    <Line from="Peter H u nt." to="Peter Hunt." />
    <Line from="&quot;C'est bien mieux dans Peau. &#xD;&#xA; &#xD;&#xA; On peut sﬂéclabousser, faire du bruit.&quot;" to="&quot;C'est bien mieux dans l'eau. &#xD;&#xA; &#xD;&#xA; On peut s'éclabousser, faire du bruit.&quot;" />
  </WholeLines>
+  <PartialLinesAlways />
+  <PartialLines>
+    <LinePart from=" I'" to=" l'" />
+    <LinePart from=" |'" to=" l'" />
+  </PartialLines>
+  <BeginLines />
+  <EndLines />
  <RegularExpressions />
 </OCRFixReplaceList>
--- a/Dictionaries/hrb_OCRFixReplaceList.xml
+++ b/Dictionaries/hrb_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <Word from="andele" to="anđele" />
    <Word from="andeli" to="anđeli" />
@@ -1218,6 +1219,8 @@
  </WholeWords>
  <PartialWordsAlways />
  <PartialWords />
+  <WholeLines />
+  <PartialLinesAlways />
  <PartialLines>
    <LinePart from="da nadjem" to="naći" />
    <LinePart from="da nadjes" to="naći" />
@@ -1255,10 +1258,8 @@
    <LinePart from="znas sto" to="znaš što" />
    <LinePart from="znaš sto" to="znaš što" />
  </PartialLines>
-  <PartialLinesAlways />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions>
    <RegEx find="adas(?!v)" replaceWith="adaš" />
    <RegEx find="(?&lt;![Pp]r|[Nn])adje(?!(v|n(e|u[olt]))\b)" replaceWith="ađe" />
@@ -1895,7 +1896,7 @@
    <RegEx find="(?&lt;![Kk]lj)učiće" replaceWith="učit će" />
    <RegEx find="udiva" replaceWith="uđiva" />
    <RegEx find="udj([aiu])" replaceWith="uđ$1" />
-    <!--ne može više zbog oportunisti i komunisti -->
+    <!-- ne može više zbog oportunisti i komunisti -->
    <RegEx find="\b([Uu])nisti" replaceWith="$1ništi" />
    <RegEx find="nistav" replaceWith="ništav" />
    <RegEx find="ujuc" replaceWith="ujuć" />
@@ -1932,4 +1933,4 @@
    <RegEx find="Zurb" replaceWith="Žurb" />
    <RegEx find="zvucen" replaceWith="zvučen" />
  </RegularExpressions>
-</OCRFixReplaceList>
+</OCRFixReplaceList>
--- a/Dictionaries/hrv_OCRFixReplaceList.xml
+++ b/Dictionaries/hrv_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <Word from="()d" to="Od" />
    <Word from="advokati" to="odvjetnici" />
@@ -50,7 +51,7 @@
    <Word from="bekstvo" to="bijeg" />
    <Word from="bekstvu" to="bijegu" />
    <Word from="begstvu" to="bijegu" />
-    <!--<Word from="beo" to="bijel | bio" /> ne može jer su dvije verzije! -->
+    <!-- <Word from="beo" to="bijel | bio" /> ne može jer su dvije verzije! -->
    <Word from="bes" to="bijes" />
    <Word from="besa" to="bijesa" />
    <Word from="besan" to="bijesan" />
@@ -2071,7 +2072,7 @@
    <Word from="Kolu" to="Coleu" />
    <Word from="Kolins" to="Collins" />
    <Word from="Koni" to="Connie" />
-    <Word from="Konor" to="Connor"/>
+    <Word from="Konor" to="Connor" />
    <Word from="Kortni" to="Courtney" />
    <Word from="Krejg" to="Craig" />
    <Word from="Krejga" to="Craiga" />
@@ -2228,6 +2229,8 @@
  </WholeWords>
  <PartialWordsAlways />
  <PartialWords />
+  <WholeLines />
+  <PartialLinesAlways />
  <PartialLines>
    <LinePart from="Ako ej" to="Ako je" />
    <LinePart from="ako ej" to="ako je" />
@@ -2397,10 +2400,8 @@
    <LinePart from="Želi da zna" to="Želi znati" />
    <LinePart from="želi da zna" to="želi znati" />
  </PartialLines>
-  <PartialLinesAlways />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions>
    <!-- deklinacije imenica i konjugacije glagola -->
    <RegEx find="([0-9])-ogodišnj" replaceWith="$1-godišnj" />
@@ -3216,7 +3217,7 @@
    <RegEx find="žur[ck]" replaceWith="zabav" />
    <RegEx find="Žur[ck]" replaceWith="Zabav" />
    <RegEx find="([sSnNpPzZ]r?[aoi]z[vn])ać" replaceWith="$1at ć" />
-    <!-- mjeseci [\b mora biti nakon jun]-->
+    <!-- mjeseci [\b mora biti nakon jun] -->
    <RegEx find="([Ss])j?ečnj([au]|ima)" replaceWith="$1iječnj$2" />
    <RegEx find="\ba[uv]gust" replaceWith="kolovoz" />
    <RegEx find="septembr" replaceWith="rujn" />
@@ -3316,7 +3317,7 @@
    <RegEx find="efiniši" replaceWith="efiniraj" />
    <RegEx find="efinišu" replaceWith="efiniraju" />
    <RegEx find="empton" replaceWith="ampton" />
-    <!-- bjegunac-->
+    <!-- bjegunac -->
    <RegEx find="(?&lt;!j)egun" replaceWith="jegun" />
    <RegEx find="(?&lt;!j)elokup" replaceWith="jelokup" />
    <RegEx find="enlj" replaceWith="enj" />
@@ -3442,7 +3443,7 @@
    <RegEx find="posel" replaceWith="posjel" />
    <RegEx find="produkova" replaceWith="producira" />
    <RegEx find="\bpominj" replaceWith="spominj" />
-    <!-- ignoriše / koncentriše /operiše /toleriše /-->
+    <!-- ignoriše / koncentriše /operiše /toleriše / -->
    <RegEx find="([te]|ku|pi|no)riše" replaceWith="$1rira" />
    <RegEx find="(?&lt;![Pp]r|[Nn])adje(?!(v|n(e|u[olt]))\b)" replaceWith="ađe" />
    <RegEx find="par nedj?elja" replaceWith="par tjedana" />
--- a/Dictionaries/hun_OCRFixReplaceList.xml
+++ b/Dictionaries/hun_OCRFixReplaceList.xml
@@ -1,12 +1,13 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords />
  <PartialWordsAlways />
  <PartialWords />
-  <PartialLines />
+  <WholeLines />
  <PartialLinesAlways />
+  <PartialLines />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions>
    <!-- nagy I-l javítások -->
    <RegEx find="([\x41-\x5a\x61-\x7a\xc1-\xfc])II" replaceWith="$1ll" />
--- a/Dictionaries/names.xml
+++ b/Dictionaries/names.xml
@@ -5,6 +5,7 @@ This file is case sensitive.
 This file is generated/updated by Multi Translator
 -->
 <names>
+  <blacklist />
  <name>1A</name>
  <name>2 Chainz</name>
  <name>2 Pac</name>
@@ -2653,8 +2654,8 @@ This file is generated/updated by Multi Translator
  <name>Iceland</name>
  <name>Icelander</name>
  <name>Icelandic</name>
-  <name>I'd</name>
  <name>ID</name>
+  <name>I'd</name>
  <name>Idaho</name>
  <name>Idris</name>
  <name>Idris Elba</name>
--- a/Dictionaries/nl_NL_user.xml
+++ b/Dictionaries/nl_NL_user.xml
@@ -9,8 +9,8 @@
  <word>fotograafje</word>
  <word>gemaar</word>
  <word>gps</word>
-  <word>hielenlikkerĳ</word>
  <word>hielenlikkerij</word>
+  <word>hielenlikkerĳ</word>
  <word>hostessen</word>
  <word>inbak</word>
  <word>insignificante</word>
--- a/Dictionaries/nld_OCRFixReplaceList.xml
+++ b/Dictionaries/nld_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <Word from="aandachtmag" to="aandacht mag" />
    <Word from="agrariers" to="agrariërs" />
@@ -110,12 +111,16 @@
    <Word from="zonderjou" to="zonder jou" />
  </WholeWords>
  <PartialWordsAlways />
-  <PartialWords />
-  <PartialLines />
+  <PartialWords>
+    <!-- Will be used to check words not in dictionary -->
+    <!-- If new word(s) exists in spelling dictionary, it is (they are) accepted -->
+    <WordPart from="ĳ" to="ij" />
+  </PartialWords>
+  <WholeLines />
  <PartialLinesAlways />
+  <PartialLines />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions>
    <RegEx find="\blk(?=\p{Ll}{2})" replaceWith="Ik" />
    <RegEx find="\bln(?=\p{Ll}{2})" replaceWith="In" />
--- a/Dictionaries/no_names.xml
+++ b/Dictionaries/no_names.xml
@@ -1,6 +1,22 @@
 <?xml version="1.0" encoding="utf-8"?>
-<!-- This list contains names/words with specific casing - and specific to Norwegian only -->
+<!-- This list contains names with specific casing - and specific to Norwegian only -->
 <names>
+  <blacklist>
+    <name>Ane</name>
+    <name>Ben</name>
+    <name>Bo</name>
+    <name>Dag</name>
+    <name>Fet</name>
+    <name>Fred</name>
+    <name>Gro</name>
+    <name>Hem</name>
+    <name>Jo</name>
+    <name>Per</name>
+    <name>Rune</name>
+    <name>Saga</name>
+    <name>Tom</name>
+    <name>Ål</name>
+  </blacklist>
  <name>Aage</name>
  <name>Aagot</name>
  <name>Aase</name>
@@ -2856,20 +2872,4 @@
  <name>Åsne</name>
  <name>Åsnes</name>
  <name>Åsta</name>
-  <blacklist>
-    <name>Ane</name>
-    <name>Ben</name>
-    <name>Bo</name>
-    <name>Dag</name>
-    <name>Fet</name>
-    <name>Fred</name>
-    <name>Gro</name>
-    <name>Hem</name>
-    <name>Jo</name>
-    <name>Per</name>
-    <name>Rune</name>
-    <name>Saga</name>
-    <name>Tom</name>
-    <name>Ål</name>
-  </blacklist>
 </names>
--- a/Dictionaries/nob_OCRFixReplaceList.xml
+++ b/Dictionaries/nob_OCRFixReplaceList.xml
@@ -48,10 +48,10 @@
    <WordPart from="Ã" to="Å" />
    <WordPart from="í" to="i" />
  </PartialWords>
-  <PartialLines />
+  <WholeLines />
  <PartialLinesAlways />
+  <PartialLines />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions />
 </OCRFixReplaceList>
--- a/Dictionaries/nor_OCRFixReplaceList.xml
+++ b/Dictionaries/nor_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords />
  <PartialWordsAlways />
  <PartialWords>
@@ -34,10 +35,10 @@
    <WordPart from="Ã" to="Å" />
    <WordPart from="í" to="i" />
  </PartialWords>
-  <PartialLines />
+  <WholeLines />
  <PartialLinesAlways />
+  <PartialLines />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions />
 </OCRFixReplaceList>
--- a/Dictionaries/por_OCRFixReplaceList.xml
+++ b/Dictionaries/por_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <Word from="abitual" to="habitual" />
    <Word from="àcerca" to="acerca" />
@@ -445,6 +446,8 @@
  </WholeWords>
  <PartialWordsAlways />
  <PartialWords />
+  <WholeLines />
+  <PartialLinesAlways />
  <PartialLines>
    <LinePart from="IN 6-E" to="N 6 E" />
    <LinePart from="in tegrar-se" to="integrar-se" />
@@ -474,10 +477,8 @@
    <LinePart from="R egião" to="Região" />
    <LinePart from="unsuficien temente" to="insuficientemente" />
  </PartialLines>
-  <PartialLinesAlways />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions>
    <!-- <RegEx find="\bi\b" replaceWith="I" /> just an example - do not use this regex -->
    <RegEx find="([0-9]) +º" replaceWith="$1º" />
--- a/Dictionaries/pt_names.xml
+++ b/Dictionaries/pt_names.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
-<!-- This list contains names/words with specific casing - and specific to Portuguese only -->
+<!-- This list contains names with specific casing - and specific to Portuguese only -->
 <names>
+  <blacklist />
  <name>Aarão</name>
  <name>Abdénago</name>
  <name>Abedenego</name>
--- a/Dictionaries/ru_names.xml
+++ b/Dictionaries/ru_names.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="utf-8"?>
-<!-- This list contains names/words with specific casing - and specific to Russian only -->
+<!-- This list contains names with specific casing - and specific to Russian only -->
 <names>
+  <blacklist />
  <name>Абакум</name>
  <name>Абакумович</name>
  <name>Абакумовна</name>
@@ -2891,7 +2892,4 @@
  <name>Ярославна</name>
  <name>Ярославович</name>
  <name>Ярославовна</name>
-  <blacklist>
-    <name></name>
-  </blacklist>
 </names>
--- a/Dictionaries/rus_OCRFixReplaceList.xml
+++ b/Dictionaries/rus_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <Word from="НЄЙ" to="НЕЙ" />
    <Word from="ОРГЗНИЗМОБ" to="ОРГАНИЗМА" />
@@ -248,10 +249,10 @@
    <WordPart from="ШЗ" to="ША" />
    <WordPart from="І\/І" to="М" />
  </PartialWords>
-  <PartialLines />
+  <WholeLines />
  <PartialLinesAlways />
+  <PartialLines />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions />
 </OCRFixReplaceList>
--- a/Dictionaries/spa_OCRFixReplaceList.xml
+++ b/Dictionaries/spa_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<OCRFixReplaceList>
+<?xml version="1.0" encoding="utf-8"?>
+<OCRFixReplaceList>
  <WholeWords>
    <!-- Abreviaturas simples -->
    <Word from="KBs" to="kB" />
@@ -368,6 +369,11 @@
  </WholeWords>
  <PartialWordsAlways />
  <PartialWords />
+  <WholeLines>
+    <!-- Todas las líneas -->
+    <Line from="No" to="No." />
+  </WholeLines>
+  <PartialLinesAlways />
  <PartialLines>
    <!-- Varios -->
    <LinePart from="de gratis" to="gratis" />
@@ -710,15 +716,10 @@
    <LinePart from="misterl" to="misteri" />
    <LinePart from="vivencl" to="vivenci" />
  </PartialLines>
-  <PartialLinesAlways />
  <BeginLines />
  <EndLines>
    <Ending from=".»." to="»." />
  </EndLines>
-  <WholeLines>
-    <!-- Todas las líneas -->
-    <Line from="No" to="No." />
-  </WholeLines>
  <RegularExpressions>
    <!-- Abreviaturas compuestas -->
    <RegEx find="\b[Ss](r|ra|rta)\b\.?" replaceWith="S$1." />
--- a/Dictionaries/srp_OCRFixReplaceList.xml
+++ b/Dictionaries/srp_OCRFixReplaceList.xml
@@ -1,4 +1,5 @@
-<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
 <OCRFixReplaceList>
  <WholeWords>
    <Word from="ču" to="ću" />
@@ -59,6 +60,8 @@
    <WordPart from="IVl" to="M" />
    <WordPart from="lVl" to="M" />
  </PartialWords>
+  <WholeLines />
+  <PartialLinesAlways />
  <PartialLines>
    <LinePart from="bi smo" to="bismo" />
    <LinePart from="dali je" to="da li je" />
@@ -97,10 +100,8 @@
    <LinePart from="Svo vrijeme" to="Sve vrijeme" />
    <LinePart from="Cijelo vrijeme" to="Sve vrijeme" />
  </PartialLines>
-  <PartialLinesAlways />
  <BeginLines />
  <EndLines />
-  <WholeLines />
  <RegularExpressions>
    <RegEx find="ÄŤ" replaceWith="č" />
    <RegEx find="Ä" replaceWith="č" />
--- a/Dictionaries/swe_OCRFixReplaceList.xml
+++ b/Dictionaries/swe_OCRFixReplaceList.xml
@@ -440,13 +440,13 @@
    <WordPart from="ejag" to="e jag" />
    <WordPart from="ärp" to="är p" />
  </PartialWords>
-  <PartialLines />
+  <WholeLines />
  <PartialLinesAlways />
+  <PartialLines />
  <BeginLines>
    <Beginning from="Ln " to="In " />
    <Beginning from="U ppfattat" to="Uppfattat" />
  </BeginLines>
  <EndLines />
-  <WholeLines />
  <RegularExpressions />
 </OCRFixReplaceList>