mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-21 18:52:36 +01:00
dictionaries: automated XML upkeep
This commit is contained in:
parent
dcc28ab676
commit
0469c7f59f
@ -1,6 +1,9 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- This list contains names/words with specific casing - and specific to Danish only -->
|
||||
<!-- This list contains names with specific casing - and specific to Danish only -->
|
||||
<names>
|
||||
<blacklist>
|
||||
<name>Rabat</name>
|
||||
</blacklist>
|
||||
<name>Aquarium</name>
|
||||
<name>Armfeldt</name>
|
||||
<name>Asgård</name>
|
||||
@ -85,7 +88,4 @@
|
||||
<name>Wharton</name>
|
||||
<name>Wylie</name>
|
||||
<name>Yardley</name>
|
||||
<blacklist>
|
||||
<name>Rabat</name>
|
||||
</blacklist>
|
||||
</names>
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="Haner" to="Han er" />
|
||||
<Word from="JaveL" to="Javel" />
|
||||
@ -633,10 +634,10 @@
|
||||
<WordPart from="Ã" to="Å" />
|
||||
<WordPart from="í" to="i" />
|
||||
</PartialWords>
|
||||
<PartialLines />
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
@ -1,6 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- This list contains names/words with specific casing - and specific to German only -->
|
||||
<!-- This list contains names with specific casing - and specific to German only -->
|
||||
<names>
|
||||
<blacklist />
|
||||
<name>Abelard</name>
|
||||
<name>Ada</name>
|
||||
<name>Adal</name>
|
||||
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="/a" to="Ja" />
|
||||
<Word from="/ch" to="Ich" />
|
||||
@ -7047,12 +7048,12 @@
|
||||
<WordPart from="fi" to="fi" />
|
||||
<WordPart from="fl" to="fl" />
|
||||
</PartialWords>
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines>
|
||||
<!-- Partial lines - but whole words only -->
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
@ -1,6 +1,11 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- This list contains names/words with specific casing - and specific to English only -->
|
||||
<!-- This list contains names with specific casing - and specific to English only -->
|
||||
<names>
|
||||
<blacklist>
|
||||
<name>Bill</name>
|
||||
<name>Black</name>
|
||||
<name>Male</name>
|
||||
</blacklist>
|
||||
<name>Aarav</name>
|
||||
<name>Abbey</name>
|
||||
<name>Abbie</name>
|
||||
@ -1570,9 +1575,4 @@
|
||||
<name>Zion</name>
|
||||
<name>Zoie</name>
|
||||
<name>Zuri</name>
|
||||
<blacklist>
|
||||
<name>Male</name>
|
||||
<name>Bill</name>
|
||||
<name>Black</name>
|
||||
</blacklist>
|
||||
</names>
|
@ -2694,6 +2694,26 @@
|
||||
<WordPart from=")'" to="y" />
|
||||
<WordPart from="a’" to="d" />
|
||||
</PartialWords>
|
||||
<WholeLines>
|
||||
<!-- Whole lines - including -" etc -->
|
||||
<Line from="H ey." to="Hey." />
|
||||
<Line from="He)’-" to="Hey." />
|
||||
<Line from="N0." to="No." />
|
||||
<Line from="-N0." to="-No." />
|
||||
<Line from="Noll" to="No!!" />
|
||||
<Line from="(G ROANS)" to="(GROANS)" />
|
||||
<Line from="[G ROANS]" to="[GROANS]" />
|
||||
<Line from="(M EOWS)" to="(MEOWS)" />
|
||||
<Line from="[M EOWS]" to="[MEOWS]" />
|
||||
<Line from="Uaughs]" to="[laughs]" />
|
||||
<Line from="[chitte rs]" to="[chitters]" />
|
||||
<Line from="Hil‘ it!" to="Hit it!" />
|
||||
<Line from="<i>Hil‘ it!</i>" to="<i>Hit it!</i>" />
|
||||
<Line from="ISIGHS]" to="[SIGHS]" />
|
||||
</WholeLines>
|
||||
<PartialLinesAlways>
|
||||
<LinePart from="forbest act" to="for best act" />
|
||||
</PartialLinesAlways>
|
||||
<PartialLines>
|
||||
<LinePart from=" /be " to=" I be " />
|
||||
<LinePart from=" aren '1'" to=" aren't" />
|
||||
@ -3014,9 +3034,6 @@
|
||||
<LinePart from="you' re" to="you're" />
|
||||
<LinePart from="You' ve " to="You've " />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways>
|
||||
<LinePart from="forbest act" to="for best act" />
|
||||
</PartialLinesAlways>
|
||||
<BeginLines>
|
||||
<Beginning from="lgot it" to="I got it" />
|
||||
<Beginning from="Don,t " to="Don't " />
|
||||
@ -3164,23 +3181,6 @@
|
||||
<Ending from=" i..." to=" I..." />
|
||||
<Ending from=" L." to=" I." />
|
||||
</EndLines>
|
||||
<WholeLines>
|
||||
<!-- Whole lines - including -" etc -->
|
||||
<Line from="H ey." to="Hey." />
|
||||
<Line from="He)’-" to="Hey." />
|
||||
<Line from="N0." to="No." />
|
||||
<Line from="-N0." to="-No." />
|
||||
<Line from="Noll" to="No!!" />
|
||||
<Line from="(G ROANS)" to="(GROANS)" />
|
||||
<Line from="[G ROANS]" to="[GROANS]" />
|
||||
<Line from="(M EOWS)" to="(MEOWS)" />
|
||||
<Line from="[M EOWS]" to="[MEOWS]" />
|
||||
<Line from="Uaughs]" to="[laughs]" />
|
||||
<Line from="[chitte rs]" to="[chitters]" />
|
||||
<Line from="Hil‘ it!" to="Hit it!" />
|
||||
<Line from="<i>Hil‘ it!</i>" to="<i>Hit it!</i>" />
|
||||
<Line from="ISIGHS]" to="[SIGHS]" />
|
||||
</WholeLines>
|
||||
<RegularExpressions>
|
||||
<RegEx find="([a-z]) Won't " replaceWith="$1 won't " />
|
||||
<RegEx find=" L([,\r\n :;!?]+)" replaceWith=" I$1" />
|
||||
|
@ -1,6 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- This list contains names/words with specific casing - and specific to Spanish only -->
|
||||
<!-- This list contains names with specific casing - and specific to Spanish only -->
|
||||
<names>
|
||||
<blacklist />
|
||||
<name>Aang</name>
|
||||
<name>Aarón</name>
|
||||
<name>Abdulabri</name>
|
||||
@ -1009,7 +1010,4 @@
|
||||
<name>Zoroastro</name>
|
||||
<name>Zuko</name>
|
||||
<name>Zulú</name>
|
||||
<blacklist>
|
||||
<name></name>
|
||||
</blacklist>
|
||||
</names>
|
@ -1,6 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- This list contains names/words with specific casing - and specific to Finnish only -->
|
||||
<!-- This list contains names with specific casing - and specific to Finnish only -->
|
||||
<names>
|
||||
<blacklist />
|
||||
<name>AA</name>
|
||||
<name>Abbie</name>
|
||||
<name>Abbylla</name>
|
||||
@ -3072,7 +3073,4 @@
|
||||
<name>Zoye</name>
|
||||
<name>Zürichissa</name>
|
||||
<name>Åkessonin</name>
|
||||
<blacklist>
|
||||
<name></name>
|
||||
</blacklist>
|
||||
</names>
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="kellojo" to="kello jo" />
|
||||
<Word from="onjo" to="on jo" />
|
||||
@ -987,10 +988,6 @@
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<PartialLines />
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines>
|
||||
<Line from="Katsokaa pa." to="Katsokaapa." />
|
||||
<Line from="Mik!
""e“9iräı" to="Mik!
-Hengitä!" />
|
||||
@ -1028,5 +1025,9 @@
|
||||
<Line from="Haluan kertoa jotai n" to="Haluan kertoa jotain" />
|
||||
<Line from="I-Ialuatte" to="Haluatte" />
|
||||
</WholeLines>
|
||||
<PartialLinesAlways />
|
||||
<PartialLines />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
@ -1,6 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- This list contains names/words with specific casing - and specific to French only -->
|
||||
<!-- This list contains names with specific casing - and specific to French only -->
|
||||
<names>
|
||||
<blacklist />
|
||||
<name>Abdon</name>
|
||||
<name>Abdonie</name>
|
||||
<name>Abdonise</name>
|
||||
@ -808,7 +809,4 @@
|
||||
<name>Zéphir</name>
|
||||
<name>Zéphirin</name>
|
||||
<name>Zoé</name>
|
||||
<blacklist>
|
||||
<name></name>
|
||||
</blacklist>
|
||||
</names>
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="@immatriculation" to="d'immatriculation" />
|
||||
<Word from="acquer" to="acquér" />
|
||||
@ -246,13 +247,6 @@
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<PartialLines>
|
||||
<LinePart from=" I'" to=" l'" />
|
||||
<LinePart from=" |'" to=" l'" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines>
|
||||
<Line from=""D'ac:c:ord."" to=""D'accord."" />
|
||||
<Line from="“i QUÎ gagne, qui perd," to="ni qui gagne, qui perd," />
|
||||
@ -266,5 +260,12 @@
|
||||
<Line from="Peter H u nt." to="Peter Hunt." />
|
||||
<Line from=""C'est bien mieux dans Peau. 
 
 On peut sfléclabousser, faire du bruit."" to=""C'est bien mieux dans l'eau. 
 
 On peut s'éclabousser, faire du bruit."" />
|
||||
</WholeLines>
|
||||
<PartialLinesAlways />
|
||||
<PartialLines>
|
||||
<LinePart from=" I'" to=" l'" />
|
||||
<LinePart from=" |'" to=" l'" />
|
||||
</PartialLines>
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="andele" to="anđele" />
|
||||
<Word from="andeli" to="anđeli" />
|
||||
@ -1218,6 +1219,8 @@
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines>
|
||||
<LinePart from="da nadjem" to="naći" />
|
||||
<LinePart from="da nadjes" to="naći" />
|
||||
@ -1255,10 +1258,8 @@
|
||||
<LinePart from="znas sto" to="znaš što" />
|
||||
<LinePart from="znaš sto" to="znaš što" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<RegEx find="adas(?!v)" replaceWith="adaš" />
|
||||
<RegEx find="(?<![Pp]r|[Nn])adje(?!(v|n(e|u[olt]))\b)" replaceWith="ađe" />
|
||||
@ -1895,7 +1896,7 @@
|
||||
<RegEx find="(?<![Kk]lj)učiće" replaceWith="učit će" />
|
||||
<RegEx find="udiva" replaceWith="uđiva" />
|
||||
<RegEx find="udj([aiu])" replaceWith="uđ$1" />
|
||||
<!--ne može više zbog oportunisti i komunisti -->
|
||||
<!-- ne može više zbog oportunisti i komunisti -->
|
||||
<RegEx find="\b([Uu])nisti" replaceWith="$1ništi" />
|
||||
<RegEx find="nistav" replaceWith="ništav" />
|
||||
<RegEx find="ujuc" replaceWith="ujuć" />
|
||||
@ -1932,4 +1933,4 @@
|
||||
<RegEx find="Zurb" replaceWith="Žurb" />
|
||||
<RegEx find="zvucen" replaceWith="zvučen" />
|
||||
</RegularExpressions>
|
||||
</OCRFixReplaceList>
|
||||
</OCRFixReplaceList>
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="()d" to="Od" />
|
||||
<Word from="advokati" to="odvjetnici" />
|
||||
@ -50,7 +51,7 @@
|
||||
<Word from="bekstvo" to="bijeg" />
|
||||
<Word from="bekstvu" to="bijegu" />
|
||||
<Word from="begstvu" to="bijegu" />
|
||||
<!--<Word from="beo" to="bijel | bio" /> ne može jer su dvije verzije! -->
|
||||
<!-- <Word from="beo" to="bijel | bio" /> ne može jer su dvije verzije! -->
|
||||
<Word from="bes" to="bijes" />
|
||||
<Word from="besa" to="bijesa" />
|
||||
<Word from="besan" to="bijesan" />
|
||||
@ -2071,7 +2072,7 @@
|
||||
<Word from="Kolu" to="Coleu" />
|
||||
<Word from="Kolins" to="Collins" />
|
||||
<Word from="Koni" to="Connie" />
|
||||
<Word from="Konor" to="Connor"/>
|
||||
<Word from="Konor" to="Connor" />
|
||||
<Word from="Kortni" to="Courtney" />
|
||||
<Word from="Krejg" to="Craig" />
|
||||
<Word from="Krejga" to="Craiga" />
|
||||
@ -2228,6 +2229,8 @@
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines>
|
||||
<LinePart from="Ako ej" to="Ako je" />
|
||||
<LinePart from="ako ej" to="ako je" />
|
||||
@ -2397,10 +2400,8 @@
|
||||
<LinePart from="Želi da zna" to="Želi znati" />
|
||||
<LinePart from="želi da zna" to="želi znati" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<!-- deklinacije imenica i konjugacije glagola -->
|
||||
<RegEx find="([0-9])-ogodišnj" replaceWith="$1-godišnj" />
|
||||
@ -3216,7 +3217,7 @@
|
||||
<RegEx find="žur[ck]" replaceWith="zabav" />
|
||||
<RegEx find="Žur[ck]" replaceWith="Zabav" />
|
||||
<RegEx find="([sSnNpPzZ]r?[aoi]z[vn])ać" replaceWith="$1at ć" />
|
||||
<!-- mjeseci [\b mora biti nakon jun]-->
|
||||
<!-- mjeseci [\b mora biti nakon jun] -->
|
||||
<RegEx find="([Ss])j?ečnj([au]|ima)" replaceWith="$1iječnj$2" />
|
||||
<RegEx find="\ba[uv]gust" replaceWith="kolovoz" />
|
||||
<RegEx find="septembr" replaceWith="rujn" />
|
||||
@ -3316,7 +3317,7 @@
|
||||
<RegEx find="efiniši" replaceWith="efiniraj" />
|
||||
<RegEx find="efinišu" replaceWith="efiniraju" />
|
||||
<RegEx find="empton" replaceWith="ampton" />
|
||||
<!-- bjegunac-->
|
||||
<!-- bjegunac -->
|
||||
<RegEx find="(?<!j)egun" replaceWith="jegun" />
|
||||
<RegEx find="(?<!j)elokup" replaceWith="jelokup" />
|
||||
<RegEx find="enlj" replaceWith="enj" />
|
||||
@ -3442,7 +3443,7 @@
|
||||
<RegEx find="posel" replaceWith="posjel" />
|
||||
<RegEx find="produkova" replaceWith="producira" />
|
||||
<RegEx find="\bpominj" replaceWith="spominj" />
|
||||
<!-- ignoriše / koncentriše /operiše /toleriše /-->
|
||||
<!-- ignoriše / koncentriše /operiše /toleriše / -->
|
||||
<RegEx find="([te]|ku|pi|no)riše" replaceWith="$1rira" />
|
||||
<RegEx find="(?<![Pp]r|[Nn])adje(?!(v|n(e|u[olt]))\b)" replaceWith="ađe" />
|
||||
<RegEx find="par nedj?elja" replaceWith="par tjedana" />
|
||||
|
@ -1,12 +1,13 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords />
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<PartialLines />
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<!-- nagy I-l javítások -->
|
||||
<RegEx find="([\x41-\x5a\x61-\x7a\xc1-\xfc])II" replaceWith="$1ll" />
|
||||
|
@ -5,6 +5,7 @@ This file is case sensitive.
|
||||
This file is generated/updated by Multi Translator
|
||||
-->
|
||||
<names>
|
||||
<blacklist />
|
||||
<name>1A</name>
|
||||
<name>2 Chainz</name>
|
||||
<name>2 Pac</name>
|
||||
@ -2653,8 +2654,8 @@ This file is generated/updated by Multi Translator
|
||||
<name>Iceland</name>
|
||||
<name>Icelander</name>
|
||||
<name>Icelandic</name>
|
||||
<name>I'd</name>
|
||||
<name>ID</name>
|
||||
<name>I'd</name>
|
||||
<name>Idaho</name>
|
||||
<name>Idris</name>
|
||||
<name>Idris Elba</name>
|
||||
|
@ -9,8 +9,8 @@
|
||||
<word>fotograafje</word>
|
||||
<word>gemaar</word>
|
||||
<word>gps</word>
|
||||
<word>hielenlikkerij</word>
|
||||
<word>hielenlikkerij</word>
|
||||
<word>hielenlikkerij</word>
|
||||
<word>hostessen</word>
|
||||
<word>inbak</word>
|
||||
<word>insignificante</word>
|
||||
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="aandachtmag" to="aandacht mag" />
|
||||
<Word from="agrariers" to="agrariërs" />
|
||||
@ -110,12 +111,16 @@
|
||||
<Word from="zonderjou" to="zonder jou" />
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<PartialLines />
|
||||
<PartialWords>
|
||||
<!-- Will be used to check words not in dictionary -->
|
||||
<!-- If new word(s) exists in spelling dictionary, it is (they are) accepted -->
|
||||
<WordPart from="ij" to="ij" />
|
||||
</PartialWords>
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<RegEx find="\blk(?=\p{Ll}{2})" replaceWith="Ik" />
|
||||
<RegEx find="\bln(?=\p{Ll}{2})" replaceWith="In" />
|
||||
|
@ -1,6 +1,22 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- This list contains names/words with specific casing - and specific to Norwegian only -->
|
||||
<!-- This list contains names with specific casing - and specific to Norwegian only -->
|
||||
<names>
|
||||
<blacklist>
|
||||
<name>Ane</name>
|
||||
<name>Ben</name>
|
||||
<name>Bo</name>
|
||||
<name>Dag</name>
|
||||
<name>Fet</name>
|
||||
<name>Fred</name>
|
||||
<name>Gro</name>
|
||||
<name>Hem</name>
|
||||
<name>Jo</name>
|
||||
<name>Per</name>
|
||||
<name>Rune</name>
|
||||
<name>Saga</name>
|
||||
<name>Tom</name>
|
||||
<name>Ål</name>
|
||||
</blacklist>
|
||||
<name>Aage</name>
|
||||
<name>Aagot</name>
|
||||
<name>Aase</name>
|
||||
@ -2856,20 +2872,4 @@
|
||||
<name>Åsne</name>
|
||||
<name>Åsnes</name>
|
||||
<name>Åsta</name>
|
||||
<blacklist>
|
||||
<name>Ane</name>
|
||||
<name>Ben</name>
|
||||
<name>Bo</name>
|
||||
<name>Dag</name>
|
||||
<name>Fet</name>
|
||||
<name>Fred</name>
|
||||
<name>Gro</name>
|
||||
<name>Hem</name>
|
||||
<name>Jo</name>
|
||||
<name>Per</name>
|
||||
<name>Rune</name>
|
||||
<name>Saga</name>
|
||||
<name>Tom</name>
|
||||
<name>Ål</name>
|
||||
</blacklist>
|
||||
</names>
|
@ -48,10 +48,10 @@
|
||||
<WordPart from="Ã" to="Å" />
|
||||
<WordPart from="í" to="i" />
|
||||
</PartialWords>
|
||||
<PartialLines />
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords />
|
||||
<PartialWordsAlways />
|
||||
<PartialWords>
|
||||
@ -34,10 +35,10 @@
|
||||
<WordPart from="Ã" to="Å" />
|
||||
<WordPart from="í" to="i" />
|
||||
</PartialWords>
|
||||
<PartialLines />
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="abitual" to="habitual" />
|
||||
<Word from="àcerca" to="acerca" />
|
||||
@ -445,6 +446,8 @@
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines>
|
||||
<LinePart from="IN 6-E" to="N 6 E" />
|
||||
<LinePart from="in tegrar-se" to="integrar-se" />
|
||||
@ -474,10 +477,8 @@
|
||||
<LinePart from="R egião" to="Região" />
|
||||
<LinePart from="unsuficien temente" to="insuficientemente" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<!-- <RegEx find="\bi\b" replaceWith="I" /> just an example - do not use this regex -->
|
||||
<RegEx find="([0-9]) +º" replaceWith="$1º" />
|
||||
|
@ -1,6 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- This list contains names/words with specific casing - and specific to Portuguese only -->
|
||||
<!-- This list contains names with specific casing - and specific to Portuguese only -->
|
||||
<names>
|
||||
<blacklist />
|
||||
<name>Aarão</name>
|
||||
<name>Abdénago</name>
|
||||
<name>Abedenego</name>
|
||||
|
@ -1,6 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- This list contains names/words with specific casing - and specific to Russian only -->
|
||||
<!-- This list contains names with specific casing - and specific to Russian only -->
|
||||
<names>
|
||||
<blacklist />
|
||||
<name>Абакум</name>
|
||||
<name>Абакумович</name>
|
||||
<name>Абакумовна</name>
|
||||
@ -2891,7 +2892,4 @@
|
||||
<name>Ярославна</name>
|
||||
<name>Ярославович</name>
|
||||
<name>Ярославовна</name>
|
||||
<blacklist>
|
||||
<name></name>
|
||||
</blacklist>
|
||||
</names>
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="НЄЙ" to="НЕЙ" />
|
||||
<Word from="ОРГЗНИЗМОБ" to="ОРГАНИЗМА" />
|
||||
@ -248,10 +249,10 @@
|
||||
<WordPart from="ШЗ" to="ША" />
|
||||
<WordPart from="І\/І" to="М" />
|
||||
</PartialWords>
|
||||
<PartialLines />
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
@ -1,4 +1,5 @@
|
||||
<OCRFixReplaceList>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<!-- Abreviaturas simples -->
|
||||
<Word from="KBs" to="kB" />
|
||||
@ -368,6 +369,11 @@
|
||||
</WholeWords>
|
||||
<PartialWordsAlways />
|
||||
<PartialWords />
|
||||
<WholeLines>
|
||||
<!-- Todas las líneas -->
|
||||
<Line from="No" to="No." />
|
||||
</WholeLines>
|
||||
<PartialLinesAlways />
|
||||
<PartialLines>
|
||||
<!-- Varios -->
|
||||
<LinePart from="de gratis" to="gratis" />
|
||||
@ -710,15 +716,10 @@
|
||||
<LinePart from="misterl" to="misteri" />
|
||||
<LinePart from="vivencl" to="vivenci" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines>
|
||||
<Ending from=".»." to="»." />
|
||||
</EndLines>
|
||||
<WholeLines>
|
||||
<!-- Todas las líneas -->
|
||||
<Line from="No" to="No." />
|
||||
</WholeLines>
|
||||
<RegularExpressions>
|
||||
<!-- Abreviaturas compuestas -->
|
||||
<RegEx find="\b[Ss](r|ra|rta)\b\.?" replaceWith="S$1." />
|
||||
|
@ -1,4 +1,5 @@
|
||||
<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
|
||||
<OCRFixReplaceList>
|
||||
<WholeWords>
|
||||
<Word from="ču" to="ću" />
|
||||
@ -59,6 +60,8 @@
|
||||
<WordPart from="IVl" to="M" />
|
||||
<WordPart from="lVl" to="M" />
|
||||
</PartialWords>
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines>
|
||||
<LinePart from="bi smo" to="bismo" />
|
||||
<LinePart from="dali je" to="da li je" />
|
||||
@ -97,10 +100,8 @@
|
||||
<LinePart from="Svo vrijeme" to="Sve vrijeme" />
|
||||
<LinePart from="Cijelo vrijeme" to="Sve vrijeme" />
|
||||
</PartialLines>
|
||||
<PartialLinesAlways />
|
||||
<BeginLines />
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions>
|
||||
<RegEx find="ÄŤ" replaceWith="č" />
|
||||
<RegEx find="Ä" replaceWith="č" />
|
||||
|
@ -440,13 +440,13 @@
|
||||
<WordPart from="ejag" to="e jag" />
|
||||
<WordPart from="ärp" to="är p" />
|
||||
</PartialWords>
|
||||
<PartialLines />
|
||||
<WholeLines />
|
||||
<PartialLinesAlways />
|
||||
<PartialLines />
|
||||
<BeginLines>
|
||||
<Beginning from="Ln " to="In " />
|
||||
<Beginning from="U ppfattat" to="Uppfattat" />
|
||||
</BeginLines>
|
||||
<EndLines />
|
||||
<WholeLines />
|
||||
<RegularExpressions />
|
||||
</OCRFixReplaceList>
|
Loading…
Reference in New Issue
Block a user