SubtitleEdit/Dictionaries/srp_OCRFixReplaceList.xml

137 lines
7.0 KiB
XML
Raw Normal View History

2015-06-24 14:05:26 +02:00
<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
<OCRFixReplaceList>
<WholeWords>
  <!-- Each entry rewrites the word in `from` to the word in `to`.
       All five entries correct forms of "neću" that were spelled with "č" instead of "ć". -->
  <Word from="neču" to="neću" />
  <Word from="nečeš" to="nećeš" />
  <Word from="neče" to="neće" />
  <Word from="nečemo" to="nećemo" />
  <Word from="nečete" to="nećete" />
</WholeWords>
2015-06-24 14:05:26 +02:00
<!-- This section contains no rules. -->
<PartialWordsAlways />
<PartialWords>
  <!-- Fragment-level substitutions: the character sequence in `from` is rewritten to `to`. -->
  <!-- Currency sign standing in for the letter "o". -->
  <WordPart from="¤" to="o" />
  <!-- Two "v" characters standing in for "w". -->
  <WordPart from="vv" to="w" />
  <!-- Capital "I" / lowercase "l" on either side of "V" standing in for "M" (all four combinations). -->
  <WordPart from="IVI" to="M" />
  <WordPart from="lVI" to="M" />
  <WordPart from="IVl" to="M" />
  <WordPart from="lVl" to="M" />
</PartialWords>
<PartialLines>
  <!-- Phrase-level substitutions: the text in `from` is rewritten to `to`. -->

  <!-- Colloquial "jel (') si to" becomes "da li si to".
       All four case/apostrophe variants use the same phrasing as the "si ti" rules below,
       so the result does not depend on capitalisation or on the apostrophe. -->
  <LinePart from="Jel si to" to="Da li si to" />
  <LinePart from="Jel' si to" to="Da li si to" />
  <LinePart from="jel si to" to="da li si to" />
  <LinePart from="jel' si to" to="da li si to" />

  <!-- Colloquial "jel (') si ti" becomes "da li si ti". -->
  <LinePart from="Jel si ti" to="Da li si ti" />
  <LinePart from="Jel' si ti" to="Da li si ti" />
  <LinePart from="jel si ti" to="da li si ti" />
  <LinePart from="jel' si ti" to="da li si ti" />

  <!-- Colloquial "jel (') ste " becomes "jeste li " (the trailing space is part of both values). -->
  <LinePart from="jel ste " to="jeste li " />
  <LinePart from="Jel ste " to="Jeste li " />
  <LinePart from="jel' ste " to="jeste li " />
  <LinePart from="Jel' ste " to="Jeste li " />

  <!-- "od kako" written as two words is joined into "otkako". -->
  <LinePart from="od kako" to="otkako" />
</PartialLines>
2015-06-24 14:05:26 +02:00
<!-- The four sections below contain no rules. -->
<PartialLinesAlways />
<BeginLines />
<EndLines />
<WholeLines />
<RegularExpressions>
  <!-- Regular-expression rules: `find` is the pattern, `replaceWith` is the replacement text
       ($1, $2 refer to capture groups of the pattern).
       Several patterns use variable-length lookbehind, e.g. (?<=[0-9]\.?), so they need a
       regex engine that supports it (presumably .NET; confirm against the consumer).
       NOTE(review): several comments below reason about rule order; they assume the rules
       are applied in document order. -->

  <!-- Capital "I" followed by "j" between lowercase letters becomes "lj". -->
  <RegEx find="(?&lt;=[a-zčđšž])Ij(?=[a-zčđšž])" replaceWith="lj" />
  <!-- After a non-letter, "Iju" followed by "bav", "d" or "t" becomes "lju". -->
  <RegEx find="(?&lt;=[^A-ZČĐŠŽa-zčđšž])Iju(?=bav|d|t)" replaceWith="lju" />

  <!-- When there is a space between tags </i> <i> (rule disabled) -->
  <!-- <RegEx find="(&gt;) +(&lt;)" replaceWith="$1$2" /> -->

  <!-- ',"' to '",' -->
  <RegEx find="(?&lt;=\w),&quot;(?=\s|$)" replaceWith="&quot;," />
  <!-- Comma next to an ellipsis, or ".. ." with a stray space, becomes "..." -->
  <RegEx find=",\.{3}|\.{3},|\.{2} \." replaceWith="..." />

  <!-- "1 :", "2 :"... "n :" to "n:" -->
  <RegEx find="([0-9]) +: +(\D)" replaceWith="$1: $2" />

  <!-- Two or more consecutive "," to "..." -->
  <RegEx find=",{2,}" replaceWith="..." />
  <!-- Two or more consecutive "-" to "..." -->
  <RegEx find="-{2,}" replaceWith="..." />
  <!-- Exactly two dots between other characters become three. -->
  <RegEx find="([^().])\.{2}([^().:])" replaceWith="$1...$2" />

  <!-- Thousands and decimal separators: 1,499,000.00 -> 1.499.000,00 -->
  <RegEx find="([0-9]{3})\.([0-9]{2}[^0-9])" replaceWith="$1,$2" />
  <RegEx find="([0-9]),([0-9]{3}\D)" replaceWith="$1.$2" />

  <!-- Apostrophes -->
  <RegEx find="´´" replaceWith="&quot;" />
  <!-- <RegEx find="[´`]" replaceWith="'" /> -->
  <!-- <RegEx find="[“”]" replaceWith="&quot;" /> -->
  <RegEx find="''" replaceWith="&quot;" />
  <!-- Two or more consecutive '"' to one '"' -->
  <RegEx find="&quot;{2,}" replaceWith="&quot;" />

  <!-- Fix zero and capital 'o' ripping mistakes -->
  <RegEx find="(?&lt;=[0-9]\.?)O" replaceWith="0" />
  <RegEx find="\b0(?=[A-ZČĐŠŽa-zčđšž])" replaceWith="O" />

  <!-- Remove the dash at the start of the first line (also when there are two lines) -->
  <RegEx find="\A- ?([A-ZČĐŠŽa-zčđšž0-9„'&quot;]|\.{3})" replaceWith="$1" />
  <RegEx find="\A(&lt;[ibu]&gt;)- ?" replaceWith="$1" />
  <!-- Any " - " loses the space after the dash. -->
  <RegEx find=" - " replaceWith=" -" />

  <!-- Remove the space after the dash at the start of the second line -->
  <RegEx find="(?&lt;=\n(&lt;[ibu]&gt;)?)- (?=[A-ZČĐŠŽčš0-9„'&quot;&lt;])" replaceWith="-" />

  <!-- Fix the dash when it is in the middle of the first line -->
  <!-- NOTE(review): if rules run in document order, the " - " rule above has already rewritten
       every " - ", so this rule and the next one can no longer match; confirm before removing. -->
  <RegEx find="([.!?&quot;&gt;]) - ([A-ZČĐŠŽčš'&quot;&lt;])" replaceWith="$1 -$2" />
  <!-- Closing tag, then a dash with a space after it -->
  <RegEx find="(&gt;) - ([A-ZČĐŠŽčš„'&quot;])" replaceWith="$1 -$2" />
  <!-- Closing tag immediately followed by dash and space -->
  <RegEx find="(&gt;)- ([A-ZČĐŠŽčš„'&quot;])" replaceWith="$1-$2" />
  <!-- Opening parenthesis followed by dash and space -->
  <RegEx find="\(- ([A-ZČĐŠŽčš„'&quot;])" replaceWith="(-$1" />

  <!-- Smart space after dot -->
  <!-- except when the preceding letter is "t" (the word "Colt") -->
  <RegEx find="(?&lt;=[a-su-zá-úñä-ü])\.(?=[^\s\n().:?!*^“”'&quot;&lt;])" replaceWith=". " />
  <!-- Caliber notation, e.g. "Colt .45" -->
  <!-- For this to work (for this space to be allowed), in SW press Alt+I and uncheck
       "Razmaci ispred tacke" (spaces before a period) -->
  <RegEx find="t\.(?=[0-9]{2})" replaceWith="t ." />

  <!-- Joey(j)a -->
  <RegEx find="(?&lt;=\b[A-Z][a-z])eyj(?=[a-z])" replaceWith="ey" />

  <!-- Normalizes spacing around commas -->
  <RegEx find="(?&lt;=[A-ZČĐŠŽa-zčđšžá-úñä-ü&quot;]),(?=[^\s(),?!“&lt;])" replaceWith=", " />
  <RegEx find=" +,(?=[A-ZČĐŠŽa-zčđšž])" replaceWith=", " />
  <RegEx find=" +, +" replaceWith=", " />
  <RegEx find=" +,$" replaceWith="," />
  <!-- "?" or "!" directly followed by a dash gets a space before the dash. -->
  <RegEx find="([?!])-" replaceWith="$1 -" />

  <!-- Space after last of some consecutive dots (eg. "...") -->
  <RegEx find="(?&lt;=[a-zčđšž])(\.{3}|!)(?=[a-zčđšž])" replaceWith="$1 " />
  <!-- Delete space after "..." that is at the beginning of the line. You may delete this line if you don't like it -->
  <!-- <RegEx find="^\.{3} +" replaceWith="..." /> -->

  <!-- Changes "text ... text" to "text... text" -->
  <RegEx find="(?&lt;=[A-ZČĐŠŽa-zčđšž]) +\.{3} +" replaceWith="... " />
  <RegEx find="(?&lt;=\S)\. +&quot;" replaceWith=".&quot;" />
  <RegEx find="&quot; +\." replaceWith="&quot;." />
  <RegEx find="(?&lt;=\S\.{3}) +&quot;(?=\s|$)" replaceWith="&quot;" />
  <RegEx find=" +\.{3}$" replaceWith="..." />
  <RegEx find="(?&lt;=[a-zčđšž])(?: +\.{3}|\.{2}$)" replaceWith="..." />

  <!-- Space before an opening parenthesis -->
  <RegEx find="(?&lt;=[A-ZČĐŠŽa-zčđšž])\(" replaceWith=" (" />

  <!-- Space after a question mark -->
  <RegEx find="\?(?=[A-ZČĐŠŽčš])" replaceWith="? " />
  <!-- Leading "..." (at line start or right after a tag) loses the spaces that follow it. -->
  <RegEx find="(?&lt;=^|&gt;)\.{3} +(?=[A-ZČĐŠŽčš])" replaceWith="..." />

  <!-- Removes "..." and the spaces after it when the line starts with a quote followed by "..." -->
  <RegEx find="^&quot;\.{3} +" replaceWith="&quot;" />
  <!-- A "$" directly after a digit gets a space before it. -->
  <RegEx find="(?&lt;=[0-9])\$" replaceWith=" $" />

  <!-- "ti š" -> "t š" by Strider (rules disabled) -->
  <!-- Replace every "**ti šu*" with "**t šu*" and every "**ti še*" with "**t še*" -->
  <!-- <RegEx find="([a-z])ti (š+[eu])" replaceWith="$1t $2" /> -->
  <!-- <RegEx find="([A-Za-z])ti( |\r?\n)(š[eu])" replaceWith="$1t$2$3" /> -->
  <!-- <RegEx find="(?i)\b(ni)t (š[eu])" replaceWith="$1ti $2" /> -->

  <!-- Space after <i> and after ".." at the start of a line -->
  <RegEx find="^(&lt;[ibu]&gt;) +" replaceWith="$1" />
  <RegEx find="^\.{2} +" replaceWith="..." />

  <!-- Space between sentence-ending punctuation and a closing quote before a tag: ? "</i> -->
  <RegEx find="([.?!]) +(&quot;&lt;)" replaceWith="$1$2" />

  <!-- No space before the colon in "Npr.:" and exactly one after it -->
  <RegEx find="(?&lt;=[Nn]pr\.) *: *" replaceWith=": " />
  <RegEx find="\. ," replaceWith=".," />
  <!-- Drop a single dot after "?" or "!". The negative lookahead leaves an ellipsis
       such as "?..." intact instead of cutting it down to "?..". -->
  <RegEx find="([?!])\.(?!\.)" replaceWith="$1" />

  <!-- So that signatures containing ..:: are not broken -->
  <RegEx find="\.{3}::" replaceWith="..::" />
  <RegEx find="::\.{3}" replaceWith="::.." />
  <RegEx find="\.{2} +::" replaceWith="..::" />

  <!-- Abbreviations without spaces -->
  <!-- The smart-space rule above turns "d.o.o." into "d. o. o." (assuming rules run in
       document order), so a space is accepted after either inner dot. -->
  <RegEx find="d\. ?o\. ?o\." replaceWith="d.o.o." />

  <!-- When a line starts with "..." followed by a lowercase letter (rule disabled) -->
  <!-- <RegEx find="^\.{3}([a-zčđšž&quot;&lt;])" replaceWith="$1" /> -->
  <!-- <RegEx find=" +([.?!])" replaceWith="$1" /> -->
</RegularExpressions>
2015-06-24 14:05:26 +02:00
</OCRFixReplaceList>