<!--
SubtitleEdit/Dictionaries/srp_OCRFixReplaceList.xml
2015-07-02 19:01:38 +02:00

137 lines
7.0 KiB
XML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-->

<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
<OCRFixReplaceList>
<!-- Whole-word fixes: forms of "neću" in which "ć" was recognized as "č". -->
<WholeWords>
  <Word from="neču" to="neću" />
  <Word from="nečeš" to="nećeš" />
  <Word from="neče" to="neće" />
  <Word from="nečemo" to="nećemo" />
  <Word from="nečete" to="nećete" />
</WholeWords>
<PartialWordsAlways />
<!-- Fragment fixes applied inside words. -->
<PartialWords>
  <!-- Currency sign recognized in place of the letter "o". -->
  <WordPart from="¤" to="o" />
  <!-- Two "v" characters recognized in place of "w". -->
  <WordPart from="vv" to="w" />
  <!-- "M" recognized as V flanked by capital I and/or lowercase l. -->
  <WordPart from="IVI" to="M" />
  <WordPart from="lVI" to="M" />
  <WordPart from="IVl" to="M" />
  <WordPart from="lVl" to="M" />
</PartialWords>
<!-- Line-fragment fixes: colloquial "jel"/"jel'" questions are rewritten in standard form. -->
<PartialLines>
  <!-- "si to": every case/apostrophe variant maps to the same wording ("Jesi li to"),
       so the result does not depend on how the colloquial form was typed. -->
  <LinePart from="Jel si to" to="Jesi li to" />
  <LinePart from="Jel' si to" to="Jesi li to" />
  <LinePart from="jel si to" to="jesi li to" />
  <LinePart from="jel' si to" to="jesi li to" />
  <!-- "si ti" -->
  <LinePart from="Jel si ti" to="Da li si ti" />
  <LinePart from="Jel' si ti" to="Da li si ti" />
  <LinePart from="jel si ti" to="da li si ti" />
  <LinePart from="jel' si ti" to="da li si ti" />
  <!-- "ste" (the trailing space is part of both the match and the replacement) -->
  <LinePart from="jel ste " to="jeste li " />
  <LinePart from="Jel ste " to="Jeste li " />
  <LinePart from="jel' ste " to="jeste li " />
  <LinePart from="Jel' ste " to="Jeste li " />
  <!-- "od kako" is joined into the single word "otkako". -->
  <LinePart from="od kako" to="otkako" />
</PartialLines>
<PartialLinesAlways />
<BeginLines />
<EndLines />
<WholeLines />
<RegularExpressions>
  <!-- Capital "I" recognized in place of lowercase "l" in "lj" / "lju" -->
  <RegEx find="(?&lt;=[a-zčđšž])Ij(?=[a-zčđšž])" replaceWith="lj" />
  <RegEx find="(?&lt;=[^A-ZČĐŠŽa-zčđšž])Iju(?=bav|d|t)" replaceWith="lju" />
  <!-- When there is a space between tags </i> <i> -->
  <!-- <RegEx find="(&gt;) +(&lt;)" replaceWith="$1$2" /> -->
  <!-- ',"' to '",' -->
  <RegEx find="(?&lt;=\w),&quot;(?=\s|$)" replaceWith="&quot;," />
  <RegEx find=",\.{3}|\.{3},|\.{2} \." replaceWith="..." />
  <!-- "1 :", "2 :"... "n :" to "n:" -->
  <RegEx find="([0-9]) +: +(\D)" replaceWith="$1: $2" />
  <!-- Two or more consecutive "," to "..." -->
  <RegEx find=",{2,}" replaceWith="..." />
  <!-- Two or more consecutive "-" to "..." -->
  <RegEx find="-{2,}" replaceWith="..." />
  <RegEx find="([^().])\.{2}([^().:])" replaceWith="$1...$2" />
  <!-- Thousands and decimal separators: 1,499,000.00 -> 1.499.000,00 -->
  <RegEx find="([0-9]{3})\.([0-9]{2}[^0-9])" replaceWith="$1,$2" />
  <!-- The comma is matched with lookarounds so that the neighbouring digits and the
       following non-digit are not consumed; this way every group separator in a number
       such as 1,499,000 is converted, not only the first one. -->
  <RegEx find="(?&lt;=[0-9]),(?=[0-9]{3}\D)" replaceWith="." />
  <!-- Apostrophes -->
  <RegEx find="´´" replaceWith="&quot;" />
  <!-- <RegEx find="[´`]" replaceWith="'" /> -->
  <!-- <RegEx find="[“”]" replaceWith="&quot;" /> -->
  <RegEx find="''" replaceWith="&quot;" />
  <!-- Two or more consecutive '"' to one '"' -->
  <RegEx find="&quot;{2,}" replaceWith="&quot;" />
  <!-- Fix zero and capital 'o' ripping mistakes -->
  <RegEx find="(?&lt;=[0-9]\.?)O" replaceWith="0" />
  <RegEx find="\b0(?=[A-ZČĐŠŽa-zčđšž])" replaceWith="O" />
  <!-- Remove the dash at the start of the first line (also when there are two lines) -->
  <RegEx find="\A- ?([A-ZČĐŠŽa-zčđšž0-9„'&quot;]|\.{3})" replaceWith="$1" />
  <RegEx find="\A(&lt;[ibu]&gt;)- ?" replaceWith="$1" />
  <RegEx find=" - " replaceWith=" -" />
  <!-- Remove the space after the dash at the start of the second line -->
  <RegEx find="(?&lt;=\n(&lt;[ibu]&gt;)?)- (?=[A-ZČĐŠŽčš0-9„'&quot;&lt;])" replaceWith="-" />
  <!-- Fix the dash when it is in the middle of the first line -->
  <RegEx find="([.!?&quot;&gt;]) - ([A-ZČĐŠŽčš'&quot;&lt;])" replaceWith="$1 -$2" />
  <!-- Closed tag, then a space after the dash -->
  <RegEx find="(&gt;) - ([A-ZČĐŠŽčš„'&quot;])" replaceWith="$1 -$2" />
  <!-- Closed tag, then dash and space -->
  <RegEx find="(&gt;)- ([A-ZČĐŠŽčš„'&quot;])" replaceWith="$1-$2" />
  <!-- Opening parenthesis, then dash and space -->
  <RegEx find="\(- ([A-ZČĐŠŽčš„'&quot;])" replaceWith="(-$1" />
  <!-- Smart space after dot -->
  <!-- except when the last letter is t (the word "kolt") -->
  <RegEx find="(?&lt;=[a-su-zá-úñä-ü])\.(?=[^\s\n().:?!*^“”'&quot;&lt;])" replaceWith=". " />
  <!-- Caliber notation, e.g. "Colt .45" -->
  <!-- For this to work (so that this space is allowed), in SW press Alt+I and uncheck "Razmaci ispred tacke" (spaces before a period) -->
  <RegEx find="t\.(?=[0-9]{2})" replaceWith="t ." />
  <!-- Joey(j)a -->
  <RegEx find="(?&lt;=\b[A-Z][a-z])eyj(?=[a-z])" replaceWith="ey" />
  <!-- Fixes spacing around commas -->
  <RegEx find="(?&lt;=[A-ZČĐŠŽa-zčđšžá-úñä-ü&quot;]),(?=[^\s(),?!“&lt;])" replaceWith=", " />
  <RegEx find=" +,(?=[A-ZČĐŠŽa-zčđšž])" replaceWith=", " />
  <RegEx find=" +, +" replaceWith=", " />
  <RegEx find=" +,$" replaceWith="," />
  <RegEx find="([?!])-" replaceWith="$1 -" />
  <!-- Space after last of some consecutive dots (eg. "...") -->
  <RegEx find="(?&lt;=[a-zčđšž])(\.{3}|!)(?=[a-zčđšž])" replaceWith="$1 " />
  <!-- Delete space after "..." that is at the beginning of the line. You may delete this line if you don't like it -->
  <!-- <RegEx find="^\.{3} +" replaceWith="..." /> -->
  <!-- Changes "text ... text" to "text... text" -->
  <RegEx find="(?&lt;=[A-ZČĐŠŽa-zčđšž]) +\.{3} +" replaceWith="... " />
  <RegEx find="(?&lt;=\S)\. +&quot;" replaceWith=".&quot;" />
  <RegEx find="&quot; +\." replaceWith="&quot;." />
  <RegEx find="(?&lt;=\S\.{3}) +&quot;(?=\s|$)" replaceWith="&quot;" />
  <RegEx find=" +\.{3}$" replaceWith="..." />
  <RegEx find="(?&lt;=[a-zčđšž])(?: +\.{3}|\.{2}$)" replaceWith="..." />
  <!-- Space before an opening parenthesis -->
  <RegEx find="(?&lt;=[A-ZČĐŠŽa-zčđšž])\(" replaceWith=" (" />
  <!-- Space after a question mark -->
  <RegEx find="\?(?=[A-ZČĐŠŽčš])" replaceWith="? " />
  <RegEx find="(?&lt;=^|&gt;)\.{3} +(?=[A-ZČĐŠŽčš])" replaceWith="..." />
  <!-- Removes ... when the line starts with "... -->
  <RegEx find="^&quot;\.{3} +" replaceWith="&quot;" />
  <RegEx find="(?&lt;=[0-9])\$" replaceWith=" $" />
  <!-- ti š -> t š by Strider -->
  <!-- Replace every "**ti šu*" with "**t šu*" and "**ti še*" with "**t še*" -->
  <!-- <RegEx find="([a-z])ti (š+[eu])" replaceWith="$1t $2" /> -->
  <!-- <RegEx find="([A-Za-z])ti( |\r?\n)(š[eu])" replaceWith="$1t$2$3" /> -->
  <!-- <RegEx find="(?i)\b(ni)t (š[eu])" replaceWith="$1ti $2" /> -->
  <!-- Space after <i> and after .. -->
  <RegEx find="^(&lt;[ibu]&gt;) +" replaceWith="$1" />
  <RegEx find="^\.{2} +" replaceWith="..." />
  <!-- Space in ? "</i> -->
  <RegEx find="([.?!]) +(&quot;&lt;)" replaceWith="$1$2" />
  <!-- No space in "Npr.:" ("Npr." is the abbreviation for "for example") -->
  <RegEx find="(?&lt;=[Nn]pr\.) *: *" replaceWith=": " />
  <RegEx find="\. ," replaceWith=".," />
  <RegEx find="([?!])\." replaceWith="$1" />
  <!-- So that signatures containing ..:: are not broken -->
  <RegEx find="\.{3}::" replaceWith="..::" />
  <RegEx find="::\.{3}" replaceWith="::.." />
  <RegEx find="\.{2} +::" replaceWith="..::" />
  <!-- Abbreviations without spaces -->
  <RegEx find="d\. o\.o\." replaceWith="d.o.o." />
  <!-- When a line starts with ... followed by a lowercase letter -->
  <!-- <RegEx find="^\.{3}([a-zčđšž&quot;&lt;])" replaceWith="$1" /> -->
  <!-- <RegEx find=" +([.?!])" replaceWith="$1" /> -->
</RegularExpressions>
</OCRFixReplaceList>