SubtitleEdit/Dictionaries/srp_OCRFixReplaceList.xml

147 lines
6.9 KiB
XML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<OCRFixReplaceList>
<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
<!-- Whole-word replacements: forms of "neću" spelled with "č" are corrected to "ć". -->
<WholeWords>
<Word from="neču" to="neću" />
<Word from="nečeš" to="nećeš" />
<Word from="neče" to="neće" />
<Word from="nečemo" to="nećemo" />
<Word from="nečete" to="nećete" />
</WholeWords>
<!-- Fragment replacements: each WordPart maps a "from" fragment to its "to" replacement. -->
<PartialWords>
  <!-- Currency sign replaced with the letter "o" -->
  <WordPart from="¤" to="o" />
  <!-- Double "v" replaced with "w" -->
  <WordPart from="vv" to="w" />
  <!-- Capital I / lowercase l on either side of a capital V, every combination replaced with "M" -->
  <WordPart from="IVI" to="M" />
  <WordPart from="lVI" to="M" />
  <WordPart from="IVl" to="M" />
  <WordPart from="lVl" to="M" />
</PartialWords>
<!-- Phrase-level replacements: colloquial "jel (si|ste)" questions are rewritten, and "od kako" is joined into "otkako". -->
<PartialLines>
<LinePart from="Jel si to" to="Jesi li to" />
<LinePart from="Jel' si to" to="Da li si to" />
<LinePart from="jel si to" to="da li si to" />
<LinePart from="jel' si to" to="jesi li to" />
<LinePart from="Jel si ti" to="Da li si ti" />
<LinePart from="Jel' si ti" to="Da li si ti" />
<LinePart from="jel si ti" to="da li si ti" />
<LinePart from="jel' si ti" to="da li si ti" />
<!-- The trailing space inside the "ste" entries is part of the matched and replaced text. -->
<LinePart from="jel ste " to="jeste li " />
<LinePart from="Jel ste " to="Jeste li " />
<LinePart from="jel' ste " to="jeste li " />
<LinePart from="Jel' ste " to="Jeste li " />
<LinePart from="od kako" to="otkako" />
</PartialLines>
<!-- These three sections are present but define no rules. -->
<BeginLines/>
<EndLines/>
<WholeLines/>
<RegularExpressions>
<!-- "Ij" (capital I + j) between two lowercase letters becomes "lj"; the surrounding letters are kept. -->
<!-- NOTE(review): the letter classes in these rules repeat "š" and contain characters such as "Ð" and "è"; this looks like an encoding mix-up of Serbian letters. Confirm against the original file before changing them. -->
<RegEx find="([a-zžšđš])Ij([a-zžšđš])" replaceWith="$1lj$2" />
<!-- "Ij" preceded by a character outside the letter class and followed by "ubav", "ud" or "ut" becomes "lj". -->
<RegEx find="([^a-zšđčšžA-ZŠÐ蚎])Ij(ubav|ud|ut)" replaceWith="$1lj$2" />
<!-- Remove spaces between adjacent tags, e.g. </i> <i> -->
<RegEx find="(&gt;) +(&lt;)" replaceWith="$1$2" />
<!-- A comma placed before a closing quote is moved after it: ," becomes ", -->
<RegEx find="(\w),&quot;(\s|$)" replaceWith="$1&quot;,$2" />
<!-- A comma next to an ellipsis, or ".. .", collapses to "..." -->
<RegEx find=",\.{3}|\.{3},|\.{2} \." replaceWith="..." />
<!-- "1 :", "2 :"... "n :" to "n:" -->
<RegEx find="(\d) +: +(\D)" replaceWith="$1: $2" />
<!-- ",,," or similar to "..." -->
<RegEx find=",{2,}" replaceWith="..." />
<!-- Two or more consecutive hyphens become "..." -->
<RegEx find="-{2,}" replaceWith="..." />
<!-- Exactly two dots between characters that are not dots (and not followed by ":") become three dots -->
<RegEx find="([^(.)])\.{2}([^(.:)])" replaceWith="$1...$2" />
<!-- Thousands and decimal separators: 1,499,000.00 -> 1.499.000,00 -->
<RegEx find="([0-9]{2})\.([0-9]{2}[^0-9])" replaceWith="$1,$2" />
<RegEx find="([0-9]),([0-9]{3}\D)" replaceWith="$1.$2" />
<!-- Apostrophes -->
<!-- Element names are case-sensitive in XML; these rules use "RegEx" like every other rule in this section. -->
<!-- Two acute accents become one double quote -->
<RegEx find="´´" replaceWith="&quot;" />
<!-- <RegEx find="[´`]" replaceWith="'" /> -->
<!-- <RegEx find="[“”]" replaceWith="&quot;" /> -->
<!-- Two apostrophes become one double quote -->
<RegEx find="''" replaceWith="&quot;" />
<!-- Two or more '"' in a row are replaced by a single '"' -->
<RegEx find="&quot;{2,}" replaceWith="&quot;" />
<!-- Fix zero and capital 'o' ripping mistakes -->
<!-- A capital "O" right after a digit becomes "0". Presumably the braces in ${1} keep the following 0 from being read as part of the group number. -->
<RegEx find="([0-9])O" replaceWith="${1}0" />
<!-- A capital "O" after a digit and a dot becomes "0" -->
<RegEx find="([0-9])\.O" replaceWith="$1.0" />
<!-- A "0" at a word boundary followed by an ASCII letter becomes a capital "O" -->
<RegEx find="\b0([A-Za-z])" replaceWith="O$1" />
<!-- Remove the dash at the start of the first line (also when there are two lines) -->
<!-- &#132; is kept for compatibility; &#8222; (the low double quote) is added because &#132; is a control character in XML and presumably was meant to be that quote. -->
<RegEx find="\A- ?([A-ZŠšŽčÐa-zššžčđ0-9&#132;&#8222;&quot;']|\.{3})" replaceWith="$1" />
<!-- Same, when the text starts with an <i>, <b> or <u> tag; the class is [ibu] so that "|" is not accepted as a tag name -->
<RegEx find="\A(&lt;[ibu]&gt;)- ?" replaceWith="$1" />
<!-- A dash surrounded by spaces loses the space after it -->
<RegEx find=" - " replaceWith=" -" />
<!-- Remove the space after the dash at the start of the second line -->
<RegEx find="([\n](&lt;[ibu]&gt;)?)- ([0-9A-ZŠšŽčÐ&lt;&#132;&#8222;&quot;'])" replaceWith="$1-$3" />
<!-- Fix the dash when it is in the middle of the first line -->
<RegEx find="([.!?&gt;&quot;]) - ([A-ZŠšŽčÐ&lt;&quot;'])" replaceWith="$1 -$2" />
<!-- End of a tag, then a dash with spaces on both sides -->
<RegEx find="(&gt;) - ([A-ZŠšŽčÐ&#132;&#8222;&quot;'])" replaceWith="$1 -$2" />
<!-- End of a tag, then a dash followed by a space -->
<RegEx find="(&gt;)- ([A-ZŠšŽčÐ&#132;&#8222;&quot;'])" replaceWith="$1-$2" />
<!-- Opening parenthesis, then a dash followed by a space -->
<RegEx find="\(- ([A-ZŠšŽčÐ&#132;&#8222;&quot;'])" replaceWith="(-$1" />
<!-- Smart space after dot -->
<!-- Except when the last letter is "t" (the word "kolt"); the letter class skips "t" for that reason -->
<RegEx find="([a-su-zá-úñä-ü])\.([^(\s\n.:?!!*&lt;&quot;'”ˆ“)])" replaceWith="$1. $2" />
<!-- Caliber designation, e.g. "Colt .45": "t." followed by two digits gets a space before the dot -->
<!-- For this to work (so that this space is allowed), in SW press Alt+I and uncheck "Razmaci ispred tacke" (spaces before period) -->
<RegEx find="t\.([0-9][0-9])" replaceWith="t .$1" />
<!-- Joey(j)a: drops the "j" that follows "ey" in a capitalized word, e.g. "Joeyja" becomes "Joeya" -->
<RegEx find="([A-Z][a-z])eyj([a-z])" replaceWith="$1ey$2" />
<!-- Tidies up spacing around commas: first, insert a space after a comma that directly follows a letter or quote -->
<RegEx find="([A-ZŽščŠÐa-zžčššđá-úñä-ü&quot;]),([^(\s\n&lt;“,?!)])" replaceWith="$1, $2" />
<!-- Remove the space before a comma -->
<RegEx find=" , " replaceWith=", " />
<RegEx find=" ,([a-zžšđčšA-ZŠšŽčÐ])" replaceWith=", $1" />
<RegEx find=" ,$" replaceWith="," />
<!-- Insert a space between "?" or "!" and a following dash -->
<RegEx find="([?!])-" replaceWith="$1 -" />
<!-- Space after last of some consecutive dots (eg. "...") -->
<RegEx find="([a-zšđčšž])(\.{3}|!)([a-zšđčšž])" replaceWith="$1$2 $3" />
<!-- Delete space after "..." that is at the beginning of the line. You may delete this line if you don't like it -->
<!-- <RegEx find="^\.{3} " replaceWith="..." /> -->
<RegEx find="([a-zžšđčšA-ZŠšŽčÐ]) \.{3} " replaceWith="$1... " /> <!-- changes "text ... text" into "text... text" -->
<!-- Remove the space between a dot and a following double quote -->
<RegEx find="(\S)\. &quot;" replaceWith="$1.&quot;" />
<!-- Remove the space between a double quote and a following dot -->
<RegEx find="&quot; \." replaceWith="&quot;." />
<!-- Remove the space between an ellipsis and a closing double quote -->
<RegEx find="(\S\.{3}) &quot;(\s|$)" replaceWith="$1&quot;$2" />
<!-- Remove the space before an ellipsis that is matched at the end ($) -->
<RegEx find=" \.{3}$" replaceWith="..." />
<!-- After a lowercase letter, " ..." or a trailing ".." becomes "..." -->
<RegEx find="([a-zžščđš])( \.{3}|\.{2}$)" replaceWith="$1..." />
<RegEx find="([a-zžšđšA-ZŽŠÐš])\(" replaceWith="$1 (" /> <!-- Space before an opening parenthesis -->
<RegEx find="\?([A-ZŽŠÐš])" replaceWith="? $1" /> <!-- Space after a question mark -->
<!-- Remove the space after a leading "..." (at the start or right after a tag) when a capital letter follows -->
<RegEx find="(^|&gt;)\.{3} ([A-ZŽščŠÐ])" replaceWith="$1...$2" />
<RegEx find="^&quot;\.{3} " replaceWith="&quot;" /> <!-- Deletes the "... " that follows an opening double quote at the start -->
<!-- Insert a space between a digit and a following dollar sign -->
<RegEx find="([0-9])\$" replaceWith="$1 $" />
<!-- ti š -> t š by Strider -->
<!-- Replace every "**ti šu*" with "**t šu*" and every "**ti še*" with "**t še*" (rules currently disabled) -->
<!-- <RegEx find="([a-z])ti (š+[eu])" replaceWith="$1t $2" /> -->
<!-- <RegEx find="([A-Za-z])ti( |\r?\n)(š[eu])" replaceWith="$1t$2$3" /> -->
<!-- <RegEx find="(?i)\b(ni)t (š[eu])" replaceWith="$1ti $2" /> -->
<!-- Remove spaces after a leading <i>, <b> or <u> tag and after a leading ".." -->
<!-- The tag class is [ibu] so that "|" is not accepted as a tag name -->
<RegEx find="^(&lt;[ibu]&gt;) +" replaceWith="$1" />
<RegEx find="^\.{2} +" replaceWith="..." />
<!-- Remove the space between sentence punctuation and a closing quote followed by a tag, e.g. ? "</i> -->
<!-- The pattern has two groups, so the replacement uses $1$2; a reference to a third group would be emitted as literal text -->
<RegEx find="([.?!]) +(&quot;&lt;)" replaceWith="$1$2" />
<!-- No space in "Npr.:" (abbreviation for "for example") -->
<RegEx find="([Nn])pr\. +:" replaceWith="$1pr.:" />
<!-- Remove the space between a dot and a comma -->
<RegEx find="\. ," replaceWith=".," />
<!-- Drop a dot that directly follows "?" or "!" -->
<RegEx find="([?!])\." replaceWith="$1" />
<!-- Keep signatures decorated with ..:: intact -->
<RegEx find="\.{3}::" replaceWith="..::" />
<RegEx find="::\.{3}" replaceWith="::.." />
<RegEx find="\.{2} ::" replaceWith="..::" />
<!-- Abbreviations without spaces -->
<RegEx find="d\. o\.o\." replaceWith="d.o.o." />
<!-- When a line starts with "..." followed by a lowercase letter (rules currently disabled) -->
<!-- <RegEx find="^\.{3}([a-zšđčšž&quot;&lt;])" replaceWith="$1" /> -->
<!-- <RegEx find=" ([?!.])" replaceWith="$1" /> -->
</RegularExpressions>
</OCRFixReplaceList>