<!-- SubtitleEdit/Dictionaries/srp_OCRFixReplaceList.xml -->

<!-- Credit goes to: MilanRS [http://www.prijevodi-online.org] -->
<OCRFixReplaceList>
  <WholeWords>
    <!-- Each Word maps a misspelled form ("from") to its correction ("to"). -->
    <!-- All entries here fix the negated auxiliary verb written with "č" instead of "ć". -->
    <!-- NOTE(review): presumably applied only when the whole word matches, as the section name suggests; confirm against the loader. -->
    <Word from="neču" to="neću" />
    <Word from="nečeš" to="nećeš" />
    <Word from="neče" to="neće" />
    <Word from="nečemo" to="nećemo" />
    <Word from="nečete" to="nećete" />
  </WholeWords>
  <!-- No unconditional partial-word rules are defined for this language. -->
  <PartialWordsAlways />
  <PartialWords>
    <!-- Each WordPart replaces the character sequence "from" with "to". -->
    <!-- Currency sign misread in place of the letter "o". -->
    <WordPart from="¤" to="o" />
    <!-- Two "v" characters misread in place of "w". -->
    <WordPart from="vv" to="w" />
    <!-- Capital "I" / lowercase "l" around a "V": look-alike sequences for capital "M". -->
    <WordPart from="IVI" to="M" />
    <WordPart from="lVI" to="M" />
    <WordPart from="IVl" to="M" />
    <WordPart from="lVl" to="M" />
  </PartialWords>
  <PartialLines>
    <!-- Each LinePart replaces the phrase "from" with "to" inside a line. -->
    <!-- Colloquial "jel (si)" questions are rewritten to the standard "da li si" form. -->
    <!-- The same target form is used for every spelling variant (with/without apostrophe, -->
    <!-- capitalized or not) so that equivalent inputs give equivalent outputs. -->
    <LinePart from="Jel si to" to="Da li si to" />
    <LinePart from="Jel' si to" to="Da li si to" />
    <LinePart from="jel si to" to="da li si to" />
    <LinePart from="jel' si to" to="da li si to" />
    <LinePart from="Jel si ti" to="Da li si ti" />
    <LinePart from="Jel' si ti" to="Da li si ti" />
    <LinePart from="jel si ti" to="da li si ti" />
    <LinePart from="jel' si ti" to="da li si ti" />
    <!-- "jel ste" variants are rewritten to "jeste li"; the trailing space is part of the match. -->
    <LinePart from="jel ste " to="jeste li " />
    <LinePart from="Jel ste " to="Jeste li " />
    <LinePart from="jel' ste " to="jeste li " />
    <LinePart from="Jel' ste " to="Jeste li " />
    <!-- "od kako" written as two words is joined into "otkako". -->
    <LinePart from="od kako" to="otkako" />
  </PartialLines>
  <!-- The following sections are intentionally empty: no rules of these kinds are defined in this file. -->
  <PartialLinesAlways />
  <BeginLines />
  <EndLines />
  <WholeLines />
  <RegularExpressions>
    <!-- Each RegEx replaces matches of "find" with "replaceWith"; "$n" refers to capture group n. -->
    <!-- XML entities are decoded before the pattern is used: &lt; is "<", &gt; is ">", &quot; is '"'. -->
    <!-- Capital "I" misread for lowercase "l" in the digraph "lj" -->
    <RegEx find="([a-zčđšž])Ij([a-zčđšž])" replaceWith="$1lj$2" />
    <RegEx find="([^A-ZČĐŠŽa-zčđšž])Ij(ubav|ud|ut)" replaceWith="$1lj$2" />
    <!-- When there is a space between tags, e.g. </i> <i> -->
    <RegEx find="(&gt;) +(&lt;)" replaceWith="$1$2" />
    <!-- ',"' to '",' -->
    <RegEx find="(\w),&quot;(\s|$)" replaceWith="$1&quot;,$2" />
    <RegEx find=",\.{3}|\.{3},|\.{2} \." replaceWith="..." />
    <!-- "1 :", "2 :"... "n :" to "n:" -->
    <RegEx find="(\d) +: +(\D)" replaceWith="$1: $2" />
    <!-- Two or more consecutive "," to "..." -->
    <RegEx find=",{2,}" replaceWith="..." />
    <!-- Two or more consecutive "-" to "..." -->
    <RegEx find="-{2,}" replaceWith="..." />
    <RegEx find="([^().])\.{2}([^().:])" replaceWith="$1...$2" />
    <!-- Thousands and decimal separators: 1,499,000.00 -> 1.499.000,00 -->
    <RegEx find="([0-9]{3})\.([0-9]{2}[^0-9])" replaceWith="$1,$2" />
    <RegEx find="([0-9]),([0-9]{3}\D)" replaceWith="$1.$2" />
    <!-- Apostrophes -->
    <RegEx find="´´" replaceWith="&quot;" />
    <!-- <RegEx find="[´`]" replaceWith="'" /> -->
    <!-- <RegEx find="[“”]" replaceWith="&quot;" /> -->
    <RegEx find="''" replaceWith="&quot;" />
    <!-- Two or more consecutive '"' to one '"' -->
    <RegEx find="&quot;{2,}" replaceWith="&quot;" />
    <!-- Fix zero and capital 'o' ripping mistakes -->
    <!-- "${1}0" is written with braces so that it is not read as group 10 -->
    <RegEx find="([0-9])O" replaceWith="${1}0" />
    <RegEx find="([0-9])\.O" replaceWith="$1.0" />
    <RegEx find="\b0([A-Za-z])" replaceWith="O$1" />
    <!-- Remove the dash at the start of the first line (also when there are two lines) -->
    <RegEx find="\A- ?([A-ZČĐŠŽa-zčđšž0-9„'&quot;]|\.{3})" replaceWith="$1" />
    <!-- Character class is [ibu]: inside brackets "|" is a literal character, not alternation -->
    <RegEx find="\A(&lt;[ibu]&gt;)- ?" replaceWith="$1" />
    <RegEx find="  - " replaceWith=" -" />
    <!-- Remove the space after the dash at the start of the second line -->
    <RegEx find="([\n](&lt;[ibu]&gt;)?)- ([0-9A-ZČĐŠŽčš„'&quot;&lt;])" replaceWith="$1-$3" />
    <!-- Fix the dash when it is in the middle of the first line -->
    <RegEx find="([.!?&quot;&gt;]) - ([A-ZČĐŠŽčš'&quot;&lt;])" replaceWith="$1 -$2" />
    <!-- Closing ">" followed by " - ": drop the space after the dash -->
    <RegEx find="(&gt;) - ([A-ZČĐŠŽčš„'&quot;])" replaceWith="$1 -$2" />
    <!-- Closing ">" followed by a dash and a space: drop the space -->
    <RegEx find="(&gt;)- ([A-ZČĐŠŽčš„'&quot;])" replaceWith="$1-$2" />
    <!-- Opening parenthesis followed by a dash and a space: drop the space -->
    <RegEx find="\(- ([A-ZČĐŠŽčš„'&quot;])" replaceWith="(-$1" />
    <!-- Smart space after dot -->
    <!-- except when the last letter is "t" (the word "kolt") -->
    <RegEx find="([a-su-zá-úñä-ü])\.([^\s\n().:?!*^“”'&quot;&lt;])" replaceWith="$1. $2" />
    <!-- Caliber notation, e.g. "Colt .45" -->
    <!-- For this to work (so that this space is allowed), in SW press Alt+I and uncheck "Razmaci ispred tacke" (spaces before a period) -->
    <RegEx find="t\.([0-9][0-9])" replaceWith="t .$1" />
    <!-- Joey(j)a -->
    <RegEx find="([A-Z][a-z])eyj([a-z])" replaceWith="$1ey$2" />
    <!-- Fix spacing around commas -->
    <RegEx find="([A-ZČĐŠŽa-zčđšžá-úñä-ü&quot;]),([^\s\n(),?!“&lt;])" replaceWith="$1, $2" />
    <RegEx find=" , " replaceWith=", " />
    <RegEx find=" ,([A-ZČĐŠŽa-zčđšž])" replaceWith=", $1" />
    <RegEx find=" ,$" replaceWith="," />
    <RegEx find="([?!])-" replaceWith="$1 -" />
    <!-- Space after last of some consecutive dots (eg. "...") -->
    <RegEx find="([a-zčđšž])(\.{3}|!)([a-zčđšž])" replaceWith="$1$2 $3" />
    <!-- Delete space after "..." that is at the beginning of the line. You may delete this line if you don't like it -->
    <!-- <RegEx find="^\.{3} +" replaceWith="..." /> -->
    <!-- Change "text ... text" to "text... text" -->
    <RegEx find="([A-ZČĐŠŽa-zčđšž]) \.{3} " replaceWith="$1... " />
    <RegEx find="(\S)\. &quot;" replaceWith="$1.&quot;" />
    <RegEx find="&quot; \." replaceWith="&quot;." />
    <RegEx find="(\S\.{3}) &quot;(\s|$)" replaceWith="$1&quot;$2" />
    <RegEx find=" \.{3}$" replaceWith="..." />
    <RegEx find="([a-zčđšž])( \.{3}|\.{2}$)" replaceWith="$1..." />
    <!-- Space before an opening parenthesis -->
    <RegEx find="([A-ZČĐŠŽa-zčđšž])\(" replaceWith="$1 (" />
    <!-- Space after a question mark -->
    <RegEx find="\?([A-ZČĐŠŽčš])" replaceWith="? $1" />
    <RegEx find="(^|&gt;)\.{3} ([A-ZČĐŠŽčš])" replaceWith="$1...$2" />
    <!-- Removes "... " when the line starts with a quote followed by it -->
    <RegEx find="^&quot;\.{3} " replaceWith="&quot;" />
    <RegEx find="([0-9])\$" replaceWith="$1 $" />
    <!-- ti š -> t š by Strider -->
    <!-- Replace every "**ti šu*" with "**t šu*" and "**ti še*" with "**t še*" -->
    <!-- <RegEx find="([a-z])ti (š+[eu])" replaceWith="$1t $2" /> -->
    <!-- <RegEx find="([A-Za-z])ti( |\r?\n)(š[eu])" replaceWith="$1t$2$3" /> -->
    <!-- <RegEx find="(?i)\b(ni)t (š[eu])" replaceWith="$1ti $2" /> -->
    <!-- Space after <i> and after .. -->
    <RegEx find="^(&lt;[ibu]&gt;) +" replaceWith="$1" />
    <RegEx find="^\.{2} +" replaceWith="..." />
    <!-- Space between sentence punctuation and a closing quote followed by a tag, e.g. ? "</i> -->
    <!-- The pattern has two groups, so the quote-and-bracket part is $2 (an undefined $3 would be emitted literally) -->
    <RegEx find="([.?!]) +(&quot;&lt;)" replaceWith="$1$2" />
    <!-- No space in "Npr.:" -->
    <RegEx find="([Nn])pr\. +:" replaceWith="$1pr.:" />
    <RegEx find="\. ," replaceWith=".," />
    <RegEx find="([?!])\." replaceWith="$1" />
    <!-- So that signatures containing ..:: are not broken -->
    <RegEx find="\.{3}::" replaceWith="..::" />
    <RegEx find="::\.{3}" replaceWith="::.." />
    <RegEx find="\.{2} ::" replaceWith="..::" />
    <!-- Abbreviations without spaces -->
    <RegEx find="d\. o\.o\." replaceWith="d.o.o." />
    <!-- When a line starts with ... followed by a lowercase letter -->
    <!-- <RegEx find="^\.{3}([a-zčđšž&quot;&lt;])" replaceWith="$1" /> -->
    <!-- <RegEx find=" ([.?!])" replaceWith="$1" /> -->
  </RegularExpressions>
</OCRFixReplaceList>