1
0
mirror of https://github.com/RPCS3/soundtouch.git synced 2024-11-09 20:33:03 +01:00

Fixed integer overflow bug in integer versions of cross-correlation routines.

This commit is contained in:
oparviai 2014-01-06 19:40:40 +00:00
parent a61c28e36a
commit 746a90d610
3 changed files with 17 additions and 16 deletions

View File

@ -747,6 +747,7 @@ submitted bugfixes since SoundTouch v1.3.1: </p>
<li> Sandro Cumerlato</li>
<li> Justin Frankel </li>
<li> Jason Garland </li>
<li> Masa H. </li>
<li> Takashi Iwai </li>
<li> Mathias Möhl</li>
<li> Yuval Naveh </li>

View File

@ -742,12 +742,12 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare) co
for (i = 0; i < channels * overlapLength; i += 4)
{
corr += (mixingPos[i] * compare[i] +
mixingPos[i + 1] * compare[i + 1] +
mixingPos[i + 2] * compare[i + 2] +
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
corr += (mixingPos[i + 2] * compare[i + 2] +
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
norm += (mixingPos[i] * mixingPos[i] +
mixingPos[i + 1] * mixingPos[i + 1] +
mixingPos[i + 2] * mixingPos[i + 2] +
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
norm += (mixingPos[i + 2] * mixingPos[i + 2] +
mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
}

View File

@ -93,19 +93,19 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2) const
// _mm_add_pi32 : 2*32bit add
// _m_psrad : 32bit right-shift
temp = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]),
_mm_madd_pi16(pVec1[1], pVec2[1]));
temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]),
_mm_madd_pi16(pVec1[1], pVec1[1]));
accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec2[1]), shifter));
temp2 = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec1[1]), shifter));
accu = _mm_add_pi32(accu, temp);
normaccu = _mm_add_pi32(normaccu, temp2);
temp = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]),
_mm_madd_pi16(pVec1[3], pVec2[3]));
temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]),
_mm_madd_pi16(pVec1[3], pVec1[3]));
accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec2[3]), shifter));
temp2 = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec1[3]), shifter));
accu = _mm_add_pi32(accu, temp);
normaccu = _mm_add_pi32(normaccu, temp2);
pVec1 += 4;
pVec2 += 4;