1
0
mirror of https://github.com/RPCS3/soundtouch.git synced 2024-11-10 04:42:50 +01:00

- Redesigned quickseek algorithm for improved sound quality in quickseek mode

- Adaptive integer divider scaling for improved sound quality when using integer processing
- Version 1.9.1-pre
This commit is contained in:
oparviai 2015-08-08 21:00:15 +00:00
parent c9507ff7f1
commit db04025351
6 changed files with 293 additions and 152 deletions

View File

@ -13,7 +13,7 @@
</head> </head>
<body class="normal"> <body class="normal">
<hr> <hr>
<h1>SoundTouch audio processing library v1.9</h1> <h1>SoundTouch audio processing library v1.9.1-pre</h1>
<p class="normal">SoundTouch library Copyright © Olli Parviainen 2001-2015</p> <p class="normal">SoundTouch library Copyright © Olli Parviainen 2001-2015</p>
<hr> <hr>
<h2>1. Introduction </h2> <h2>1. Introduction </h2>
@ -355,8 +355,8 @@ computation burden</td>
<h3>3.5 Performance Optimizations </h3> <h3>3.5 Performance Optimizations </h3>
<p><strong>General optimizations:</strong></p> <p><strong>General optimizations:</strong></p>
<p>The time-stretch routine has a 'quick' mode that substantially <p>The time-stretch routine has a 'quick' mode that substantially
speeds up the algorithm but may degrade the sound quality by a small speeds up the algorithm but may slightly compromise the sound quality.
amount. This mode is activated by calling SoundTouch::setSetting() This mode is activated by calling SoundTouch::setSetting()
function with parameter&nbsp; id of SETTING_USE_QUICKSEEK and value function with parameter&nbsp; id of SETTING_USE_QUICKSEEK and value
"1", i.e. </p> "1", i.e. </p>
<blockquote> <blockquote>
@ -566,6 +566,13 @@ this corresponds to lowering the pitch by -0.318 semitones:</p>
<hr> <hr>
<h2>5. Change History</h2> <h2>5. Change History</h2>
<h3>5.1. SoundTouch library Change History </h3> <h3>5.1. SoundTouch library Change History </h3>
<p><b>1.9.1-pre:</b></p>
<ul>
<li>Improved SoundTouch::flush() function so that it returns precisely the desired amount of samples for exact output duration control</li>
<li>Redesigned quickseek algorithm for improved sound quality when using the quickseek mode. The new quickseek algorithm can find 99% as good results as the default full-scan mode.</li>
<li>Added adaptive integer divider scaling for improved sound quality when using integer processing algorithm
</li>
</ul>
<p><b>1.9:</b></p> <p><b>1.9:</b></p>
<ul> <ul>
<li>Added support for parallel computation via OpenMP primitives for better performance in multicore systems. <li>Added support for parallel computation via OpenMP primitives for better performance in multicore systems.
@ -852,7 +859,8 @@ General Public License for more details.</p>
License along with this library; if not, write to the Free Software License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA</p> Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA</p>
<hr><!-- <hr><!--
$Id$ --> $Id$
-->
<p> <p>
<i>README.html file updated in May-2015</i></p> <i>README.html file updated in May-2015</i></p>
</body> </body>

View File

@ -79,10 +79,10 @@ namespace soundtouch
{ {
/// Soundtouch library version string /// Soundtouch library version string
#define SOUNDTOUCH_VERSION "1.9.0" #define SOUNDTOUCH_VERSION "1.9.1-pre"
/// SoundTouch library version id /// SoundTouch library version id
#define SOUNDTOUCH_VERSION_ID (10900) #define SOUNDTOUCH_VERSION_ID (10901)
// //
// Available setting IDs for the 'setSetting' & 'get_setting' functions: // Available setting IDs for the 'setSetting' & 'get_setting' functions:

View File

@ -63,7 +63,7 @@ using namespace soundtouch;
*****************************************************************************/ *****************************************************************************/
// Table for the hierarchical mixing position seeking algorithm // Table for the hierarchical mixing position seeking algorithm
static const short _scanOffsets[5][24]={ const short _scanOffsets[5][24]={
{ 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806, { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806,
868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0}, 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0},
{-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0, {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0,
@ -94,7 +94,9 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
bAutoSeqSetting = true; bAutoSeqSetting = true;
bAutoSeekSetting = true; bAutoSeekSetting = true;
// outDebt = 0; maxnorm = 0;
maxnormf = 1e8;
skipFract = 0; skipFract = 0;
tempo = 1.0f; tempo = 1.0f;
@ -282,7 +284,6 @@ inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, ui
} }
// Seeks for the optimal overlap-mixing position. The 'stereo' version of the // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
// routine // routine
// //
@ -336,6 +337,11 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
} }
} }
} }
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
adaptNormalizer();
#endif
// clear cross correlation routine state if necessary (is so e.g. in MMX routines). // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
clearCrossCorrState(); clearCrossCorrState();
@ -343,64 +349,161 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
} }
// Seeks for the optimal overlap-mixing position. The 'stereo' version of the // Quick seek algorithm for improved runtime-performance: First roughly scans through the
// routine // correlation area, and then scan surroundings of two best preliminary correlation candidates
// with improved precision
// //
// The best position is determined as the position where the two overlapped // Based on testing:
// sample sequences are 'most alike', in terms of the highest cross-correlation // - This algorithm gives on average 99% as good match as the full algorithm
// value over the overlapping period // - this quick seek algorithm finds the best match on ~90% of cases
// - on those 10% of cases when this algorithm doesn't find best match,
// it still finds on average ~90% match vs. the best possible match
int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
{ {
int j; #define _MIN(a, b) (((a) < (b)) ? (a) : (b))
#define SCANSTEP 16
#define SCANWIND 8
int bestOffs; int bestOffs;
double bestCorr, corr; int i;
int scanCount, corrOffset, tempOffset; int bestOffs2;
float bestCorr, corr;
float bestCorr2;
double norm;
// note: 'float' types used in this function in case that the platform would need to use software-fp
bestCorr = FLT_MIN; bestCorr = FLT_MIN;
bestOffs = _scanOffsets[0][0]; bestOffs = SCANWIND;
corrOffset = 0; bestCorr2 = FLT_MIN;
tempOffset = 0; bestOffs2 = 0;
// Scans for the best correlation value using four-pass hierarchical search. int best = 0;
// Scans for the best correlation value by testing each possible position
// over the permitted range. Look for two best matches on the first pass to
// increase possibility of ideal match.
// //
// The look-up table 'scans' has hierarchical position adjusting steps. // Begin from "SCANSTEP" instead of SCANWIND to make the calculation
// In first pass the routine searches for the highest correlation with // catch the 'middlepoint' of seekLength vector as that's the a-priori
// relatively coarse steps, then rescans the neighbourhood of the highest // expected best match position
// correlation with better resolution and so on. //
for (scanCount = 0;scanCount < 4; scanCount ++) // Roughly:
// - 15% of cases find best result directly on the first round,
// - 75% cases find better match on 2nd round around the best match from 1st round
// - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round
for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP)
{ {
j = 0; // Calculates correlation value for the mixing position corresponding
while (_scanOffsets[scanCount][j]) // to 'i'
corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
// heuristic rule to slightly favour values close to mid of the seek range
float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
// Checks for the highest correlation value
if (corr > bestCorr)
{ {
double norm; // found new best match. keep the previous best as 2nd best match
tempOffset = corrOffset + _scanOffsets[scanCount][j]; bestCorr2 = bestCorr;
if (tempOffset >= seekLength) break; bestOffs2 = bestOffs;
bestCorr = corr;
bestOffs = i;
}
else if (corr > bestCorr2)
{
// not new best, but still new 2nd best match
bestCorr2 = corr;
bestOffs2 = i;
}
}
// Scans surroundings of the found best match with small stepping
int end = _MIN(bestOffs + SCANWIND + 1, seekLength);
for (i = bestOffs - SCANWIND; i < end; i++)
{
if (i == bestOffs) continue; // this offset already calculated, thus skip
// Calculates correlation value for the mixing position corresponding // Calculates correlation value for the mixing position corresponding
// to 'tempOffset' // to 'i'
corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm); corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
// heuristic rule to slightly favour values close to mid of the range // heuristic rule to slightly favour values close to mid of the range
double tmp = (double)(2 * tempOffset - seekLength) / seekLength; float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
// Checks for the highest correlation value // Checks for the highest correlation value
if (corr > bestCorr) if (corr > bestCorr)
{ {
bestCorr = corr; bestCorr = corr;
bestOffs = tempOffset; bestOffs = i;
best = 1;
} }
j ++;
} }
corrOffset = bestOffs;
// Scans surroundings of the 2nd best match with small stepping
end = _MIN(bestOffs2 + SCANWIND + 1, seekLength);
for (i = bestOffs2 - SCANWIND; i < end; i++)
{
if (i == bestOffs2) continue; // this offset already calculated, thus skip
// Calculates correlation value for the mixing position corresponding
// to 'i'
corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
// heuristic rule to slightly favour values close to mid of the range
float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
// Checks for the highest correlation value
if (corr > bestCorr)
{
bestCorr = corr;
bestOffs = i;
best = 2;
} }
}
// clear cross correlation routine state if necessary (is so e.g. in MMX routines). // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
clearCrossCorrState(); clearCrossCorrState();
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
adaptNormalizer();
#endif
return bestOffs; return bestOffs;
} }
/// Integer-sample processing only: tunes the cross-correlation divider
/// ('overlapDividerBitsNorm') to the level of the processed sound, so that the
/// divider is not pessimistically large on quieter sections (which can degrade
/// quality), yet integer overflows are still avoided.
///
/// Reads the largest norm recorded in 'maxnorm', updates the running average
/// 'maxnormf', and always resets 'maxnorm' to zero before returning.
void TDStretch::adaptNormalizer()
{
    // Leave the normalizer untouched over too silent sequences, so that the
    // averaging filter does not deplete to too low values during pauses in music
    const bool loudEnough = (maxnorm > 1000) || (maxnormf > 40000000);

    if (loudEnough)
    {
        // norm averaging filter: 90% of the previous average + 10% of the latest maximum
        maxnormf = 0.9f * maxnormf + 0.1f * static_cast<float>(maxnorm);

        const bool mayIncrease = (maxnorm > 800000000) && (overlapDividerBitsNorm < 16);
        if (mayIncrease)
        {
            // large values => increase divider by one step;
            // extra large values => increase by two steps
            overlapDividerBitsNorm += (maxnorm > 1600000000) ? 2 : 1;
        }
        else
        {
            const bool mayDecrease = (maxnormf < 1000000) && (overlapDividerBitsNorm > 0);
            if (mayDecrease)
            {
                // extra small averaged values => decrease divider
                overlapDividerBitsNorm--;
            }
        }
    }

    // start collecting the maximum afresh for the next round
    maxnorm = 0;
}
/// clear cross correlation routine state if necessary /// clear cross correlation routine state if necessary
void TDStretch::clearCrossCorrState() void TDStretch::clearCrossCorrState()
{ {
@ -422,7 +525,7 @@ void TDStretch::calcSeqParameters()
#define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
#define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW)) #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
// seek-window-ms setting values at above low & top tempo // seek-window-ms setting values at above low & top tempo
#define AUTOSEEK_AT_MIN 25.0 #define AUTOSEEK_AT_MIN 25.0
#define AUTOSEEK_AT_MAX 15.0 #define AUTOSEEK_AT_MAX 15.0
#define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
@ -736,13 +839,15 @@ void TDStretch::calculateOverlapLength(int aoverlapMs)
// calculate overlap length so that it's power of 2 - thus it's easy to do // calculate overlap length so that it's power of 2 - thus it's easy to do
// integer division by right-shifting. Term "-1" at end is to account for // integer division by right-shifting. Term "-1" at end is to account for
// the extra most significant bit left unused in result by signed multiplication // the extra most significant bit left unused in result by signed multiplication
overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
if (overlapDividerBits > 9) overlapDividerBits = 9; if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9;
if (overlapDividerBits < 3) overlapDividerBits = 3; if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3;
newOvl = (int)pow(2.0, (int)overlapDividerBits + 1); // +1 => account for -1 above newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1); // +1 => account for -1 above
acceptNewOverlapLength(newOvl); acceptNewOverlapLength(newOvl);
overlapDividerBitsNorm = overlapDividerBitsPure;
// calculate sloping divider so that crosscorrelation operation won't // calculate sloping divider so that crosscorrelation operation won't
// overflow 32-bit register. Max. sum of the crosscorrelation sum without // overflow 32-bit register. Max. sum of the crosscorrelation sum without
// divider would be 2^30*(N^3-N)/3, where N = overlap length // divider would be 2^30*(N^3-N)/3, where N = overlap length
@ -750,10 +855,10 @@ void TDStretch::calculateOverlapLength(int aoverlapMs)
} }
double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm)
{ {
long corr; long corr;
long lnorm; unsigned long lnorm;
int i; int i;
corr = lnorm = 0; corr = lnorm = 0;
@ -763,15 +868,19 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
for (i = 0; i < channels * overlapLength; i += 4) for (i = 0; i < channels * overlapLength; i += 4)
{ {
corr += (mixingPos[i] * compare[i] + corr += (mixingPos[i] * compare[i] +
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
corr += (mixingPos[i + 2] * compare[i + 2] + corr += (mixingPos[i + 2] * compare[i + 2] +
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm;
lnorm += (mixingPos[i] * mixingPos[i] + lnorm += (mixingPos[i] * mixingPos[i] +
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + lnorm += (mixingPos[i + 2] * mixingPos[i + 2] +
mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits; mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBitsNorm;
} }
if (lnorm > maxnorm)
{
maxnorm = lnorm;
}
// Normalize result by dividing by sqrt(norm) - this step is easiest // Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation // done using floating point operation
norm = (double)lnorm; norm = (double)lnorm;
@ -780,17 +889,17 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
{ {
long corr; long corr;
long lnorm; unsigned long lnorm;
int i; int i;
// cancel first normalizer tap from previous round // cancel first normalizer tap from previous round
lnorm = 0; lnorm = 0;
for (i = 1; i <= channels; i ++) for (i = 1; i <= channels; i ++)
{ {
lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits; lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm;
} }
corr = 0; corr = 0;
@ -800,18 +909,23 @@ double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *c
for (i = 0; i < channels * overlapLength; i += 4) for (i = 0; i < channels * overlapLength; i += 4)
{ {
corr += (mixingPos[i] * compare[i] + corr += (mixingPos[i] * compare[i] +
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
corr += (mixingPos[i + 2] * compare[i + 2] + corr += (mixingPos[i + 2] * compare[i + 2] +
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm;
} }
// update normalizer with last samples of this round // update normalizer with last samples of this round
for (int j = 0; j < channels; j ++) for (int j = 0; j < channels; j ++)
{ {
i --; i --;
lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits; lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm;
} }
norm += (double)lnorm; norm += (double)lnorm;
if (norm > maxnorm)
{
maxnorm = (unsigned long)norm;
}
// Normalize result by dividing by sqrt(norm) - this step is easiest // Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation // done using floating point operation
@ -896,7 +1010,7 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
/// Calculate cross-correlation /// Calculate cross-correlation
double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) const double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
{ {
double corr; double corr;
double norm; double norm;
@ -927,7 +1041,7 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
{ {
double corr; double corr;
int i; int i;

View File

@ -112,39 +112,46 @@ class TDStretch : public FIFOProcessor
protected: protected:
int channels; int channels;
int sampleReq; int sampleReq;
double tempo;
SAMPLETYPE *pMidBuffer;
SAMPLETYPE *pMidBufferUnaligned;
int overlapLength; int overlapLength;
int seekLength; int seekLength;
int seekWindowLength; int seekWindowLength;
int overlapDividerBits; int overlapDividerBitsNorm;
int overlapDividerBitsPure;
int slopingDivider; int slopingDivider;
double nominalSkip;
double skipFract;
FIFOSampleBuffer outputBuffer;
FIFOSampleBuffer inputBuffer;
bool bQuickSeek;
int sampleRate; int sampleRate;
int sequenceMs; int sequenceMs;
int seekWindowMs; int seekWindowMs;
int overlapMs; int overlapMs;
unsigned long maxnorm;
float maxnormf;
double tempo;
double nominalSkip;
double skipFract;
bool bQuickSeek;
bool bAutoSeqSetting; bool bAutoSeqSetting;
bool bAutoSeekSetting; bool bAutoSeekSetting;
SAMPLETYPE *pMidBuffer;
SAMPLETYPE *pMidBufferUnaligned;
FIFOSampleBuffer outputBuffer;
FIFOSampleBuffer inputBuffer;
void acceptNewOverlapLength(int newOverlapLength); void acceptNewOverlapLength(int newOverlapLength);
virtual void clearCrossCorrState(); virtual void clearCrossCorrState();
void calculateOverlapLength(int overlapMs); void calculateOverlapLength(int overlapMs);
virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm) const; virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm);
virtual double calcCrossCorrAccumulate(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm) const; virtual double calcCrossCorrAccumulate(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm);
virtual int seekBestOverlapPositionFull(const SAMPLETYPE *refPos); virtual int seekBestOverlapPositionFull(const SAMPLETYPE *refPos);
virtual int seekBestOverlapPositionQuick(const SAMPLETYPE *refPos); virtual int seekBestOverlapPositionQuick(const SAMPLETYPE *refPos);
int seekBestOverlapPosition(const SAMPLETYPE *refPos); virtual int seekBestOverlapPosition(const SAMPLETYPE *refPos);
virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const; virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const;
virtual void overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const; virtual void overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const;
@ -154,6 +161,8 @@ protected:
void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const; void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const;
void calcSeqParameters(); void calcSeqParameters();
void adaptNormalizer();
/// Changes the tempo of the given sound samples. /// Changes the tempo of the given sound samples.
/// Returns amount of samples returned in the "output" buffer. /// Returns amount of samples returned in the "output" buffer.
@ -249,8 +258,8 @@ public:
class TDStretchMMX : public TDStretch class TDStretchMMX : public TDStretch
{ {
protected: protected:
double calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const; double calcCrossCorr(const short *mixingPos, const short *compare, double &norm);
double calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const; double calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm);
virtual void overlapStereo(short *output, const short *input) const; virtual void overlapStereo(short *output, const short *input) const;
virtual void clearCrossCorrState(); virtual void clearCrossCorrState();
}; };
@ -262,8 +271,8 @@ public:
class TDStretchSSE : public TDStretch class TDStretchSSE : public TDStretch
{ {
protected: protected:
double calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const; double calcCrossCorr(const float *mixingPos, const float *compare, double &norm);
double calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const; double calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm);
}; };
#endif /// SOUNDTOUCH_ALLOW_SSE #endif /// SOUNDTOUCH_ALLOW_SSE

View File

@ -68,7 +68,7 @@ using namespace soundtouch;
// Calculates cross correlation of two buffers // Calculates cross correlation of two buffers
double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm) const double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm)
{ {
const __m64 *pVec1, *pVec2; const __m64 *pVec1, *pVec2;
__m64 shifter; __m64 shifter;
@ -79,7 +79,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d
pVec1 = (__m64*)pV1; pVec1 = (__m64*)pV1;
pVec2 = (__m64*)pV2; pVec2 = (__m64*)pV2;
shifter = _m_from_int(overlapDividerBits); shifter = _m_from_int(overlapDividerBitsNorm);
normaccu = accu = _mm_setzero_si64(); normaccu = accu = _mm_setzero_si64();
// Process 4 parallel sets of 2 * stereo samples or 4 * mono samples // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples
@ -123,6 +123,11 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d
// Clear MMX state // Clear MMX state
_m_empty(); _m_empty();
if (norm > (long)maxnorm)
{
maxnorm = norm;
}
// Normalize result by dividing by sqrt(norm) - this step is easiest // Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation // done using floating point operation
dnorm = (double)norm; dnorm = (double)norm;
@ -134,7 +139,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm) const double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm)
{ {
const __m64 *pVec1, *pVec2; const __m64 *pVec1, *pVec2;
__m64 shifter; __m64 shifter;
@ -146,13 +151,13 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2,
lnorm = 0; lnorm = 0;
for (i = 1; i <= channels; i ++) for (i = 1; i <= channels; i ++)
{ {
lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBits; lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBitsNorm;
} }
pVec1 = (__m64*)pV1; pVec1 = (__m64*)pV1;
pVec2 = (__m64*)pV2; pVec2 = (__m64*)pV2;
shifter = _m_from_int(overlapDividerBits); shifter = _m_from_int(overlapDividerBitsNorm);
accu = _mm_setzero_si64(); accu = _mm_setzero_si64();
// Process 4 parallel sets of 2 * stereo samples or 4 * mono samples // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples
@ -191,10 +196,15 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2,
pV1 = (short *)pVec1; pV1 = (short *)pVec1;
for (int j = 1; j <= channels; j ++) for (int j = 1; j <= channels; j ++)
{ {
lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBits; lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBitsNorm;
} }
dnorm += (double)lnorm; dnorm += (double)lnorm;
if (lnorm > (long)maxnorm)
{
maxnorm = lnorm;
}
// Normalize result by dividing by sqrt(norm) - this step is easiest // Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation // done using floating point operation
return (double)corr / sqrt((dnorm < 1e-9) ? 1.0 : dnorm); return (double)corr / sqrt((dnorm < 1e-9) ? 1.0 : dnorm);
@ -233,7 +243,7 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const
// Overlaplength-division by shifter. "+1" is to account for "-1" deduced in // Overlaplength-division by shifter. "+1" is to account for "-1" deduced in
// overlapDividerBits calculation earlier. // overlapDividerBits calculation earlier.
shifter = _m_from_int(overlapDividerBits + 1); shifter = _m_from_int(overlapDividerBitsPure + 1);
for (i = 0; i < overlapLength / 4; i ++) for (i = 0; i < overlapLength / 4; i ++)
{ {

View File

@ -71,7 +71,7 @@ using namespace soundtouch;
#include <math.h> #include <math.h>
// Calculates cross correlation of two buffers // Calculates cross correlation of two buffers
double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm) const double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm)
{ {
int i; int i;
const float *pVec1; const float *pVec1;
@ -183,7 +183,7 @@ double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &a
double TDStretchSSE::calcCrossCorrAccumulate(const float *pV1, const float *pV2, double &norm) const double TDStretchSSE::calcCrossCorrAccumulate(const float *pV1, const float *pV2, double &norm)
{ {
// call usual calcCrossCorr function because SSE does not show big benefit of // call usual calcCrossCorr function because SSE does not show big benefit of
// accumulating "norm" value, and also the "norm" rolling algorithm would get // accumulating "norm" value, and also the "norm" rolling algorithm would get