From db04025351472a4349dc76dc9a3e9f5b6583e7d2 Mon Sep 17 00:00:00 2001 From: oparviai Date: Sat, 8 Aug 2015 21:00:15 +0000 Subject: [PATCH] - Redesigned quickseek algorithm for improved sound quality in quickseek mode - Adaptive integer divider scaling for improved sound quality when using integer processing - Version 1.9.1-pre --- README.html | 76 +++++---- include/SoundTouch.h | 60 +++---- source/SoundTouch/TDStretch.cpp | 238 ++++++++++++++++++++-------- source/SoundTouch/TDStretch.h | 43 +++-- source/SoundTouch/mmx_optimized.cpp | 24 ++- source/SoundTouch/sse_optimized.cpp | 4 +- 6 files changed, 293 insertions(+), 152 deletions(-) diff --git a/README.html b/README.html index ef9aa0e..ff5ad24 100644 --- a/README.html +++ b/README.html @@ -9,11 +9,11 @@ + -->
-

SoundTouch audio processing library v1.9

+

SoundTouch audio processing library v1.9.1-pre

SoundTouch library Copyright © Olli Parviainen 2001-2015


1. Introduction

@@ -60,10 +60,10 @@ the compilation, the target program will require additional vcomp dll library to properly run. In Visual C++ 9.0 these libraries can be found in the following folders.

In Visual Studio 2008, a SP1 version may be required for these libraries. In other VC++ versions the required library will be expectedly found in similar @@ -103,8 +103,8 @@ Notice that "configure" file is not available before running the

Builds the SoundTouch library & SoundStretch utility. You can - optionally add "-j" switch after "make" to speed up the compilation in - multi-core systems.

+ optionally add "-j" switch after "make" to speed up the compilation in + multi-core systems.

@@ -355,8 +355,8 @@ computation burden

3.5 Performance Optimizations

General optimizations:

The time-stretch routine has a 'quick' mode that substantially -speeds up the algorithm but may degrade the sound quality by a small -amount. This mode is activated by calling SoundTouch::setSetting() +speeds up the algorithm but may slightly compromise the sound quality. +This mode is activated by calling SoundTouch::setSetting() function with parameter  id of SETTING_USE_QUICKSEEK and value "1", i.e.

@@ -368,7 +368,7 @@ intrinsics, providing about a 3x processing speedup for x86 compatible processors vs. non-SIMD implementation:

@@ -395,17 +395,17 @@ This include for example multi-core embedded devices.

OpenMP parallel computation can be enabled before compiling SoundTouch library as follows:


4. SoundStretch audio processing utility @@ -566,18 +566,25 @@ this corresponds to lowering the pitch by -0.318 semitones:


5. Change History

5.1. SoundTouch library Change History

+

1.9.1-pre:

+

1.9:

@@ -816,7 +823,7 @@ submitted bugfixes:

  • David Clark
  • Patrick Colis
  • Miquel Colon
  • -
  • Jim Credland
  • +
  • Jim Credland
  • Sandro Cumerlato
  • Justin Frankel
  • Masa H.
  • @@ -827,10 +834,10 @@ submitted bugfixes:

  • Yuval Naveh
  • Paulo Pizarro
  • Blaise Potard
  • -
  • Michael Pruett
  • +
  • Michael Pruett
  • Rajeev Puran
  • -
  • RJ Ryan
  • -
  • John Sheehy
  • +
  • RJ Ryan
  • +
  • John Sheehy
  • Tim Shuttleworth
  • Albert Sirvent
  • John Stumpo
  • @@ -852,7 +859,8 @@ General Public License for more details.

    License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


    +$Id$ +-->

    README.html file updated in May-2015

    diff --git a/include/SoundTouch.h b/include/SoundTouch.h index aad41d6..d87abee 100644 --- a/include/SoundTouch.h +++ b/include/SoundTouch.h @@ -79,10 +79,10 @@ namespace soundtouch { /// Soundtouch library version string -#define SOUNDTOUCH_VERSION "1.9.0" +#define SOUNDTOUCH_VERSION "1.9.1-pre" /// SoundTouch library version id -#define SOUNDTOUCH_VERSION_ID (10900) +#define SOUNDTOUCH_VERSION_ID (10901) // // Available setting IDs for the 'setSetting' & 'get_setting' functions: @@ -154,20 +154,20 @@ private: double virtualRate; /// Virtual pitch parameter. Effective rate & tempo are calculated from these parameters. - double virtualTempo; + double virtualTempo; /// Virtual pitch parameter. Effective rate & tempo are calculated from these parameters. - double virtualPitch; + double virtualPitch; /// Flag: Has sample rate been set? bool bSrateSet; - /// Accumulator for how many samples in total will be expected as output vs. samples put in, - /// considering current processing settings. - double samplesExpectedOut; + /// Accumulator for how many samples in total will be expected as output vs. samples put in, + /// considering current processing settings. + double samplesExpectedOut; - /// Accumulator for how many samples in total have been read out from the processing so far - long samplesOutput; + /// Accumulator for how many samples in total have been read out from the processing so far + long samplesOutput; /// Calculates effective rate & tempo valuescfrom 'virtualRate', 'virtualTempo' and /// 'virtualPitch' parameters. @@ -199,28 +199,28 @@ public: /// Sets new tempo control value. Normal tempo = 1.0, smaller values /// represent slower tempo, larger faster tempo. - void setTempo(double newTempo); + void setTempo(double newTempo); /// Sets new rate control value as a difference in percents compared /// to the original rate (-50 .. 
+100 %) - void setRateChange(double newRate); + void setRateChange(double newRate); /// Sets new tempo control value as a difference in percents compared /// to the original tempo (-50 .. +100 %) - void setTempoChange(double newTempo); + void setTempoChange(double newTempo); /// Sets new pitch control value. Original pitch = 1.0, smaller values /// represent lower pitches, larger values higher pitch. - void setPitch(double newPitch); + void setPitch(double newPitch); /// Sets pitch change in octaves compared to the original pitch /// (-1.00 .. +1.00) - void setPitchOctaves(double newPitch); + void setPitchOctaves(double newPitch); /// Sets pitch change in semi-tones compared to the original pitch /// (-12 .. +12) void setPitchSemiTones(int newPitch); - void setPitchSemiTones(double newPitch); + void setPitchSemiTones(double newPitch); /// Sets the number of channels, 1 = mono, 2 = stereo void setChannels(uint numChannels); @@ -247,22 +247,22 @@ public: ///< contains data for both channels. ); - /// Output samples from beginning of the sample buffer. Copies requested samples to - /// output buffer and removes them from the sample buffer. If there are less than - /// 'numsample' samples in the buffer, returns all that available. - /// - /// \return Number of samples returned. - virtual uint receiveSamples(SAMPLETYPE *output, ///< Buffer where to copy output samples. - uint maxSamples ///< How many samples to receive at max. - ); + /// Output samples from beginning of the sample buffer. Copies requested samples to + /// output buffer and removes them from the sample buffer. If there are less than + /// 'numsample' samples in the buffer, returns all that available. + /// + /// \return Number of samples returned. + virtual uint receiveSamples(SAMPLETYPE *output, ///< Buffer where to copy output samples. + uint maxSamples ///< How many samples to receive at max. 
+ ); - /// Adjusts book-keeping so that given number of samples are removed from beginning of the - /// sample buffer without copying them anywhere. - /// - /// Used to reduce the number of samples in the buffer when accessing the sample buffer directly - /// with 'ptrBegin' function. - virtual uint receiveSamples(uint maxSamples ///< Remove this many samples from the beginning of pipe. - ); + /// Adjusts book-keeping so that given number of samples are removed from beginning of the + /// sample buffer without copying them anywhere. + /// + /// Used to reduce the number of samples in the buffer when accessing the sample buffer directly + /// with 'ptrBegin' function. + virtual uint receiveSamples(uint maxSamples ///< Remove this many samples from the beginning of pipe. + ); /// Clears all the samples in the object's output and internal processing /// buffers. diff --git a/source/SoundTouch/TDStretch.cpp b/source/SoundTouch/TDStretch.cpp index 9ad27aa..d42ee6d 100644 --- a/source/SoundTouch/TDStretch.cpp +++ b/source/SoundTouch/TDStretch.cpp @@ -63,7 +63,7 @@ using namespace soundtouch; *****************************************************************************/ // Table for the hierarchical mixing position seeking algorithm -static const short _scanOffsets[5][24]={ +const short _scanOffsets[5][24]={ { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806, 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0}, {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0, @@ -94,7 +94,9 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer) bAutoSeqSetting = true; bAutoSeekSetting = true; -// outDebt = 0; + maxnorm = 0; + maxnormf = 1e8; + skipFract = 0; tempo = 1.0f; @@ -250,7 +252,7 @@ int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos) if (bQuickSeek) { return seekBestOverlapPositionQuick(refPos); - } + } else { return seekBestOverlapPositionFull(refPos); @@ -282,7 +284,6 @@ inline void TDStretch::overlap(SAMPLETYPE *pOutput, const 
SAMPLETYPE *pInput, ui } - // Seeks for the optimal overlap-mixing position. The 'stereo' version of the // routine // @@ -336,6 +337,11 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) } } } + +#ifdef SOUNDTOUCH_INTEGER_SAMPLES + adaptNormalizer(); +#endif + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). clearCrossCorrState(); @@ -343,64 +349,161 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) } -// Seeks for the optimal overlap-mixing position. The 'stereo' version of the -// routine +// Quick seek algorithm for improved runtime-performance: First roughly scans through the +// correlation area, and then scan surroundings of two best preliminary correlation candidates +// with improved precision // -// The best position is determined as the position where the two overlapped -// sample sequences are 'most alike', in terms of the highest cross-correlation -// value over the overlapping period -int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) +// Based on testing: +// - This algorithm gives on average 99% as good match as the full algorithm +// - this quick seek algorithm finds the best match on ~90% of cases +// - on those 10% of cases when this algorithm doesn't find best match, +// it still finds on average ~90% match vs. the best possible match +int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) { - int j; +#define _MIN(a, b) (((a) < (b)) ? 
(a) : (b)) +#define SCANSTEP 16 +#define SCANWIND 8 + int bestOffs; - double bestCorr, corr; - int scanCount, corrOffset, tempOffset; + int i; + int bestOffs2; + float bestCorr, corr; + float bestCorr2; + double norm; + + // note: 'float' types used in this function in case that the platform would need to use software-fp bestCorr = FLT_MIN; - bestOffs = _scanOffsets[0][0]; - corrOffset = 0; - tempOffset = 0; + bestOffs = SCANWIND; + bestCorr2 = FLT_MIN; + bestOffs2 = 0; - // Scans for the best correlation value using four-pass hierarchical search. + int best = 0; + + // Scans for the best correlation value by testing each possible position + // over the permitted range. Look for two best matches on the first pass to + // increase possibility of ideal match. // - // The look-up table 'scans' has hierarchical position adjusting steps. - // In first pass the routine searhes for the highest correlation with - // relatively coarse steps, then rescans the neighbourhood of the highest - // correlation with better resolution and so on. 
- for (scanCount = 0;scanCount < 4; scanCount ++) + // Begin from "SCANSTEP" instead of SCANWIND to make the calculation + // catch the 'middlepoint' of seekLength vector as that's the a-priori + // expected best match position + // + // Roughly: + // - 15% of cases find best result directly on the first round, + // - 75% cases find better match on 2nd round around the best match from 1st round + // - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round + for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP) { - j = 0; - while (_scanOffsets[scanCount][j]) + // Calculates correlation value for the mixing position corresponding + // to 'i' + corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); + // heuristic rule to slightly favour values close to mid of the seek range + float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; + corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); + + // Checks for the highest correlation value + if (corr > bestCorr) { - double norm; - tempOffset = corrOffset + _scanOffsets[scanCount][j]; - if (tempOffset >= seekLength) break; - - // Calculates correlation value for the mixing position corresponding - // to 'tempOffset' - corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm); - // heuristic rule to slightly favour values close to mid of the range - double tmp = (double)(2 * tempOffset - seekLength) / seekLength; - corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); - - // Checks for the highest correlation value - if (corr > bestCorr) - { - bestCorr = corr; - bestOffs = tempOffset; - } - j ++; + // found new best match. 
keep the previous best as 2nd best match + bestCorr2 = bestCorr; + bestOffs2 = bestOffs; + bestCorr = corr; + bestOffs = i; + } + else if (corr > bestCorr2) + { + // not new best, but still new 2nd best match + bestCorr2 = corr; + bestOffs2 = i; } - corrOffset = bestOffs; } + + // Scans surroundings of the found best match with small stepping + int end = _MIN(bestOffs + SCANWIND + 1, seekLength); + for (i = bestOffs - SCANWIND; i < end; i++) + { + if (i == bestOffs) continue; // this offset already calculated, thus skip + + // Calculates correlation value for the mixing position corresponding + // to 'i' + corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); + // heuristic rule to slightly favour values close to mid of the range + float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; + corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + bestCorr = corr; + bestOffs = i; + best = 1; + } + } + + // Scans surroundings of the 2nd best match with small stepping + end = _MIN(bestOffs2 + SCANWIND + 1, seekLength); + for (i = bestOffs2 - SCANWIND; i < end; i++) + { + if (i == bestOffs2) continue; // this offset already calculated, thus skip + + // Calculates correlation value for the mixing position corresponding + // to 'i' + corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); + // heuristic rule to slightly favour values close to mid of the range + float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; + corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + bestCorr = corr; + bestOffs = i; + best = 2; + } + } + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). 
clearCrossCorrState(); +#ifdef SOUNDTOUCH_INTEGER_SAMPLES + adaptNormalizer(); +#endif + return bestOffs; } + +/// For integer algorithm: adapt normalization factor divider with music so that +/// it'll not be pessimistically restrictive that can degrade quality on quieter sections +/// yet won't cause integer overflows either +void TDStretch::adaptNormalizer() +{ + // Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to + // too low values during pauses in music + if ((maxnorm > 1000) || (maxnormf > 40000000)) + { + //norm averaging filter + maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm; + + if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16)) + { + // large values, so increase divider + overlapDividerBitsNorm++; + if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase + } + else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0)) + { + // extra small values, decrease divider + overlapDividerBitsNorm--; + } + } + + maxnorm = 0; +} + + /// clear cross correlation routine state if necessary void TDStretch::clearCrossCorrState() { @@ -422,7 +525,7 @@ void TDStretch::calcSeqParameters() #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW)) - // seek-window-ms setting values at above low & top tempo + // seek-window-ms setting values at above low & top tempo #define AUTOSEEK_AT_MIN 25.0 #define AUTOSEEK_AT_MAX 15.0 #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) @@ -736,13 +839,15 @@ void TDStretch::calculateOverlapLength(int aoverlapMs) // calculate overlap length so that it's power of 2 - thus it's easy to do // integer division by right-shifting. 
Term "-1" at end is to account for // the extra most significatnt bit left unused in result by signed multiplication - overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; - if (overlapDividerBits > 9) overlapDividerBits = 9; - if (overlapDividerBits < 3) overlapDividerBits = 3; - newOvl = (int)pow(2.0, (int)overlapDividerBits + 1); // +1 => account for -1 above + overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; + if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9; + if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3; + newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1); // +1 => account for -1 above acceptNewOverlapLength(newOvl); + overlapDividerBitsNorm = overlapDividerBitsPure; + // calculate sloping divider so that crosscorrelation operation won't // overflow 32-bit register. Max. sum of the crosscorrelation sum without // divider would be 2^30*(N^3-N)/3, where N = overlap length @@ -750,10 +855,10 @@ void TDStretch::calculateOverlapLength(int aoverlapMs) } -double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const +double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) { long corr; - long lnorm; + unsigned long lnorm; int i; corr = lnorm = 0; @@ -763,15 +868,19 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do for (i = 0; i < channels * overlapLength; i += 4) { corr += (mixingPos[i] * compare[i] + - mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow corr += (mixingPos[i + 2] * compare[i + 2] + - mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; + mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm; lnorm += (mixingPos[i] * mixingPos[i] + - mixingPos[i 
+ 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow + mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + - mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits; + mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBitsNorm; } + if (lnorm > maxnorm) + { + maxnorm = lnorm; + } // Normalize result by dividing by sqrt(norm) - this step is easiest // done using floating point operation norm = (double)lnorm; @@ -780,17 +889,17 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value -double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const +double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) { long corr; - long lnorm; + unsigned long lnorm; int i; // cancel first normalizer tap from previous round lnorm = 0; for (i = 1; i <= channels; i ++) { - lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits; + lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm; } corr = 0; @@ -800,18 +909,23 @@ double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *c for (i = 0; i < channels * overlapLength; i += 4) { corr += (mixingPos[i] * compare[i] + - mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow corr += (mixingPos[i + 2] * compare[i + 2] + - mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; + mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm; } // update normalizer with last samples of this round for 
(int j = 0; j < channels; j ++) { i --; - lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits; + lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm; } + norm += (double)lnorm; + if (norm > maxnorm) + { + maxnorm = (unsigned long)norm; + } // Normalize result by dividing by sqrt(norm) - this step is easiest // done using floating point operation @@ -896,7 +1010,7 @@ void TDStretch::calculateOverlapLength(int overlapInMsec) /// Calculate cross-correlation -double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) const +double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) { double corr; double norm; @@ -927,7 +1041,7 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value -double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const +double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) { double corr; int i; diff --git a/source/SoundTouch/TDStretch.h b/source/SoundTouch/TDStretch.h index 6400f05..046481b 100644 --- a/source/SoundTouch/TDStretch.h +++ b/source/SoundTouch/TDStretch.h @@ -112,39 +112,46 @@ class TDStretch : public FIFOProcessor protected: int channels; int sampleReq; - double tempo; - SAMPLETYPE *pMidBuffer; - SAMPLETYPE *pMidBufferUnaligned; int overlapLength; int seekLength; int seekWindowLength; - int overlapDividerBits; + int overlapDividerBitsNorm; + int overlapDividerBitsPure; int slopingDivider; - double nominalSkip; - double skipFract; - FIFOSampleBuffer outputBuffer; - FIFOSampleBuffer inputBuffer; - bool bQuickSeek; - int sampleRate; int sequenceMs; int seekWindowMs; int overlapMs; + + unsigned long maxnorm; + float maxnormf; + + double tempo; + double nominalSkip; + double skipFract; + + bool bQuickSeek; bool bAutoSeqSetting; bool 
bAutoSeekSetting; + SAMPLETYPE *pMidBuffer; + SAMPLETYPE *pMidBufferUnaligned; + + FIFOSampleBuffer outputBuffer; + FIFOSampleBuffer inputBuffer; + void acceptNewOverlapLength(int newOverlapLength); virtual void clearCrossCorrState(); void calculateOverlapLength(int overlapMs); - virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm) const; - virtual double calcCrossCorrAccumulate(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm) const; + virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm); + virtual double calcCrossCorrAccumulate(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm); virtual int seekBestOverlapPositionFull(const SAMPLETYPE *refPos); virtual int seekBestOverlapPositionQuick(const SAMPLETYPE *refPos); - int seekBestOverlapPosition(const SAMPLETYPE *refPos); + virtual int seekBestOverlapPosition(const SAMPLETYPE *refPos); virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const; virtual void overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const; @@ -154,6 +161,8 @@ protected: void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const; void calcSeqParameters(); + void adaptNormalizer(); + /// Changes the tempo of the given sound samples. /// Returns amount of samples returned in the "output" buffer. 
@@ -249,8 +258,8 @@ public: class TDStretchMMX : public TDStretch { protected: - double calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const; - double calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const; + double calcCrossCorr(const short *mixingPos, const short *compare, double &norm); + double calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm); virtual void overlapStereo(short *output, const short *input) const; virtual void clearCrossCorrState(); }; @@ -262,8 +271,8 @@ public: class TDStretchSSE : public TDStretch { protected: - double calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const; - double calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const; + double calcCrossCorr(const float *mixingPos, const float *compare, double &norm); + double calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm); }; #endif /// SOUNDTOUCH_ALLOW_SSE diff --git a/source/SoundTouch/mmx_optimized.cpp b/source/SoundTouch/mmx_optimized.cpp index cb38d98..17fe108 100644 --- a/source/SoundTouch/mmx_optimized.cpp +++ b/source/SoundTouch/mmx_optimized.cpp @@ -68,7 +68,7 @@ using namespace soundtouch; // Calculates cross correlation of two buffers -double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm) const +double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm) { const __m64 *pVec1, *pVec2; __m64 shifter; @@ -79,7 +79,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d pVec1 = (__m64*)pV1; pVec2 = (__m64*)pV2; - shifter = _m_from_int(overlapDividerBits); + shifter = _m_from_int(overlapDividerBitsNorm); normaccu = accu = _mm_setzero_si64(); // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples @@ -123,6 +123,11 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d 
// Clear MMS state _m_empty(); + if (norm > (long)maxnorm) + { + maxnorm = norm; + } + // Normalize result by dividing by sqrt(norm) - this step is easiest // done using floating point operation dnorm = (double)norm; @@ -134,7 +139,7 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &d /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value -double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm) const +double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm) { const __m64 *pVec1, *pVec2; __m64 shifter; @@ -146,13 +151,13 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, lnorm = 0; for (i = 1; i <= channels; i ++) { - lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBits; + lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBitsNorm; } pVec1 = (__m64*)pV1; pVec2 = (__m64*)pV2; - shifter = _m_from_int(overlapDividerBits); + shifter = _m_from_int(overlapDividerBitsNorm); accu = _mm_setzero_si64(); // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples @@ -191,10 +196,15 @@ double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, pV1 = (short *)pVec1; for (int j = 1; j <= channels; j ++) { - lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBits; + lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBitsNorm; } dnorm += (double)lnorm; + if (lnorm > (long)maxnorm) + { + maxnorm = lnorm; + } + // Normalize result by dividing by sqrt(norm) - this step is easiest // done using floating point operation return (double)corr / sqrt((dnorm < 1e-9) ? 1.0 : dnorm); @@ -233,7 +243,7 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const // Overlaplength-division by shifter. "+1" is to account for "-1" deduced in // overlapDividerBits calculation earlier. 
- shifter = _m_from_int(overlapDividerBits + 1); + shifter = _m_from_int(overlapDividerBitsPure + 1); for (i = 0; i < overlapLength / 4; i ++) { diff --git a/source/SoundTouch/sse_optimized.cpp b/source/SoundTouch/sse_optimized.cpp index d1c728e..1e28bc9 100644 --- a/source/SoundTouch/sse_optimized.cpp +++ b/source/SoundTouch/sse_optimized.cpp @@ -71,7 +71,7 @@ using namespace soundtouch; #include // Calculates cross correlation of two buffers -double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm) const +double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm) { int i; const float *pVec1; @@ -183,7 +183,7 @@ double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &a -double TDStretchSSE::calcCrossCorrAccumulate(const float *pV1, const float *pV2, double &norm) const +double TDStretchSSE::calcCrossCorrAccumulate(const float *pV1, const float *pV2, double &norm) { // call usual calcCrossCorr function because SSE does not show big benefit of // accumulating "norm" value, and also the "norm" rolling algorithm would get