/////////////////////////////////////////////////////////////////////////////// /// /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo /// while maintaining the original pitch by using a time domain WSOLA-like /// method with several performance-increasing tweaks. /// /// Notes : MMX optimized functions reside in a separate, platform-specific /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'. /// /// This source file contains OpenMP optimizations that allow speeding up the /// corss-correlation algorithm by executing it in several threads / CPU cores /// in parallel. See the following article link for more detailed discussion /// about SoundTouch OpenMP optimizations: /// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices /// /// Author : Copyright (c) Olli Parviainen /// Author e-mail : oparviai 'at' iki.fi /// SoundTouch WWW: http://www.surina.net/soundtouch /// //////////////////////////////////////////////////////////////////////////////// // // License : // // SoundTouch audio processing library // Copyright (c) Olli Parviainen // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////////////////////// #include #include #include #include #include #include "STTypes.h" #include "cpu_detect.h" #include "TDStretch.h" using namespace soundtouch; #define max(x, y) (((x) > (y)) ? (x) : (y)) /***************************************************************************** * * Constant definitions * *****************************************************************************/ // Table for the hierarchical mixing position seeking algorithm const short _scanOffsets[5][24]={ { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806, 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0}, {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111, 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}}; /***************************************************************************** * * Implementation of the class 'TDStretch' * *****************************************************************************/ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer) { bQuickSeek = false; channels = 2; pMidBuffer = NULL; pMidBufferUnaligned = NULL; overlapLength = 0; bAutoSeqSetting = true; bAutoSeekSetting = true; tempo = 1.0f; setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS); setTempo(1.0f); clear(); } TDStretch::~TDStretch() { delete[] pMidBufferUnaligned; } // Sets routine control parameters. These control are certain time constants // defining how the sound is stretched to the desired duration. // // 'sampleRate' = sample rate of the sound // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms) // 'seekwindowMS' = seeking window length for scanning the best overlapping // position (default = 28 ms) // 'overlapMS' = overlapping length (default = 12 ms) void TDStretch::setParameters(int aSampleRate, int aSequenceMS, int aSeekWindowMS, int aOverlapMS) { // accept only positive parameter values - if zero or negative, use old values instead if (aSampleRate > 0) { if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate"); this->sampleRate = aSampleRate; } if (aOverlapMS > 0) this->overlapMs = aOverlapMS; if (aSequenceMS > 0) { this->sequenceMs = aSequenceMS; bAutoSeqSetting = false; } else if (aSequenceMS == 0) { // if zero, use automatic setting bAutoSeqSetting = true; } if (aSeekWindowMS > 0) { this->seekWindowMs = aSeekWindowMS; bAutoSeekSetting = false; } else if (aSeekWindowMS == 0) { // if zero, use automatic setting bAutoSeekSetting = true; } calcSeqParameters(); calculateOverlapLength(overlapMs); // set tempo to recalculate 'sampleReq' setTempo(tempo); } /// Get routine control parameters, see setParameters() function. /// Any of the parameters to this function can be NULL, in such case corresponding parameter /// value isn't returned. void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const { if (pSampleRate) { *pSampleRate = sampleRate; } if (pSequenceMs) { *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs; } if (pSeekWindowMs) { *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs; } if (pOverlapMs) { *pOverlapMs = overlapMs; } } // Overlaps samples in 'midBuffer' with the samples in 'pInput' void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const { int i; SAMPLETYPE m1, m2; m1 = (SAMPLETYPE)0; m2 = (SAMPLETYPE)overlapLength; for (i = 0; i < overlapLength ; i ++) { pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength; m1 += 1; m2 -= 1; } } void TDStretch::clearMidBuffer() { memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength); } void TDStretch::clearInput() { inputBuffer.clear(); clearMidBuffer(); isBeginning = true; maxnorm = 0; maxnormf = 1e8; skipFract = 0; } // Clears the sample buffers void TDStretch::clear() { outputBuffer.clear(); clearInput(); } // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero // to enable void TDStretch::enableQuickSeek(bool enable) { bQuickSeek = enable; } // Returns nonzero if the quick seeking algorithm is enabled. bool TDStretch::isQuickSeekEnabled() const { return bQuickSeek; } // Seeks for the optimal overlap-mixing position. int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos) { if (bQuickSeek) { return seekBestOverlapPositionQuick(refPos); } else { return seekBestOverlapPositionFull(refPos); } } // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position // of 'ovlPos'. inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const { #ifndef USE_MULTICH_ALWAYS if (channels == 1) { // mono sound. overlapMono(pOutput, pInput + ovlPos); } else if (channels == 2) { // stereo sound overlapStereo(pOutput, pInput + 2 * ovlPos); } else #endif // USE_MULTICH_ALWAYS { assert(channels > 0); overlapMulti(pOutput, pInput + channels * ovlPos); } } // Seeks for the optimal overlap-mixing position. The 'stereo' version of the // routine // // The best position is determined as the position where the two overlapped // sample sequences are 'most alike', in terms of the highest cross-correlation // value over the overlapping period int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) { int bestOffs; double bestCorr; int i; double norm; bestCorr = -FLT_MAX; bestOffs = 0; // Scans for the best correlation value by testing each possible position // over the permitted range. bestCorr = calcCrossCorr(refPos, pMidBuffer, norm); bestCorr = (bestCorr + 0.1) * 0.75; #pragma omp parallel for for (i = 1; i < seekLength; i ++) { double corr; // Calculates correlation value for the mixing position corresponding to 'i' #if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED) // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't // iterate the loop in sequential order // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm); #else // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same // as "calcCrossCorr", but saves time by reusing & updating previously stored // "norm" value corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm); #endif // heuristic rule to slightly favour values close to mid of the range double tmp = (double)(2 * i - seekLength) / (double)seekLength; corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); // Checks for the highest correlation value if (corr > bestCorr) { // For optimal performance, enter critical section only in case that best value found. // in such case repeat 'if' condition as it's possible that parallel execution may have // updated the bestCorr value in the mean time #pragma omp critical if (corr > bestCorr) { bestCorr = corr; bestOffs = i; } } } #ifdef SOUNDTOUCH_INTEGER_SAMPLES adaptNormalizer(); #endif // clear cross correlation routine state if necessary (is so e.g. in MMX routines). clearCrossCorrState(); return bestOffs; } // Quick seek algorithm for improved runtime-performance: First roughly scans through the // correlation area, and then scan surroundings of two best preliminary correlation candidates // with improved precision // // Based on testing: // - This algorithm gives on average 99% as good match as the full algorithm // - this quick seek algorithm finds the best match on ~90% of cases // - on those 10% of cases when this algorithm doesn't find best match, // it still finds on average ~90% match vs. the best possible match int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) { #define _MIN(a, b) (((a) < (b)) ? (a) : (b)) #define SCANSTEP 16 #define SCANWIND 8 int bestOffs; int i; int bestOffs2; float bestCorr, corr; float bestCorr2; double norm; // note: 'float' types used in this function in case that the platform would need to use software-fp bestCorr = bestCorr2 = -FLT_MAX; bestOffs = bestOffs2 = SCANWIND; // Scans for the best correlation value by testing each possible position // over the permitted range. Look for two best matches on the first pass to // increase possibility of ideal match. // // Begin from "SCANSTEP" instead of SCANWIND to make the calculation // catch the 'middlepoint' of seekLength vector as that's the a-priori // expected best match position // // Roughly: // - 15% of cases find best result directly on the first round, // - 75% cases find better match on 2nd round around the best match from 1st round // - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP) { // Calculates correlation value for the mixing position corresponding // to 'i' corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); // heuristic rule to slightly favour values close to mid of the seek range float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); // Checks for the highest correlation value if (corr > bestCorr) { // found new best match. keep the previous best as 2nd best match bestCorr2 = bestCorr; bestOffs2 = bestOffs; bestCorr = corr; bestOffs = i; } else if (corr > bestCorr2) { // not new best, but still new 2nd best match bestCorr2 = corr; bestOffs2 = i; } } // Scans surroundings of the found best match with small stepping int end = _MIN(bestOffs + SCANWIND + 1, seekLength); for (i = bestOffs - SCANWIND; i < end; i++) { if (i == bestOffs) continue; // this offset already calculated, thus skip // Calculates correlation value for the mixing position corresponding // to 'i' corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); // heuristic rule to slightly favour values close to mid of the range float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); // Checks for the highest correlation value if (corr > bestCorr) { bestCorr = corr; bestOffs = i; } } // Scans surroundings of the 2nd best match with small stepping end = _MIN(bestOffs2 + SCANWIND + 1, seekLength); for (i = bestOffs2 - SCANWIND; i < end; i++) { if (i == bestOffs2) continue; // this offset already calculated, thus skip // Calculates correlation value for the mixing position corresponding // to 'i' corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); // heuristic rule to slightly favour values close to mid of the range float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); // Checks for the highest correlation value if (corr > bestCorr) { bestCorr = corr; bestOffs = i; } } // clear cross correlation routine state if necessary (is so e.g. in MMX routines). clearCrossCorrState(); #ifdef SOUNDTOUCH_INTEGER_SAMPLES adaptNormalizer(); #endif return bestOffs; } /// For integer algorithm: adapt normalization factor divider with music so that /// it'll not be pessimistically restrictive that can degrade quality on quieter sections /// yet won't cause integer overflows either void TDStretch::adaptNormalizer() { // Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to // too low values during pauses in music if ((maxnorm > 1000) || (maxnormf > 40000000)) { //norm averaging filter maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm; if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16)) { // large values, so increase divider overlapDividerBitsNorm++; if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase } else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0)) { // extra small values, decrease divider overlapDividerBitsNorm--; } } maxnorm = 0; } /// clear cross correlation routine state if necessary void TDStretch::clearCrossCorrState() { // default implementation is empty. } /// Calculates processing sequence length according to tempo setting void TDStretch::calcSeqParameters() { // Adjust tempo param according to tempo, so that variating processing sequence length is used // at various tempo settings, between the given low...top limits #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%) #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%) // sequence-ms setting values at above low & top tempo #define AUTOSEQ_AT_MIN 90.0 #define AUTOSEQ_AT_MAX 40.0 #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW)) // seek-window-ms setting values at above low & top tempoq #define AUTOSEEK_AT_MIN 20.0 #define AUTOSEEK_AT_MAX 15.0 #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW)) #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x))) double seq, seek; if (bAutoSeqSetting) { seq = AUTOSEQ_C + AUTOSEQ_K * tempo; seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN); sequenceMs = (int)(seq + 0.5); } if (bAutoSeekSetting) { seek = AUTOSEEK_C + AUTOSEEK_K * tempo; seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN); seekWindowMs = (int)(seek + 0.5); } // Update seek window lengths seekWindowLength = (sampleRate * sequenceMs) / 1000; if (seekWindowLength < 2 * overlapLength) { seekWindowLength = 2 * overlapLength; } seekLength = (sampleRate * seekWindowMs) / 1000; } // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower // tempo, larger faster tempo. void TDStretch::setTempo(double newTempo) { int intskip; tempo = newTempo; // Calculate new sequence duration calcSeqParameters(); // Calculate ideal skip length (according to tempo value) nominalSkip = tempo * (seekWindowLength - overlapLength); intskip = (int)(nominalSkip + 0.5); // Calculate how many samples are needed in the 'inputBuffer' to // process another batch of samples //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2; sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength; } // Sets the number of channels, 1 = mono, 2 = stereo void TDStretch::setChannels(int numChannels) { if (!verifyNumberOfChannels(numChannels) || (channels == numChannels)) return; channels = numChannels; inputBuffer.setChannels(channels); outputBuffer.setChannels(channels); // re-init overlap/buffer overlapLength=0; setParameters(sampleRate); } // nominal tempo, no need for processing, just pass the samples through // to outputBuffer /* void TDStretch::processNominalTempo() { assert(tempo == 1.0f); if (bMidBufferDirty) { // If there are samples in pMidBuffer waiting for overlapping, // do a single sliding overlapping with them in order to prevent a // clicking distortion in the output sound if (inputBuffer.numSamples() < overlapLength) { // wait until we've got overlapLength input samples return; } // Mix the samples in the beginning of 'inputBuffer' with the // samples in 'midBuffer' using sliding overlapping overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0); outputBuffer.putSamples(overlapLength); inputBuffer.receiveSamples(overlapLength); clearMidBuffer(); // now we've caught the nominal sample flow and may switch to // bypass mode } // Simply bypass samples from input to output outputBuffer.moveSamples(inputBuffer); } */ // Processes as many processing frames of the samples 'inputBuffer', store // the result into 'outputBuffer' void TDStretch::processSamples() { int ovlSkip; int offset = 0; int temp; /* Removed this small optimization - can introduce a click to sound when tempo setting crosses the nominal value if (tempo == 1.0f) { // tempo not changed from the original, so bypass the processing processNominalTempo(); return; } */ // Process samples as long as there are enough samples in 'inputBuffer' // to form a processing frame. while ((int)inputBuffer.numSamples() >= sampleReq) { if (isBeginning == false) { // apart from the very beginning of the track, // scan for the best overlapping position & do overlap-add offset = seekBestOverlapPosition(inputBuffer.ptrBegin()); // Mix the samples in the 'inputBuffer' at position of 'offset' with the // samples in 'midBuffer' using sliding overlapping // ... first partially overlap with the end of the previous sequence // (that's in 'midBuffer') overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset); outputBuffer.putSamples((uint)overlapLength); offset += overlapLength; } else { // Adjust processing offset at beginning of track by not perform initial overlapping // and compensating that in the 'input buffer skip' calculation isBeginning = false; int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5); #ifdef ST_SIMD_AVOID_UNALIGNED // in SIMD mode, round the skip amount to value corresponding to aligned memory address if (channels == 1) { skip &= -4; } else if (channels == 2) { skip &= -2; } #endif skipFract -= skip; assert(nominalSkip >= -skipFract); } // ... then copy sequence samples from 'inputBuffer' to output: // crosscheck that we don't have buffer overflow... if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength)) { continue; // just in case, shouldn't really happen } // length of sequence temp = (seekWindowLength - 2 * overlapLength); outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp); // Copies the end of the current sequence from 'inputBuffer' to // 'midBuffer' for being mixed with the beginning of the next // processing sequence and so on assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples()); memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp), channels * sizeof(SAMPLETYPE) * overlapLength); // Remove the processed samples from the input buffer. Update // the difference between integer & nominal skip step to 'skipFract' // in order to prevent the error from accumulating over time. skipFract += nominalSkip; // real skip size ovlSkip = (int)skipFract; // rounded to integer skip skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip inputBuffer.receiveSamples((uint)ovlSkip); } } // Adds 'numsamples' pcs of samples from the 'samples' memory position into // the input of the object. void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples) { // Add the samples into the input buffer inputBuffer.putSamples(samples, nSamples); // Process the samples in input buffer processSamples(); } /// Set new overlap length parameter & reallocate RefMidBuffer if necessary. void TDStretch::acceptNewOverlapLength(int newOverlapLength) { int prevOvl; assert(newOverlapLength >= 0); prevOvl = overlapLength; overlapLength = newOverlapLength; if (overlapLength > prevOvl) { delete[] pMidBufferUnaligned; pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)]; // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned); clearMidBuffer(); } } // Operator 'new' is overloaded so that it automatically creates a suitable instance // depending on if we've a MMX/SSE/etc-capable CPU available or not. void * TDStretch::operator new(size_t s) { // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead! ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!"); return newInstance(); } TDStretch * TDStretch::newInstance() { uint uExtensions; uExtensions = detectCPUextensions(); // Check if MMX/SSE instruction set extensions supported by CPU #ifdef SOUNDTOUCH_ALLOW_MMX // MMX routines available only with integer sample types if (uExtensions & SUPPORT_MMX) { return ::new TDStretchMMX; } else #endif // SOUNDTOUCH_ALLOW_MMX #ifdef SOUNDTOUCH_ALLOW_SSE if (uExtensions & SUPPORT_SSE) { // SSE support return ::new TDStretchSSE; } else #endif // SOUNDTOUCH_ALLOW_SSE { // ISA optimizations not supported, use plain C version return ::new TDStretch; } } ////////////////////////////////////////////////////////////////////////////// // // Integer arithmetic specific algorithm implementations. // ////////////////////////////////////////////////////////////////////////////// #ifdef SOUNDTOUCH_INTEGER_SAMPLES // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' // version of the routine. void TDStretch::overlapStereo(short *poutput, const short *input) const { int i; short temp; int cnt2; for (i = 0; i < overlapLength ; i ++) { temp = (short)(overlapLength - i); cnt2 = 2 * i; poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength; poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength; } } // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi' // version of the routine. void TDStretch::overlapMulti(short *poutput, const short *input) const { short m1; int i = 0; for (m1 = 0; m1 < overlapLength; m1 ++) { short m2 = (short)(overlapLength - m1); for (int c = 0; c < channels; c ++) { poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength; i++; } } } // Calculates the x having the closest 2^x value for the given value static int _getClosest2Power(double value) { return (int)(log(value) / log(2.0) + 0.5); } /// Calculates overlap period length in samples. /// Integer version rounds overlap length to closest power of 2 /// for a divide scaling operation. void TDStretch::calculateOverlapLength(int aoverlapMs) { int newOvl; assert(aoverlapMs >= 0); // calculate overlap length so that it's power of 2 - thus it's easy to do // integer division by right-shifting. Term "-1" at end is to account for // the extra most significatnt bit left unused in result by signed multiplication overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9; if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3; newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1); // +1 => account for -1 above acceptNewOverlapLength(newOvl); overlapDividerBitsNorm = overlapDividerBitsPure; // calculate sloping divider so that crosscorrelation operation won't // overflow 32-bit register. Max. sum of the crosscorrelation sum without // divider would be 2^30*(N^3-N)/3, where N = overlap length slopingDivider = (newOvl * newOvl - 1) / 3; } double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) { long corr; unsigned long lnorm; int i; #ifdef ST_SIMD_AVOID_UNALIGNED // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary if (((ulongptr)mixingPos) & 15) return -1e50; #endif // hint compiler autovectorization that loop length is divisible by 8 int ilength = (channels * overlapLength) & -8; corr = lnorm = 0; // Same routine for stereo and mono for (i = 0; i < ilength; i += 2) { corr += (mixingPos[i] * compare[i] + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; lnorm += (mixingPos[i] * mixingPos[i] + mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; // do intermediate scalings to avoid integer overflow } if (lnorm > maxnorm) { // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode #pragma omp critical if (lnorm > maxnorm) { maxnorm = lnorm; } } // Normalize result by dividing by sqrt(norm) - this step is easiest // done using floating point operation norm = (double)lnorm; return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); } /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) { long corr; long lnorm; int i; // hint compiler autovectorization that loop length is divisible by 8 int ilength = (channels * overlapLength) & -8; // cancel first normalizer tap from previous round lnorm = 0; for (i = 1; i <= channels; i ++) { lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm; } corr = 0; // Same routine for stereo and mono. for (i = 0; i < ilength; i += 2) { corr += (mixingPos[i] * compare[i] + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; } // update normalizer with last samples of this round for (int j = 0; j < channels; j ++) { i --; lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm; } norm += (double)lnorm; if (norm > maxnorm) { maxnorm = (unsigned long)norm; } // Normalize result by dividing by sqrt(norm) - this step is easiest // done using floating point operation return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); } #endif // SOUNDTOUCH_INTEGER_SAMPLES ////////////////////////////////////////////////////////////////////////////// // // Floating point arithmetic specific algorithm implementations. // #ifdef SOUNDTOUCH_FLOAT_SAMPLES // Overlaps samples in 'midBuffer' with the samples in 'pInput' void TDStretch::overlapStereo(float *pOutput, const float *pInput) const { int i; float fScale; float f1; float f2; fScale = 1.0f / (float)overlapLength; f1 = 0; f2 = 1.0f; for (i = 0; i < 2 * (int)overlapLength ; i += 2) { pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2; pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2; f1 += fScale; f2 -= fScale; } } // Overlaps samples in 'midBuffer' with the samples in 'input'. void TDStretch::overlapMulti(float *pOutput, const float *pInput) const { int i; float fScale; float f1; float f2; fScale = 1.0f / (float)overlapLength; f1 = 0; f2 = 1.0f; i=0; for (int i2 = 0; i2 < overlapLength; i2 ++) { // note: Could optimize this slightly by taking into account that always channels > 2 for (int c = 0; c < channels; c ++) { pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2; i++; } f1 += fScale; f2 -= fScale; } } /// Calculates overlapInMsec period length in samples. void TDStretch::calculateOverlapLength(int overlapInMsec) { int newOvl; assert(overlapInMsec >= 0); newOvl = (sampleRate * overlapInMsec) / 1000; if (newOvl < 16) newOvl = 16; // must be divisible by 8 newOvl -= newOvl % 8; acceptNewOverlapLength(newOvl); } /// Calculate cross-correlation double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) { float corr; float norm; int i; #ifdef ST_SIMD_AVOID_UNALIGNED // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary if (((ulongptr)mixingPos) & 15) return -1e50; #endif // hint compiler autovectorization that loop length is divisible by 8 int ilength = (channels * overlapLength) & -8; corr = norm = 0; // Same routine for stereo and mono for (i = 0; i < ilength; i ++) { corr += mixingPos[i] * compare[i]; norm += mixingPos[i] * mixingPos[i]; } anorm = norm; return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); } /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) { float corr; int i; corr = 0; // cancel first normalizer tap from previous round for (i = 1; i <= channels; i ++) { norm -= mixingPos[-i] * mixingPos[-i]; } // hint compiler autovectorization that loop length is divisible by 8 int ilength = (channels * overlapLength) & -8; // Same routine for stereo and mono for (i = 0; i < ilength; i ++) { corr += mixingPos[i] * compare[i]; } // update normalizer with last samples of this round for (int j = 0; j < channels; j ++) { i --; norm += mixingPos[i] * mixingPos[i]; } return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); } #endif // SOUNDTOUCH_FLOAT_SAMPLES