TDStretch.cpp 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808
  1. ////////////////////////////////////////////////////////////////////////////////
  2. ///
  3. /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
  4. /// while maintaining the original pitch by using a time domain WSOLA-like
  5. /// method with several performance-increasing tweaks.
  6. ///
  7. /// Note : MMX optimized functions reside in a separate, platform-specific
  8. /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
  9. ///
  10. /// Author : Copyright (c) Olli Parviainen
  11. /// Author e-mail : oparviai 'at' iki.fi
  12. /// SoundTouch WWW: http://www.surina.net/soundtouch
  13. ///
  14. ////////////////////////////////////////////////////////////////////////////////
  15. //
  16. // Last changed : $Date: 2012-11-08 20:53:01 +0200 (Thu, 08 Nov 2012) $
  17. // File revision : $Revision: 1.12 $
  18. //
  19. // $Id: TDStretch.cpp 160 2012-11-08 18:53:01Z oparviai $
  20. //
  21. ////////////////////////////////////////////////////////////////////////////////
  22. //
  23. // License :
  24. //
  25. // SoundTouch audio processing library
  26. // Copyright (c) Olli Parviainen
  27. //
  28. // This library is free software; you can redistribute it and/or
  29. // modify it under the terms of the GNU Lesser General Public
  30. // License as published by the Free Software Foundation; either
  31. // version 2.1 of the License, or (at your option) any later version.
  32. //
  33. // This library is distributed in the hope that it will be useful,
  34. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  35. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  36. // Lesser General Public License for more details.
  37. //
  38. // You should have received a copy of the GNU Lesser General Public
  39. // License along with this library; if not, write to the Free Software
  40. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  41. //
  42. ////////////////////////////////////////////////////////////////////////////////
  43. #include <string.h>
  44. #include <limits.h>
  45. #include <assert.h>
  46. #include <math.h>
  47. #include <float.h>
  48. #include "STTypes.h"
  49. #include "cpu_detect.h"
  50. #include "TDStretch.h"
  51. #include <stdio.h>
  52. using namespace soundtouch;
  53. #define max(x, y) (((x) > (y)) ? (x) : (y))
  54. /*****************************************************************************
  55. *
  56. * Constant definitions
  57. *
  58. *****************************************************************************/
  59. // Table for the hierarchical mixing position seeking algorithm
  60. static const short _scanOffsets[5][24]={
  61. { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806,
  62. 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0},
  63. {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0,
  64. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
  65. { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0,
  66. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
  67. { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0,
  68. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
  69. { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111,
  70. 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}};
  71. /*****************************************************************************
  72. *
  73. * Implementation of the class 'TDStretch'
  74. *
  75. *****************************************************************************/
  76. TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
  77. {
  78. bQuickSeek = FALSE;
  79. channels = 2;
  80. pMidBuffer = NULL;
  81. pMidBufferUnaligned = NULL;
  82. overlapLength = 0;
  83. bAutoSeqSetting = TRUE;
  84. bAutoSeekSetting = TRUE;
  85. // outDebt = 0;
  86. skipFract = 0;
  87. tempo = 1.0f;
  88. setParameters(8000, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
  89. setTempo(1.0f);
  90. clear();
  91. }
  92. TDStretch::~TDStretch()
  93. {
  94. delete[] pMidBufferUnaligned;
  95. }
  96. // Sets routine control parameters. These control are certain time constants
  97. // defining how the sound is stretched to the desired duration.
  98. //
  99. // 'sampleRate' = sample rate of the sound
  100. // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
  101. // 'seekwindowMS' = seeking window length for scanning the best overlapping
  102. // position (default = 28 ms)
  103. // 'overlapMS' = overlapping length (default = 12 ms)
  104. void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
  105. int aSeekWindowMS, int aOverlapMS)
  106. {
  107. // accept only positive parameter values - if zero or negative, use old values instead
  108. if (aSampleRate > 0) this->sampleRate = aSampleRate;
  109. if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
  110. if (aSequenceMS > 0)
  111. {
  112. this->sequenceMs = aSequenceMS;
  113. bAutoSeqSetting = FALSE;
  114. }
  115. else if (aSequenceMS == 0)
  116. {
  117. // if zero, use automatic setting
  118. bAutoSeqSetting = TRUE;
  119. }
  120. if (aSeekWindowMS > 0)
  121. {
  122. this->seekWindowMs = aSeekWindowMS;
  123. bAutoSeekSetting = FALSE;
  124. }
  125. else if (aSeekWindowMS == 0)
  126. {
  127. // if zero, use automatic setting
  128. bAutoSeekSetting = TRUE;
  129. }
  130. calcSeqParameters();
  131. calculateOverlapLength(overlapMs);
  132. // set tempo to recalculate 'sampleReq'
  133. setTempo(tempo);
  134. }
  135. /// Get routine control parameters, see setParameters() function.
  136. /// Any of the parameters to this function can be NULL, in such case corresponding parameter
  137. /// value isn't returned.
  138. void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
  139. {
  140. if (pSampleRate)
  141. {
  142. *pSampleRate = sampleRate;
  143. }
  144. if (pSequenceMs)
  145. {
  146. *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
  147. }
  148. if (pSeekWindowMs)
  149. {
  150. *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
  151. }
  152. if (pOverlapMs)
  153. {
  154. *pOverlapMs = overlapMs;
  155. }
  156. }
  157. // Overlaps samples in 'midBuffer' with the samples in 'pInput'
  158. void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
  159. {
  160. int i;
  161. SAMPLETYPE m1, m2;
  162. m1 = (SAMPLETYPE)0;
  163. m2 = (SAMPLETYPE)overlapLength;
  164. for (i = 0; i < overlapLength ; i ++)
  165. {
  166. pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
  167. m1 += 1;
  168. m2 -= 1;
  169. }
  170. }
  171. void TDStretch::clearMidBuffer()
  172. {
  173. memset(pMidBuffer, 0, 2 * sizeof(SAMPLETYPE) * overlapLength);
  174. }
  175. void TDStretch::clearInput()
  176. {
  177. inputBuffer.clear();
  178. clearMidBuffer();
  179. }
  180. // Clears the sample buffers
  181. void TDStretch::clear()
  182. {
  183. outputBuffer.clear();
  184. clearInput();
  185. }
  186. // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
  187. // to enable
  188. void TDStretch::enableQuickSeek(SBOOL enable)
  189. {
  190. bQuickSeek = enable;
  191. }
  192. // Returns nonzero if the quick seeking algorithm is enabled.
  193. SBOOL TDStretch::isQuickSeekEnabled() const
  194. {
  195. return bQuickSeek;
  196. }
  197. // Seeks for the optimal overlap-mixing position.
  198. int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
  199. {
  200. if (bQuickSeek)
  201. {
  202. return seekBestOverlapPositionQuick(refPos);
  203. }
  204. else
  205. {
  206. return seekBestOverlapPositionFull(refPos);
  207. }
  208. }
  209. // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
  210. // of 'ovlPos'.
  211. inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
  212. {
  213. if (channels == 2)
  214. {
  215. // stereo sound
  216. overlapStereo(pOutput, pInput + 2 * ovlPos);
  217. } else {
  218. // mono sound.
  219. overlapMono(pOutput, pInput + ovlPos);
  220. }
  221. }
  222. // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
  223. // routine
  224. //
  225. // The best position is determined as the position where the two overlapped
  226. // sample sequences are 'most alike', in terms of the highest cross-correlation
  227. // value over the overlapping period
  228. int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
  229. {
  230. int bestOffs;
  231. double bestCorr, corr;
  232. int i;
  233. bestCorr = FLT_MIN;
  234. bestOffs = 0;
  235. // Scans for the best correlation value by testing each possible position
  236. // over the permitted range.
  237. for (i = 0; i < seekLength; i ++)
  238. {
  239. // Calculates correlation value for the mixing position corresponding
  240. // to 'i'
  241. corr = calcCrossCorr(refPos + channels * i, pMidBuffer);
  242. // heuristic rule to slightly favour values close to mid of the range
  243. double tmp = (double)(2 * i - seekLength) / (double)seekLength;
  244. corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
  245. // Checks for the highest correlation value
  246. if (corr > bestCorr)
  247. {
  248. bestCorr = corr;
  249. bestOffs = i;
  250. }
  251. }
  252. // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
  253. clearCrossCorrState();
  254. return bestOffs;
  255. }
  256. // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
  257. // routine
  258. //
  259. // The best position is determined as the position where the two overlapped
  260. // sample sequences are 'most alike', in terms of the highest cross-correlation
  261. // value over the overlapping period
  262. int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
  263. {
  264. int j;
  265. int bestOffs;
  266. double bestCorr, corr;
  267. int scanCount, corrOffset, tempOffset;
  268. bestCorr = FLT_MIN;
  269. bestOffs = _scanOffsets[0][0];
  270. corrOffset = 0;
  271. tempOffset = 0;
  272. // Scans for the best correlation value using four-pass hierarchical search.
  273. //
  274. // The look-up table 'scans' has hierarchical position adjusting steps.
  275. // In first pass the routine searhes for the highest correlation with
  276. // relatively coarse steps, then rescans the neighbourhood of the highest
  277. // correlation with better resolution and so on.
  278. for (scanCount = 0;scanCount < 4; scanCount ++)
  279. {
  280. j = 0;
  281. while (_scanOffsets[scanCount][j])
  282. {
  283. tempOffset = corrOffset + _scanOffsets[scanCount][j];
  284. if (tempOffset >= seekLength) break;
  285. // Calculates correlation value for the mixing position corresponding
  286. // to 'tempOffset'
  287. corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer);
  288. // heuristic rule to slightly favour values close to mid of the range
  289. double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
  290. corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
  291. // Checks for the highest correlation value
  292. if (corr > bestCorr)
  293. {
  294. bestCorr = corr;
  295. bestOffs = tempOffset;
  296. }
  297. j ++;
  298. }
  299. corrOffset = bestOffs;
  300. }
  301. // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
  302. clearCrossCorrState();
  303. return bestOffs;
  304. }
  305. /// clear cross correlation routine state if necessary
  306. void TDStretch::clearCrossCorrState()
  307. {
  308. // default implementation is empty.
  309. }
  310. /// Calculates processing sequence length according to tempo setting
  311. void TDStretch::calcSeqParameters()
  312. {
  313. // Adjust tempo param according to tempo, so that variating processing sequence length is used
  314. // at varius tempo settings, between the given low...top limits
  315. #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%)
  316. #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)
  317. // sequence-ms setting values at above low & top tempo
  318. #define AUTOSEQ_AT_MIN 125.0
  319. #define AUTOSEQ_AT_MAX 50.0
  320. #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
  321. #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
  322. // seek-window-ms setting values at above low & top tempo
  323. #define AUTOSEEK_AT_MIN 25.0
  324. #define AUTOSEEK_AT_MAX 15.0
  325. #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
  326. #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
  327. #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
  328. double seq, seek;
  329. if (bAutoSeqSetting)
  330. {
  331. seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
  332. seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
  333. sequenceMs = (int)(seq + 0.5);
  334. }
  335. if (bAutoSeekSetting)
  336. {
  337. seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
  338. seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
  339. seekWindowMs = (int)(seek + 0.5);
  340. }
  341. // Update seek window lengths
  342. seekWindowLength = (sampleRate * sequenceMs) / 1000;
  343. if (seekWindowLength < 2 * overlapLength)
  344. {
  345. seekWindowLength = 2 * overlapLength;
  346. }
  347. seekLength = (sampleRate * seekWindowMs) / 1000;
  348. }
  349. // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
  350. // tempo, larger faster tempo.
  351. void TDStretch::setTempo(float newTempo)
  352. {
  353. int intskip;
  354. tempo = newTempo;
  355. // Calculate new sequence duration
  356. calcSeqParameters();
  357. // Calculate ideal skip length (according to tempo value)
  358. nominalSkip = tempo * (seekWindowLength - overlapLength);
  359. intskip = (int)(nominalSkip + 0.5f);
  360. // Calculate how many samples are needed in the 'inputBuffer' to
  361. // process another batch of samples
  362. //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
  363. sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
  364. }
  365. // Sets the number of channels, 1 = mono, 2 = stereo
  366. void TDStretch::setChannels(int numChannels)
  367. {
  368. assert(numChannels > 0);
  369. if (channels == numChannels) return;
  370. assert(numChannels == 1 || numChannels == 2);
  371. channels = numChannels;
  372. inputBuffer.setChannels(channels);
  373. outputBuffer.setChannels(channels);
  374. }
  375. // nominal tempo, no need for processing, just pass the samples through
  376. // to outputBuffer
  377. /*
  378. void TDStretch::processNominalTempo()
  379. {
  380. assert(tempo == 1.0f);
  381. if (bMidBufferDirty)
  382. {
  383. // If there are samples in pMidBuffer waiting for overlapping,
  384. // do a single sliding overlapping with them in order to prevent a
  385. // clicking distortion in the output sound
  386. if (inputBuffer.numSamples() < overlapLength)
  387. {
  388. // wait until we've got overlapLength input samples
  389. return;
  390. }
  391. // Mix the samples in the beginning of 'inputBuffer' with the
  392. // samples in 'midBuffer' using sliding overlapping
  393. overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
  394. outputBuffer.putSamples(overlapLength);
  395. inputBuffer.receiveSamples(overlapLength);
  396. clearMidBuffer();
  397. // now we've caught the nominal sample flow and may switch to
  398. // bypass mode
  399. }
  400. // Simply bypass samples from input to output
  401. outputBuffer.moveSamples(inputBuffer);
  402. }
  403. */
  404. #include <stdio.h>
  405. // Processes as many processing frames of the samples 'inputBuffer', store
  406. // the result into 'outputBuffer'
  407. void TDStretch::processSamples()
  408. {
  409. int ovlSkip, offset;
  410. int temp;
  411. /* Removed this small optimization - can introduce a click to sound when tempo setting
  412. crosses the nominal value
  413. if (tempo == 1.0f)
  414. {
  415. // tempo not changed from the original, so bypass the processing
  416. processNominalTempo();
  417. return;
  418. }
  419. */
  420. // Process samples as long as there are enough samples in 'inputBuffer'
  421. // to form a processing frame.
  422. while ((int)inputBuffer.numSamples() >= sampleReq)
  423. {
  424. // If tempo differs from the normal ('SCALE'), scan for the best overlapping
  425. // position
  426. offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
  427. // Mix the samples in the 'inputBuffer' at position of 'offset' with the
  428. // samples in 'midBuffer' using sliding overlapping
  429. // ... first partially overlap with the end of the previous sequence
  430. // (that's in 'midBuffer')
  431. overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
  432. outputBuffer.putSamples((uint)overlapLength);
  433. // ... then copy sequence samples from 'inputBuffer' to output:
  434. // length of sequence
  435. temp = (seekWindowLength - 2 * overlapLength);
  436. // crosscheck that we don't have buffer overflow...
  437. if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
  438. {
  439. continue; // just in case, shouldn't really happen
  440. }
  441. outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
  442. // Copies the end of the current sequence from 'inputBuffer' to
  443. // 'midBuffer' for being mixed with the beginning of the next
  444. // processing sequence and so on
  445. assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
  446. memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength),
  447. channels * sizeof(SAMPLETYPE) * overlapLength);
  448. // Remove the processed samples from the input buffer. Update
  449. // the difference between integer & nominal skip step to 'skipFract'
  450. // in order to prevent the error from accumulating over time.
  451. skipFract += nominalSkip; // real skip size
  452. ovlSkip = (int)skipFract; // rounded to integer skip
  453. skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip
  454. inputBuffer.receiveSamples((uint)ovlSkip);
  455. }
  456. }
  457. // Adds 'numsamples' pcs of samples from the 'samples' memory position into
  458. // the input of the object.
  459. void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
  460. {
  461. // Add the samples into the input buffer
  462. inputBuffer.putSamples(samples, nSamples);
  463. // Process the samples in input buffer
  464. processSamples();
  465. }
  466. /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
  467. void TDStretch::acceptNewOverlapLength(int newOverlapLength)
  468. {
  469. int prevOvl;
  470. assert(newOverlapLength >= 0);
  471. prevOvl = overlapLength;
  472. overlapLength = newOverlapLength;
  473. if (overlapLength > prevOvl)
  474. {
  475. delete[] pMidBufferUnaligned;
  476. pMidBufferUnaligned = new SAMPLETYPE[overlapLength * 2 + 16 / sizeof(SAMPLETYPE)];
  477. // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
  478. pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
  479. clearMidBuffer();
  480. }
  481. }
  482. // Operator 'new' is overloaded so that it automatically creates a suitable instance
  483. // depending on if we've a MMX/SSE/etc-capable CPU available or not.
  484. void * TDStretch::operator new(size_t s)
  485. {
  486. // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
  487. ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
  488. return newInstance();
  489. }
  490. TDStretch * TDStretch::newInstance()
  491. {
  492. uint uExtensions;
  493. uExtensions = detectCPUextensions();
  494. // Check if MMX/SSE instruction set extensions supported by CPU
  495. #ifdef SOUNDTOUCH_ALLOW_MMX
  496. // MMX routines available only with integer sample types
  497. if (uExtensions & SUPPORT_MMX)
  498. {
  499. return ::new TDStretchMMX;
  500. }
  501. else
  502. #endif // SOUNDTOUCH_ALLOW_MMX
  503. #ifdef SOUNDTOUCH_ALLOW_SSE
  504. if (uExtensions & SUPPORT_SSE)
  505. {
  506. // SSE support
  507. return ::new TDStretchSSE;
  508. }
  509. else
  510. #endif // SOUNDTOUCH_ALLOW_SSE
  511. {
  512. // ISA optimizations not supported, use plain C version
  513. return ::new TDStretch;
  514. }
  515. }
  516. //////////////////////////////////////////////////////////////////////////////
  517. //
  518. // Integer arithmetics specific algorithm implementations.
  519. //
  520. //////////////////////////////////////////////////////////////////////////////
  521. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  522. // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
  523. // version of the routine.
  524. void TDStretch::overlapStereo(short *poutput, const short *input) const
  525. {
  526. int i;
  527. short temp;
  528. int cnt2;
  529. for (i = 0; i < overlapLength ; i ++)
  530. {
  531. temp = (short)(overlapLength - i);
  532. cnt2 = 2 * i;
  533. poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength;
  534. poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
  535. }
  536. }
  537. // Calculates the x having the closest 2^x value for the given value
  538. static int _getClosest2Power(double value)
  539. {
  540. return (int)(log(value) / log(2.0) + 0.5);
  541. }
  542. /// Calculates overlap period length in samples.
  543. /// Integer version rounds overlap length to closest power of 2
  544. /// for a divide scaling operation.
  545. void TDStretch::calculateOverlapLength(int aoverlapMs)
  546. {
  547. int newOvl;
  548. assert(aoverlapMs >= 0);
  549. // calculate overlap length so that it's power of 2 - thus it's easy to do
  550. // integer division by right-shifting. Term "-1" at end is to account for
  551. // the extra most significatnt bit left unused in result by signed multiplication
  552. overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
  553. if (overlapDividerBits > 9) overlapDividerBits = 9;
  554. if (overlapDividerBits < 3) overlapDividerBits = 3;
  555. newOvl = (int)pow(2.0, (int)overlapDividerBits + 1); // +1 => account for -1 above
  556. acceptNewOverlapLength(newOvl);
  557. // calculate sloping divider so that crosscorrelation operation won't
  558. // overflow 32-bit register. Max. sum of the crosscorrelation sum without
  559. // divider would be 2^30*(N^3-N)/3, where N = overlap length
  560. slopingDivider = (newOvl * newOvl - 1) / 3;
  561. }
  562. double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare) const
  563. {
  564. long corr;
  565. long norm;
  566. int i;
  567. corr = norm = 0;
  568. // Same routine for stereo and mono. For stereo, unroll loop for better
  569. // efficiency and gives slightly better resolution against rounding.
  570. // For mono it same routine, just unrolls loop by factor of 4
  571. for (i = 0; i < channels * overlapLength; i += 4)
  572. {
  573. corr += (mixingPos[i] * compare[i] +
  574. mixingPos[i + 1] * compare[i + 1] +
  575. mixingPos[i + 2] * compare[i + 2] +
  576. mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
  577. norm += (mixingPos[i] * mixingPos[i] +
  578. mixingPos[i + 1] * mixingPos[i + 1] +
  579. mixingPos[i + 2] * mixingPos[i + 2] +
  580. mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
  581. }
  582. // Normalize result by dividing by sqrt(norm) - this step is easiest
  583. // done using floating point operation
  584. if (norm == 0) norm = 1; // to avoid div by zero
  585. return (double)corr / sqrt((double)norm);
  586. }
  587. #endif // SOUNDTOUCH_INTEGER_SAMPLES
  588. //////////////////////////////////////////////////////////////////////////////
  589. //
  590. // Floating point arithmetics specific algorithm implementations.
  591. //
  592. #ifdef SOUNDTOUCH_FLOAT_SAMPLES
  593. // Overlaps samples in 'midBuffer' with the samples in 'pInput'
  594. void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
  595. {
  596. int i;
  597. float fScale;
  598. float f1;
  599. float f2;
  600. fScale = 1.0f / (float)overlapLength;
  601. f1 = 0;
  602. f2 = 1.0f;
  603. for (i = 0; i < 2 * (int)overlapLength ; i += 2)
  604. {
  605. pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
  606. pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
  607. f1 += fScale;
  608. f2 -= fScale;
  609. }
  610. }
  611. /// Calculates overlapInMsec period length in samples.
  612. void TDStretch::calculateOverlapLength(int overlapInMsec)
  613. {
  614. int newOvl;
  615. assert(overlapInMsec >= 0);
  616. newOvl = (sampleRate * overlapInMsec) / 1000;
  617. if (newOvl < 16) newOvl = 16;
  618. // must be divisible by 8
  619. newOvl -= newOvl % 8;
  620. acceptNewOverlapLength(newOvl);
  621. }
  622. double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare) const
  623. {
  624. double corr;
  625. double norm;
  626. int i;
  627. corr = norm = 0;
  628. // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
  629. // For mono it's same routine yet unrollsd by factor of 4.
  630. for (i = 0; i < channels * overlapLength; i += 4)
  631. {
  632. corr += mixingPos[i] * compare[i] +
  633. mixingPos[i + 1] * compare[i + 1];
  634. norm += mixingPos[i] * mixingPos[i] +
  635. mixingPos[i + 1] * mixingPos[i + 1];
  636. // unroll the loop for better CPU efficiency:
  637. corr += mixingPos[i + 2] * compare[i + 2] +
  638. mixingPos[i + 3] * compare[i + 3];
  639. norm += mixingPos[i + 2] * mixingPos[i + 2] +
  640. mixingPos[i + 3] * mixingPos[i + 3];
  641. }
  642. if (norm < 1e-9) norm = 1.0; // to avoid div by zero
  643. return corr / sqrt(norm);
  644. }
  645. #endif // SOUNDTOUCH_FLOAT_SAMPLES