BPMDetect.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. ////////////////////////////////////////////////////////////////////////////////
  2. ///
  3. /// Beats-per-minute (BPM) detection routine.
  4. ///
  5. /// The beat detection algorithm works as follows:
  6. /// - Use function 'inputSamples' to input a chunks of samples to the class for
  7. /// analysis. It's a good idea to enter a large sound file or stream in smallish
  8. /// chunks of around few kilosamples in order not to extinguish too much RAM memory.
  9. /// - Inputted sound data is decimated to approx 500 Hz to reduce calculation burden,
  10. /// which is basically ok as low (bass) frequencies mostly determine the beat rate.
  11. /// Simple averaging is used for anti-alias filtering because the resulting signal
  12. /// quality isn't of that high importance.
  13. /// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by
  14. /// taking absolute value that's smoothed by sliding average. Signal levels that
  15. /// are below a couple of times the general RMS amplitude level are cut away to
  16. /// leave only notable peaks there.
  17. /// - Repeating sound patterns (e.g. beats) are detected by calculating short-term
  18. /// autocorrelation function of the enveloped signal.
  19. /// - After whole sound data file has been analyzed as above, the bpm level is
  20. /// detected by function 'getBpm' that finds the highest peak of the autocorrelation
  21. /// function, calculates it's precise location and converts this reading to bpm's.
  22. ///
  23. /// Author : Copyright (c) Olli Parviainen
  24. /// Author e-mail : oparviai 'at' iki.fi
  25. /// SoundTouch WWW: http://www.surina.net/soundtouch
  26. ///
  27. ////////////////////////////////////////////////////////////////////////////////
  28. //
  29. // Last changed : $Date: 2012-08-30 22:45:25 +0300 (Thu, 30 Aug 2012) $
  30. // File revision : $Revision: 4 $
  31. //
  32. // $Id: BPMDetect.cpp 149 2012-08-30 19:45:25Z oparviai $
  33. //
  34. ////////////////////////////////////////////////////////////////////////////////
  35. //
  36. // License :
  37. //
  38. // SoundTouch audio processing library
  39. // Copyright (c) Olli Parviainen
  40. //
  41. // This library is free software; you can redistribute it and/or
  42. // modify it under the terms of the GNU Lesser General Public
  43. // License as published by the Free Software Foundation; either
  44. // version 2.1 of the License, or (at your option) any later version.
  45. //
  46. // This library is distributed in the hope that it will be useful,
  47. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  48. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  49. // Lesser General Public License for more details.
  50. //
  51. // You should have received a copy of the GNU Lesser General Public
  52. // License along with this library; if not, write to the Free Software
  53. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  54. //
  55. ////////////////////////////////////////////////////////////////////////////////
  56. #include <math.h>
  57. #include <assert.h>
  58. #include <string.h>
  59. #include <stdio.h>
  60. #include "FIFOSampleBuffer.h"
  61. #include "PeakFinder.h"
  62. #include "BPMDetect.h"
  63. using namespace soundtouch;
  64. #define INPUT_BLOCK_SAMPLES 2048
  65. #define DECIMATED_BLOCK_SAMPLES 256
  66. /// decay constant for calculating RMS volume sliding average approximation
  67. /// (time constant is about 10 sec)
  68. const float avgdecay = 0.99986f;
  69. /// Normalization coefficient for calculating RMS sliding average approximation.
  70. const float avgnorm = (1 - avgdecay);
  71. ////////////////////////////////////////////////////////////////////////////////
  72. // Enable following define to create bpm analysis file:
  73. // #define _CREATE_BPM_DEBUG_FILE
  74. #ifdef _CREATE_BPM_DEBUG_FILE
  75. #define DEBUGFILE_NAME "c:\\temp\\soundtouch-bpm-debug.txt"
  76. static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff)
  77. {
  78. FILE *fptr = fopen(DEBUGFILE_NAME, "wt");
  79. int i;
  80. if (fptr)
  81. {
  82. printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n");
  83. for (i = minpos; i < maxpos; i ++)
  84. {
  85. fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]);
  86. }
  87. fclose(fptr);
  88. }
  89. }
  90. #else
  91. #define _SaveDebugData(a,b,c,d)
  92. #endif
  93. ////////////////////////////////////////////////////////////////////////////////
  94. BPMDetect::BPMDetect(int numChannels, int aSampleRate)
  95. {
  96. this->sampleRate = aSampleRate;
  97. this->channels = numChannels;
  98. decimateSum = 0;
  99. decimateCount = 0;
  100. envelopeAccu = 0;
  101. // Initialize RMS volume accumulator to RMS level of 1500 (out of 32768) that's
  102. // safe initial RMS signal level value for song data. This value is then adapted
  103. // to the actual level during processing.
  104. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  105. // integer samples
  106. RMSVolumeAccu = (1500 * 1500) / avgnorm;
  107. #else
  108. // float samples, scaled to range [-1..+1[
  109. RMSVolumeAccu = (0.045f * 0.045f) / avgnorm;
  110. #endif
  111. // choose decimation factor so that result is approx. 1000 Hz
  112. decimateBy = sampleRate / 1000;
  113. assert(decimateBy > 0);
  114. assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);
  115. // Calculate window length & starting item according to desired min & max bpms
  116. windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
  117. windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);
  118. assert(windowLen > windowStart);
  119. // allocate new working objects
  120. xcorr = new float[windowLen];
  121. memset(xcorr, 0, windowLen * sizeof(float));
  122. // allocate processing buffer
  123. buffer = new FIFOSampleBuffer();
  124. // we do processing in mono mode
  125. buffer->setChannels(1);
  126. buffer->clear();
  127. }
  128. BPMDetect::~BPMDetect()
  129. {
  130. delete[] xcorr;
  131. delete buffer;
  132. }
  133. /// convert to mono, low-pass filter & decimate to about 500 Hz.
  134. /// return number of outputted samples.
  135. ///
  136. /// Decimation is used to remove the unnecessary frequencies and thus to reduce
  137. /// the amount of data needed to be processed as calculating autocorrelation
  138. /// function is a very-very heavy operation.
  139. ///
  140. /// Anti-alias filtering is done simply by averaging the samples. This is really a
  141. /// poor-man's anti-alias filtering, but it's not so critical in this kind of application
  142. /// (it'd also be difficult to design a high-quality filter with steep cut-off at very
  143. /// narrow band)
  144. int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
  145. {
  146. int count, outcount;
  147. LONG_SAMPLETYPE out;
  148. assert(channels > 0);
  149. assert(decimateBy > 0);
  150. outcount = 0;
  151. for (count = 0; count < numsamples; count ++)
  152. {
  153. int j;
  154. // convert to mono and accumulate
  155. for (j = 0; j < channels; j ++)
  156. {
  157. decimateSum += src[j];
  158. }
  159. src += j;
  160. decimateCount ++;
  161. if (decimateCount >= decimateBy)
  162. {
  163. // Store every Nth sample only
  164. out = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels));
  165. decimateSum = 0;
  166. decimateCount = 0;
  167. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  168. // check ranges for sure (shouldn't actually be necessary)
  169. if (out > 32767)
  170. {
  171. out = 32767;
  172. }
  173. else if (out < -32768)
  174. {
  175. out = -32768;
  176. }
  177. #endif // SOUNDTOUCH_INTEGER_SAMPLES
  178. dest[outcount] = (SAMPLETYPE)out;
  179. outcount ++;
  180. }
  181. }
  182. return outcount;
  183. }
  184. // Calculates autocorrelation function of the sample history buffer
  185. void BPMDetect::updateXCorr(int process_samples)
  186. {
  187. int offs;
  188. SAMPLETYPE *pBuffer;
  189. assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
  190. pBuffer = buffer->ptrBegin();
  191. for (offs = windowStart; offs < windowLen; offs ++)
  192. {
  193. LONG_SAMPLETYPE sum;
  194. int i;
  195. sum = 0;
  196. for (i = 0; i < process_samples; i ++)
  197. {
  198. sum += pBuffer[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary
  199. }
  200. // xcorr[offs] *= xcorr_decay; // decay 'xcorr' here with suitable coefficients
  201. // if it's desired that the system adapts automatically to
  202. // various bpms, e.g. in processing continouos music stream.
  203. // The 'xcorr_decay' should be a value that's smaller than but
  204. // close to one, and should also depend on 'process_samples' value.
  205. xcorr[offs] += (float)sum;
  206. }
  207. }
  208. // Calculates envelope of the sample data
  209. void BPMDetect::calcEnvelope(SAMPLETYPE *samples, int numsamples)
  210. {
  211. const static double decay = 0.7f; // decay constant for smoothing the envelope
  212. const static double norm = (1 - decay);
  213. int i;
  214. LONG_SAMPLETYPE out;
  215. double val;
  216. for (i = 0; i < numsamples; i ++)
  217. {
  218. // calc average RMS volume
  219. RMSVolumeAccu *= avgdecay;
  220. val = (float)fabs((float)samples[i]);
  221. RMSVolumeAccu += val * val;
  222. // cut amplitudes that are below cutoff ~2 times RMS volume
  223. // (we're interested in peak values, not the silent moments)
  224. if (val < 0.5 * sqrt(RMSVolumeAccu * avgnorm))
  225. {
  226. val = 0;
  227. }
  228. // smooth amplitude envelope
  229. envelopeAccu *= decay;
  230. envelopeAccu += val;
  231. out = (LONG_SAMPLETYPE)(envelopeAccu * norm);
  232. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  233. // cut peaks (shouldn't be necessary though)
  234. if (out > 32767) out = 32767;
  235. #endif // SOUNDTOUCH_INTEGER_SAMPLES
  236. samples[i] = (SAMPLETYPE)out;
  237. }
  238. }
  239. void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
  240. {
  241. SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];
  242. // iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
  243. while (numSamples > 0)
  244. {
  245. int block;
  246. int decSamples;
  247. block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples;
  248. // decimate. note that converts to mono at the same time
  249. decSamples = decimate(decimated, samples, block);
  250. samples += block * channels;
  251. numSamples -= block;
  252. // envelope new samples and add them to buffer
  253. calcEnvelope(decimated, decSamples);
  254. buffer->putSamples(decimated, decSamples);
  255. }
  256. // when the buffer has enought samples for processing...
  257. if ((int)buffer->numSamples() > windowLen)
  258. {
  259. int processLength;
  260. // how many samples are processed
  261. processLength = (int)buffer->numSamples() - windowLen;
  262. // ... calculate autocorrelations for oldest samples...
  263. updateXCorr(processLength);
  264. // ... and remove them from the buffer
  265. buffer->receiveSamples(processLength);
  266. }
  267. }
  268. void BPMDetect::removeBias()
  269. {
  270. int i;
  271. float minval = 1e12f; // arbitrary large number
  272. for (i = windowStart; i < windowLen; i ++)
  273. {
  274. if (xcorr[i] < minval)
  275. {
  276. minval = xcorr[i];
  277. }
  278. }
  279. for (i = windowStart; i < windowLen; i ++)
  280. {
  281. xcorr[i] -= minval;
  282. }
  283. }
  284. float BPMDetect::getBpm()
  285. {
  286. double peakPos;
  287. double coeff;
  288. PeakFinder peakFinder;
  289. coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
  290. // save bpm debug analysis data if debug data enabled
  291. _SaveDebugData(xcorr, windowStart, windowLen, coeff);
  292. // remove bias from xcorr data
  293. removeBias();
  294. // find peak position
  295. peakPos = peakFinder.detectPeak(xcorr, windowStart, windowLen);
  296. assert(decimateBy != 0);
  297. if (peakPos < 1e-9) return 0.0; // detection failed.
  298. // calculate BPM
  299. return (float) (coeff / peakPos);
  300. }