| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370 |
- ////////////////////////////////////////////////////////////////////////////////
- ///
- /// Beats-per-minute (BPM) detection routine.
- ///
- /// The beat detection algorithm works as follows:
- /// - Use function 'inputSamples' to input a chunks of samples to the class for
- /// analysis. It's a good idea to enter a large sound file or stream in smallish
- /// chunks of around few kilosamples in order not to extinguish too much RAM memory.
- /// - Inputted sound data is decimated to approx 500 Hz to reduce calculation burden,
- /// which is basically ok as low (bass) frequencies mostly determine the beat rate.
- /// Simple averaging is used for anti-alias filtering because the resulting signal
- /// quality isn't of that high importance.
- /// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by
- /// taking absolute value that's smoothed by sliding average. Signal levels that
- /// are below a couple of times the general RMS amplitude level are cut away to
- /// leave only notable peaks there.
- /// - Repeating sound patterns (e.g. beats) are detected by calculating short-term
- /// autocorrelation function of the enveloped signal.
- /// - After whole sound data file has been analyzed as above, the bpm level is
- /// detected by function 'getBpm' that finds the highest peak of the autocorrelation
- /// function, calculates it's precise location and converts this reading to bpm's.
- ///
- /// Author : Copyright (c) Olli Parviainen
- /// Author e-mail : oparviai 'at' iki.fi
- /// SoundTouch WWW: http://www.surina.net/soundtouch
- ///
- ////////////////////////////////////////////////////////////////////////////////
- //
- // Last changed : $Date: 2012-08-30 22:45:25 +0300 (Thu, 30 Aug 2012) $
- // File revision : $Revision: 4 $
- //
- // $Id: BPMDetect.cpp 149 2012-08-30 19:45:25Z oparviai $
- //
- ////////////////////////////////////////////////////////////////////////////////
- //
- // License :
- //
- // SoundTouch audio processing library
- // Copyright (c) Olli Parviainen
- //
- // This library is free software; you can redistribute it and/or
- // modify it under the terms of the GNU Lesser General Public
- // License as published by the Free Software Foundation; either
- // version 2.1 of the License, or (at your option) any later version.
- //
- // This library is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- // Lesser General Public License for more details.
- //
- // You should have received a copy of the GNU Lesser General Public
- // License along with this library; if not, write to the Free Software
- // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- //
- ////////////////////////////////////////////////////////////////////////////////
- #include <math.h>
- #include <assert.h>
- #include <string.h>
- #include <stdio.h>
- #include "FIFOSampleBuffer.h"
- #include "PeakFinder.h"
- #include "BPMDetect.h"
- using namespace soundtouch;
- #define INPUT_BLOCK_SAMPLES 2048
- #define DECIMATED_BLOCK_SAMPLES 256
- /// decay constant for calculating RMS volume sliding average approximation
- /// (time constant is about 10 sec)
- const float avgdecay = 0.99986f;
- /// Normalization coefficient for calculating RMS sliding average approximation.
- const float avgnorm = (1 - avgdecay);
- ////////////////////////////////////////////////////////////////////////////////
- // Enable following define to create bpm analysis file:
- // #define _CREATE_BPM_DEBUG_FILE
- #ifdef _CREATE_BPM_DEBUG_FILE
- #define DEBUGFILE_NAME "c:\\temp\\soundtouch-bpm-debug.txt"
- static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff)
- {
- FILE *fptr = fopen(DEBUGFILE_NAME, "wt");
- int i;
- if (fptr)
- {
- printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n");
- for (i = minpos; i < maxpos; i ++)
- {
- fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]);
- }
- fclose(fptr);
- }
- }
- #else
- #define _SaveDebugData(a,b,c,d)
- #endif
- ////////////////////////////////////////////////////////////////////////////////
- BPMDetect::BPMDetect(int numChannels, int aSampleRate)
- {
- this->sampleRate = aSampleRate;
- this->channels = numChannels;
- decimateSum = 0;
- decimateCount = 0;
- envelopeAccu = 0;
- // Initialize RMS volume accumulator to RMS level of 1500 (out of 32768) that's
- // safe initial RMS signal level value for song data. This value is then adapted
- // to the actual level during processing.
- #ifdef SOUNDTOUCH_INTEGER_SAMPLES
- // integer samples
- RMSVolumeAccu = (1500 * 1500) / avgnorm;
- #else
- // float samples, scaled to range [-1..+1[
- RMSVolumeAccu = (0.045f * 0.045f) / avgnorm;
- #endif
- // choose decimation factor so that result is approx. 1000 Hz
- decimateBy = sampleRate / 1000;
- assert(decimateBy > 0);
- assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);
- // Calculate window length & starting item according to desired min & max bpms
- windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
- windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);
- assert(windowLen > windowStart);
- // allocate new working objects
- xcorr = new float[windowLen];
- memset(xcorr, 0, windowLen * sizeof(float));
- // allocate processing buffer
- buffer = new FIFOSampleBuffer();
- // we do processing in mono mode
- buffer->setChannels(1);
- buffer->clear();
- }
- BPMDetect::~BPMDetect()
- {
- delete[] xcorr;
- delete buffer;
- }
- /// convert to mono, low-pass filter & decimate to about 500 Hz.
- /// return number of outputted samples.
- ///
- /// Decimation is used to remove the unnecessary frequencies and thus to reduce
- /// the amount of data needed to be processed as calculating autocorrelation
- /// function is a very-very heavy operation.
- ///
- /// Anti-alias filtering is done simply by averaging the samples. This is really a
- /// poor-man's anti-alias filtering, but it's not so critical in this kind of application
- /// (it'd also be difficult to design a high-quality filter with steep cut-off at very
- /// narrow band)
- int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
- {
- int count, outcount;
- LONG_SAMPLETYPE out;
- assert(channels > 0);
- assert(decimateBy > 0);
- outcount = 0;
- for (count = 0; count < numsamples; count ++)
- {
- int j;
- // convert to mono and accumulate
- for (j = 0; j < channels; j ++)
- {
- decimateSum += src[j];
- }
- src += j;
- decimateCount ++;
- if (decimateCount >= decimateBy)
- {
- // Store every Nth sample only
- out = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels));
- decimateSum = 0;
- decimateCount = 0;
- #ifdef SOUNDTOUCH_INTEGER_SAMPLES
- // check ranges for sure (shouldn't actually be necessary)
- if (out > 32767)
- {
- out = 32767;
- }
- else if (out < -32768)
- {
- out = -32768;
- }
- #endif // SOUNDTOUCH_INTEGER_SAMPLES
- dest[outcount] = (SAMPLETYPE)out;
- outcount ++;
- }
- }
- return outcount;
- }
- // Calculates autocorrelation function of the sample history buffer
- void BPMDetect::updateXCorr(int process_samples)
- {
- int offs;
- SAMPLETYPE *pBuffer;
-
- assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
- pBuffer = buffer->ptrBegin();
- for (offs = windowStart; offs < windowLen; offs ++)
- {
- LONG_SAMPLETYPE sum;
- int i;
- sum = 0;
- for (i = 0; i < process_samples; i ++)
- {
- sum += pBuffer[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary
- }
- // xcorr[offs] *= xcorr_decay; // decay 'xcorr' here with suitable coefficients
- // if it's desired that the system adapts automatically to
- // various bpms, e.g. in processing continouos music stream.
- // The 'xcorr_decay' should be a value that's smaller than but
- // close to one, and should also depend on 'process_samples' value.
- xcorr[offs] += (float)sum;
- }
- }
- // Calculates envelope of the sample data
- void BPMDetect::calcEnvelope(SAMPLETYPE *samples, int numsamples)
- {
- const static double decay = 0.7f; // decay constant for smoothing the envelope
- const static double norm = (1 - decay);
- int i;
- LONG_SAMPLETYPE out;
- double val;
- for (i = 0; i < numsamples; i ++)
- {
- // calc average RMS volume
- RMSVolumeAccu *= avgdecay;
- val = (float)fabs((float)samples[i]);
- RMSVolumeAccu += val * val;
- // cut amplitudes that are below cutoff ~2 times RMS volume
- // (we're interested in peak values, not the silent moments)
- if (val < 0.5 * sqrt(RMSVolumeAccu * avgnorm))
- {
- val = 0;
- }
- // smooth amplitude envelope
- envelopeAccu *= decay;
- envelopeAccu += val;
- out = (LONG_SAMPLETYPE)(envelopeAccu * norm);
- #ifdef SOUNDTOUCH_INTEGER_SAMPLES
- // cut peaks (shouldn't be necessary though)
- if (out > 32767) out = 32767;
- #endif // SOUNDTOUCH_INTEGER_SAMPLES
- samples[i] = (SAMPLETYPE)out;
- }
- }
- void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
- {
- SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];
- // iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
- while (numSamples > 0)
- {
- int block;
- int decSamples;
- block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples;
- // decimate. note that converts to mono at the same time
- decSamples = decimate(decimated, samples, block);
- samples += block * channels;
- numSamples -= block;
- // envelope new samples and add them to buffer
- calcEnvelope(decimated, decSamples);
- buffer->putSamples(decimated, decSamples);
- }
- // when the buffer has enought samples for processing...
- if ((int)buffer->numSamples() > windowLen)
- {
- int processLength;
- // how many samples are processed
- processLength = (int)buffer->numSamples() - windowLen;
- // ... calculate autocorrelations for oldest samples...
- updateXCorr(processLength);
- // ... and remove them from the buffer
- buffer->receiveSamples(processLength);
- }
- }
- void BPMDetect::removeBias()
- {
- int i;
- float minval = 1e12f; // arbitrary large number
- for (i = windowStart; i < windowLen; i ++)
- {
- if (xcorr[i] < minval)
- {
- minval = xcorr[i];
- }
- }
- for (i = windowStart; i < windowLen; i ++)
- {
- xcorr[i] -= minval;
- }
- }
- float BPMDetect::getBpm()
- {
- double peakPos;
- double coeff;
- PeakFinder peakFinder;
- coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
- // save bpm debug analysis data if debug data enabled
- _SaveDebugData(xcorr, windowStart, windowLen, coeff);
- // remove bias from xcorr data
- removeBias();
- // find peak position
- peakPos = peakFinder.detectPeak(xcorr, windowStart, windowLen);
- assert(decimateBy != 0);
- if (peakPos < 1e-9) return 0.0; // detection failed.
- // calculate BPM
- return (float) (coeff / peakPos);
- }
|