Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdexcept>
- #include "identify.hpp"
- #include <cassert>
- #include <cmath>
- #include <algorithm>
- #include <iostream>
- #include <set>
- #include "audioio.h"
- #include "mkl.h"
- AudioIdentify::AudioIdentify(unsigned int windowsize,\
- unsigned int overlap,\
- unsigned int neighborhood_size,\
- unsigned int fan_value,\
- float threshold)
- {
- this->windowsize = windowsize;
- this->overlap = overlap;
- this->neighborhood_size = neighborhood_size;
- this->fan_value = fan_value;
- this->threshold = threshold;
- }
- MKL_LONG computefft(float* input, float* output, int size)
- {
- MKL_LONG status;
- DFTI_DESCRIPTOR_HANDLE handle;
- status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, size);
- status = DftiSetValue(handle, DFTI_NUMBER_OF_TRANSFORMS, 1);
- status = DftiSetValue(handle, DFTI_INPUT_DISTANCE, size);
- status = DftiSetValue(handle, DFTI_OUTPUT_DISTANCE, 2*((size/2)+1));
- status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
- status = DftiCommitDescriptor(handle);
- status = DftiComputeForward(handle, input, output);
- return status;
- }
- std::vector<std::pair<long, long>> \
- AudioIdentify::GetPeaks(const std::vector<std::vector<float>> &spectrogram)
- {
- std::vector<std::pair<long, long>> peaks;
- bool flag;
- long alow, blow, ahigh, bhigh;
- for(long i = 0; i < spectrogram.size(); i++)
- for(long j = 0; j < spectrogram[0].size(); j++)
- if(spectrogram[i][j] >=10)
- {
- alow = std::max(i - neighborhood_size/2, (long)0);
- blow = std::max(j - neighborhood_size/2, (long)0);
- ahigh = std::min(i + neighborhood_size/2, (long)spectrogram.size() - 1);
- bhigh = std::min(j + neighborhood_size/2, (long)spectrogram[0].size() - 1);
- flag = true;
- for(long p = alow; p <= ahigh; p++)
- for(long q = blow; q <= bhigh; q++)
- if(spectrogram[p][q] > spectrogram[i][j])
- flag = false;
- if(flag)
- peaks.push_back(std::make_pair(i,j));
- }
- return peaks;
- }
- std::set<size_t> AudioIdentify::GetFingerprints\
- (const std::vector<std::pair<long, long>> &peaks)
- {
- std::string fingerprint;
- std::hash<std::string> str_hash;
- std::set<size_t> fingerprints;
- for(unsigned int i = 0; i < peaks.size(); i++)
- for(unsigned int j = i + 1; j <= fan_value + i && j < peaks.size(); j++)
- {
- fingerprint = std::to_string(peaks[i].second) + "|" + std::to_string(peaks[j].second) + "|" + std::to_string(peaks[j].first - peaks[i].first);
- fingerprints.insert(str_hash(fingerprint));
- }
- return fingerprints;
- }
- std::set<size_t> AudioIdentify::ProcessAudio(std::string filename)
- {
- short *data = NULL;
- long nframes = 0;
- int nchannels = 0;
- std::cout<<"Reading"<<filename<<std::endl;
- int read_status = ReadWavFile(filename.c_str(), &data, &nframes, &nchannels);
- if (read_status != 0)
- std::cerr<<"Could not read wav file, status = "<<read_status<<std::endl;
- std::vector<float> sc_data(nframes);
- for(long i = 0; i < nframes; i ++)
- sc_data[i] = data[i*nchannels];
- free(data);
- data = NULL;
- std::vector<float> output(2*(windowsize/2 + 1));
- std::vector<float> modoutput(windowsize/2 + 1);
- std::vector<std::vector<float>> spectrogram;
- for(long int i = 0; i < nframes - windowsize; i += windowsize - overlap)
- {
- MKL_LONG status = computefft(&sc_data[i], &output[0], windowsize);
- for(unsigned int k = 0; k < output.size() - 2; k+=2)
- modoutput[k/2] = 10*log10((output[k]*output[k] + output[k+1]*output[k+1])*2/(windowsize*44100));
- modoutput[0] = 10*log10((output[0]*output[0] + output[1]*output[1])*1/(windowsize*44100));
- modoutput[(output.size()-2)/2] = 10*log10((output[output.size()-2]*output[output.size()-2] + output[output.size() - 1]*output[output.size()-1])*1/(windowsize*44100));
- spectrogram.push_back(modoutput);
- }
- std::vector<std::pair<long,long>> peaks = GetPeaks(spectrogram);
- return GetFingerprints(peaks);
- }
- void AudioIdentify::AddToDatabase(std::string filename)
- {
- std::set<size_t> fingerprints = ProcessAudio(filename);
- database[filename] = fingerprints;
- }
- std::pair<std::string, double> AudioIdentify::FindMatches(std::string filename)
- {
- std::set<size_t> fingerprints = ProcessAudio(filename);
- std::pair<std::string, double> bestmatch("",0);
- for(auto &i : database)
- {
- unsigned int sum = 0;
- std::set<size_t> dbprint = i.second;
- for(auto it = fingerprints.begin(); it != fingerprints.end(); ++it)
- {
- size_t searchvalue = *it;
- if(dbprint.count(searchvalue)!=0)
- sum++;
- }
- if(double(sum)/fingerprints.size() > bestmatch.second)
- {
- bestmatch.first = i.first;
- bestmatch.second = double(sum)/fingerprints.size();
- }
- }
- if(bestmatch.second <= threshold)
- {
- bestmatch.first = "";
- bestmatch.second = 0;
- }
- bestmatch.second *= 100;
- return bestmatch;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement