Advertisement
Guest User

Untitled

a guest
Feb 27th, 2015
241
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.42 KB | None | 0 0
  1. #include <stdexcept>
  2. #include "identify.hpp"
  3. #include <cassert>
  4. #include <cmath>
  5. #include <algorithm>
  6. #include <iostream>
  7. #include <set>
  8. #include "audioio.h"
  9. #include "mkl.h"
  10. AudioIdentify::AudioIdentify(unsigned int windowsize,\
  11. unsigned int overlap,\
  12. unsigned int neighborhood_size,\
  13. unsigned int fan_value,\
  14. float threshold)
  15. {
  16. this->windowsize = windowsize;
  17. this->overlap = overlap;
  18. this->neighborhood_size = neighborhood_size;
  19. this->fan_value = fan_value;
  20. this->threshold = threshold;
  21. }
  22.  
  23. MKL_LONG computefft(float* input, float* output, int size)
  24. {
  25. MKL_LONG status;
  26. DFTI_DESCRIPTOR_HANDLE handle;
  27. status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, size);
  28. status = DftiSetValue(handle, DFTI_NUMBER_OF_TRANSFORMS, 1);
  29. status = DftiSetValue(handle, DFTI_INPUT_DISTANCE, size);
  30. status = DftiSetValue(handle, DFTI_OUTPUT_DISTANCE, 2*((size/2)+1));
  31. status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
  32. status = DftiCommitDescriptor(handle);
  33. status = DftiComputeForward(handle, input, output);
  34. return status;
  35. }
  36. std::vector<std::pair<long, long>> \
  37. AudioIdentify::GetPeaks(const std::vector<std::vector<float>> &spectrogram)
  38. {
  39. std::vector<std::pair<long, long>> peaks;
  40. bool flag;
  41. long alow, blow, ahigh, bhigh;
  42. for(long i = 0; i < spectrogram.size(); i++)
  43. for(long j = 0; j < spectrogram[0].size(); j++)
  44. if(spectrogram[i][j] >=10)
  45. {
  46. alow = std::max(i - neighborhood_size/2, (long)0);
  47. blow = std::max(j - neighborhood_size/2, (long)0);
  48. ahigh = std::min(i + neighborhood_size/2, (long)spectrogram.size() - 1);
  49. bhigh = std::min(j + neighborhood_size/2, (long)spectrogram[0].size() - 1);
  50. flag = true;
  51. for(long p = alow; p <= ahigh; p++)
  52. for(long q = blow; q <= bhigh; q++)
  53. if(spectrogram[p][q] > spectrogram[i][j])
  54. flag = false;
  55.  
  56. if(flag)
  57. peaks.push_back(std::make_pair(i,j));
  58. }
  59.  
  60. return peaks;
  61. }
  62.  
  63. std::set<size_t> AudioIdentify::GetFingerprints\
  64. (const std::vector<std::pair<long, long>> &peaks)
  65. {
  66. std::string fingerprint;
  67. std::hash<std::string> str_hash;
  68. std::set<size_t> fingerprints;
  69. for(unsigned int i = 0; i < peaks.size(); i++)
  70. for(unsigned int j = i + 1; j <= fan_value + i && j < peaks.size(); j++)
  71. {
  72. fingerprint = std::to_string(peaks[i].second) + "|" + std::to_string(peaks[j].second) + "|" + std::to_string(peaks[j].first - peaks[i].first);
  73. fingerprints.insert(str_hash(fingerprint));
  74. }
  75. return fingerprints;
  76. }
  77.  
  78.  
  79. std::set<size_t> AudioIdentify::ProcessAudio(std::string filename)
  80. {
  81. short *data = NULL;
  82. long nframes = 0;
  83. int nchannels = 0;
  84. std::cout<<"Reading"<<filename<<std::endl;
  85.  
  86. int read_status = ReadWavFile(filename.c_str(), &data, &nframes, &nchannels);
  87.  
  88. if (read_status != 0)
  89. std::cerr<<"Could not read wav file, status = "<<read_status<<std::endl;
  90.  
  91. std::vector<float> sc_data(nframes);
  92.  
  93. for(long i = 0; i < nframes; i ++)
  94. sc_data[i] = data[i*nchannels];
  95.  
  96. free(data);
  97. data = NULL;
  98.  
  99. std::vector<float> output(2*(windowsize/2 + 1));
  100. std::vector<float> modoutput(windowsize/2 + 1);
  101. std::vector<std::vector<float>> spectrogram;
  102. for(long int i = 0; i < nframes - windowsize; i += windowsize - overlap)
  103. {
  104. MKL_LONG status = computefft(&sc_data[i], &output[0], windowsize);
  105.  
  106. for(unsigned int k = 0; k < output.size() - 2; k+=2)
  107. modoutput[k/2] = 10*log10((output[k]*output[k] + output[k+1]*output[k+1])*2/(windowsize*44100));
  108.  
  109. modoutput[0] = 10*log10((output[0]*output[0] + output[1]*output[1])*1/(windowsize*44100));
  110. modoutput[(output.size()-2)/2] = 10*log10((output[output.size()-2]*output[output.size()-2] + output[output.size() - 1]*output[output.size()-1])*1/(windowsize*44100));
  111.  
  112. spectrogram.push_back(modoutput);
  113. }
  114. std::vector<std::pair<long,long>> peaks = GetPeaks(spectrogram);
  115. return GetFingerprints(peaks);
  116. }
  117.  
  118. void AudioIdentify::AddToDatabase(std::string filename)
  119. {
  120. std::set<size_t> fingerprints = ProcessAudio(filename);
  121. database[filename] = fingerprints;
  122. }
  123.  
  124. std::pair<std::string, double> AudioIdentify::FindMatches(std::string filename)
  125. {
  126. std::set<size_t> fingerprints = ProcessAudio(filename);
  127. std::pair<std::string, double> bestmatch("",0);
  128.  
  129. for(auto &i : database)
  130. {
  131. unsigned int sum = 0;
  132. std::set<size_t> dbprint = i.second;
  133.  
  134. for(auto it = fingerprints.begin(); it != fingerprints.end(); ++it)
  135. {
  136. size_t searchvalue = *it;
  137. if(dbprint.count(searchvalue)!=0)
  138. sum++;
  139. }
  140.  
  141. if(double(sum)/fingerprints.size() > bestmatch.second)
  142. {
  143. bestmatch.first = i.first;
  144. bestmatch.second = double(sum)/fingerprints.size();
  145. }
  146.  
  147. }
  148.  
  149. if(bestmatch.second <= threshold)
  150. {
  151. bestmatch.first = "";
  152. bestmatch.second = 0;
  153. }
  154.  
  155. bestmatch.second *= 100;
  156. return bestmatch;
  157. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement