Advertisement
Guest User

Untitled

a guest
Feb 26th, 2020
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.44 KB | None | 0 0
  1. std::vector<time_value_float> Analysis::get_segments(){
  2.  
  3. bool FILEPRINT = false;
  4. auto start = std::chrono::system_clock::now();
  5.  
  6. std::vector<time_value_float> segments;
  7.  
  8. // BIN AND HOPSIZE BASED ON MEINARD MÜLLERS RECOMMENDATION IN "FUNDAMENTALS OF USIC PROCESSING"
  9. int bin_size = 2205; // 0.05 seconds, 20Hz
  10. int hop_size = 2205; // no overlap
  11.  
  12. Gist<float> gist2(bin_size, samplerate);
  13. std::vector<std::vector<float>> window;
  14. std::vector<std::vector<float>> ssm;
  15. std::vector<float> bin;
  16. bin.reserve(bin_size);
  17. std::vector<float> coeffs;
  18.  
  19. // filter kernel variables
  20. int L = 75;
  21. int N = (L*2)+1;
  22.  
  23. // ##############################
  24. // ###### 1. GET AUDIO BIN ######
  25. // ##############################
  26.  
  27. auto start_audiobin = std::chrono::system_clock::now();
  28.  
  29. for (int i = 0; i < (signal_length_mono - bin_size); i += hop_size){
  30.  
  31. for(int j = 0; j < bin_size; j++){
  32. int index = i + j;
  33. //std::cout << "index " << index << ": " << wav_values_mono[index] << std::endl;
  34. bin.push_back(wav_values_mono[index]);
  35. }
  36. gist2.processAudioFrame(bin);
  37.  
  38. // ########################################
  39. // ###### 2. GET THE FEATURE VECTORS ######
  40. // ###### AND PUT THEM INTO WINDOW ########
  41. // ########################################
  42.  
  43. // USE MAGNITUDE SPECTRUM
  44. //coeffs = gist2.getMagnitudeSpectrum();
  45.  
  46. // USE MFCC
  47. coeffs = gist2.getMelFrequencyCepstralCoefficients();
  48.  
  49. window.push_back(coeffs);
  50. coeffs.clear();
  51. bin.clear();
  52. }
  53.  
  54. auto end_audiobin = std::chrono::system_clock::now();
  55.  
  56.  
  57. // MAYBE WE WILL NEED THIS LATER
  58. // HOW TO GET THE TIMES
  59. // time of window[n] = (((n * hop_size) + (bin_size / 2)) / sample_rate)
  60.  
  61. // #################################
  62. // ###### 3. COMPUTE DISTANCE ######
  63. // #################################
  64.  
  65. // THIS HAS TO BE OPTIMIZED!!!
  66. // ONE HALF OF MATRIX DOESNT NEED TO BE CALCULATED BECAUSE [m][n] = [n][m]
  67. // EVERYTHING ABOVE OR BELOW L DOESN'T NEED TO BE CALCULATED
  68. // ZERO PADDING MUST BE APPLIED!
  69.  
  70. std::vector<float> similarity;
  71.  
  72. float ssm_max = 0;
  73. float ssm_min = 0;
  74. float ssm_count = 0;
  75. float ssm_middle = 0;
  76. float ssm_sum = 0;
  77.  
  78. float distance = 0;
  79.  
  80. auto start_distance = std::chrono::system_clock::now();
  81.  
  82. for (int m = 0; m < window.size(); m++){
  83. similarity.clear();
  84. for (int n = 0; n < window.size(); n++){
  85.  
  86. if (n <= m+L && n >= m-L) {
  87.  
  88. // BEFORE CALCULATING DISTANCE, CHECK IF THERE IS A MIRRORED VALUE BECAUSE [m][n] = [n][m]
  89. /*
  90. if (ssm[n].size() > m && ssm[m].size() > n) {
  91. ssm[m][n] = ssm[n][m];
  92. continue;
  93. }
  94. else {
  95. }
  96. */
  97.  
  98. // #################################################################
  99. // ###################### 3.1 COSINE DISTANCE ######################
  100. // #### d_cos = m*n / |m|*|n| = m*n / sqrt(m*m) * sqrt(n*n) ####
  101. // #################################################################
  102.  
  103. float p_mn = 0;
  104. for (int j = 0; j < window[m].size(); j++) {
  105. p_mn += window[m][j] * window[n][j];
  106. }
  107. float p_mm = 0;
  108. for (int j = 0; j < window[m].size(); j++) {
  109. p_mm += window[m][j] * window[m][j];
  110. }
  111. float p_nn = 0;
  112. for (int j = 0; j < window[n].size(); j++) {
  113. p_nn += window[n][j] * window[n][j];
  114. }
  115. float cosine_distance = p_mn / (sqrt(p_mm) * sqrt(p_nn));
  116.  
  117.  
  118. // USE COSINE DISTANCE
  119. distance = cosine_distance;
  120. }
  121. else {
  122. distance = 0.0;
  123. }
  124.  
  125. // USE OTHER DISTANCE
  126. //float distance = other_distance;
  127.  
  128. similarity.push_back(distance);
  129.  
  130. // SUM UP DISTANCES
  131. ssm_sum += distance;
  132.  
  133. // COUNT DISTANCES
  134. ssm_count += 1;
  135.  
  136. // GET MIN AND MAX DISTANCE
  137. if (distance < ssm_min){
  138. ssm_min = distance;
  139. }
  140. else if (distance > ssm_max){
  141. ssm_max = distance;
  142. }
  143. else {}
  144.  
  145. }
  146.  
  147. // GET MIDDLE OF DISTANCES
  148. ssm_middle = ssm_sum / ssm_count;
  149.  
  150. // ####################
  151. // ## 4. MAKE MATRIX ##
  152. // ####################
  153.  
  154. ssm.push_back(similarity);
  155.  
  156.  
  157. }
  158.  
  159.  
  160. auto end_distance = std::chrono::system_clock::now();
  161.  
  162.  
  163. // PRINT MIN, MAX AND MIDDLE
  164. std::cout << "SSM_MIN: " << ssm_min << " SSM_MAX: " << ssm_max << std::endl;
  165. std::cout << "SSM_MIDDLE: " << ssm_middle << std::endl;
  166.  
  167. // ###############################################
  168. // ## 4.1 PUT MATRIX IN A CSV FILE FOR PLOTTING ##
  169. // ###############################################
  170. auto start_file_ssm = std::chrono::system_clock::now();
  171.  
  172. if (FILEPRINT == true) {
  173. FILE *fp_ssm = std::fopen("/Users/stevendrewers/CLionProjects/Sound-to-Light-2.0/CSV/ssm.csv", "w");
  174.  
  175. for (int m = 0; m < ssm.size(); m++) {
  176. for (int n = 0; n < ssm.size(); n++) {
  177. fprintf(fp_ssm, "%f,", ssm[m][n]);
  178. }
  179. fprintf(fp_ssm, "\n");
  180. }
  181.  
  182. fclose(fp_ssm);
  183.  
  184. }
  185. auto end_file_ssm = std::chrono::system_clock::now();
  186.  
  187.  
  188.  
  189. // ###################################
  190. // ## 5. NOVELTY BASED SEGMENTATION ##
  191. // ###################################
  192.  
  193. // ####################################################
  194. // ## 5.1 CREATE RADIAL GAUSSIAN CHECKERBOARD KERNEL ##
  195. // ####################################################
  196.  
  197. std::vector<std::vector<float>> cb_kernel;
  198. std::vector<float> cb_kernel_line;
  199. float cb_value;
  200. std::vector<std::vector<float>> gaussian_kernel;
  201. std::vector<float> gaussian_kernel_line;
  202. float gaussian_value;
  203. float epsilon = 0.5;
  204. std::vector<std::vector<float>> kernel;
  205. std::vector<float> kernel_line;
  206. float value;
  207.  
  208. auto start_kernel = std::chrono::system_clock::now();
  209.  
  210. // CREATE CHECKERBOARD KERNEL
  211. for (int m = 1; m <= N; m++){
  212. for (int n = 1; n <= N; n++){
  213. if ((m <= L && n <= L) || (m >= (L+2) && n >= (L+2))){
  214. cb_value = 1;
  215. }
  216. else if (m == (L+1) || n == (L+1)) {
  217. cb_value = 0;
  218. }
  219. else {
  220. cb_value = -1;
  221. }
  222. cb_kernel_line.push_back(cb_value);
  223. }
  224. cb_kernel.push_back(cb_kernel_line);
  225. cb_kernel_line.clear();
  226. }
  227.  
  228. if (FILEPRINT == true) {
  229. FILE *fp_cb_kernel = std::fopen("/Users/stevendrewers/CLionProjects/Sound-to-Light-2.0/CSV/cb_kernel.csv", "w");
  230.  
  231. for (int m = 0; m < cb_kernel.size(); m++) {
  232. for (int n = 0; n < cb_kernel.size(); n++) {
  233. fprintf(fp_cb_kernel, "%f,", cb_kernel[m][n]);
  234. }
  235. fprintf(fp_cb_kernel, "\n");
  236. }
  237.  
  238.  
  239. fclose(fp_cb_kernel);
  240. }
  241.  
  242. // CREATE RADIAL GAUSSIAN KERNEL
  243. float increment = 2.0/N;
  244. for (float s = (-1.0 + (increment/2)); s < 1.0; s+=increment){
  245. for (float t = (-1.0 + (increment/2)); t < 1.0; t+=increment){
  246. gaussian_value = exp(-1 * pow(epsilon, 2) * ( pow(s, 2) + pow(t, 2) ));
  247. gaussian_kernel_line.push_back(gaussian_value);
  248. }
  249. gaussian_kernel.push_back(gaussian_kernel_line);
  250. gaussian_kernel_line.clear();
  251. }
  252. if (FILEPRINT == true) {
  253. FILE *fp_gaussian_kernel = std::fopen(
  254. "/Users/stevendrewers/CLionProjects/Sound-to-Light-2.0/CSV/gaussian_kernel.csv", "w");
  255.  
  256. for (int m = 0; m < gaussian_kernel.size(); m++) {
  257. for (int n = 0; n < gaussian_kernel.size(); n++) {
  258. fprintf(fp_gaussian_kernel, "%f,", gaussian_kernel[m][n]);
  259. }
  260. fprintf(fp_gaussian_kernel, "\n");
  261. }
  262.  
  263. fclose(fp_gaussian_kernel);
  264. }
  265.  
  266. // ADD THEM UP ELEMENTWISE!
  267. for (int m = 0; m < N; m++){
  268. for (int n = 0; n < N; n++){
  269. value = cb_kernel[m][n] * gaussian_kernel[m][n];
  270. kernel_line.push_back(value);
  271. }
  272. kernel.push_back(kernel_line);
  273. kernel_line.clear();
  274. }
  275. if (FILEPRINT == true) {
  276. FILE *fp_kernel = std::fopen("/Users/stevendrewers/CLionProjects/Sound-to-Light-2.0/CSV/kernel.csv", "w");
  277.  
  278. for (int m = 0; m < kernel.size(); m++) {
  279. for (int n = 0; n < kernel.size(); n++) {
  280. fprintf(fp_kernel, "%f,", kernel[m][n]);
  281. }
  282. fprintf(fp_kernel, "\n");
  283. }
  284.  
  285. fclose(fp_kernel);
  286. }
  287.  
  288. auto end_kernel = std::chrono::system_clock::now();
  289.  
  290. // 1 1 0 -1 -1
  291. // 1 1 0 -1 -1
  292. // 0 0 0 0 0
  293. // -1 -1 0 1 1
  294. // -1 -1 0 1 1
  295.  
  296.  
  297.  
  298.  
  299.  
  300. // ####################################
  301. // ## 5.1 KERNEL MIT SSM VERHEIRATEN ##
  302. // ####################################
  303.  
  304. // LAUT BUCH: DELTA_KERNEL(n) = SUM(K(k,l)*S(n+k,n+l)
  305. // LAUT BUCH MÜSSTE DAS ALLES IRGENDWIE SO GEHEN...
  306. std::vector<time_value_float> novelty_function;
  307. float novelty_value = 0;
  308.  
  309. auto start_filter = std::chrono::system_clock::now();
  310.  
  311.  
  312. for (int n = 0; n < (ssm.size()); n++){
  313. novelty_value = 0;
  314. // [n,n] müsste der punkt sein, um den herum wir uns alles ansehen.... also auf der diagonalen liegen
  315. for (int k = 0; k < N; k++){
  316. // dann über k....
  317. for (int l = 0; l < N; l++){
  318. // dann checken wie groß n gerade so ist, evtl. müssen wir gar nicht rechnen, weil zero padding
  319.  
  320. if ((n-L) < 0 || ((n-L)+k) >= ssm.size() || ((n-L)+l) >= ssm.size()){
  321. novelty_value += 0;
  322. } else {
  323. float k_val = kernel[k][l];
  324. float ssm_val = ssm[(n-L)+k][(n-L)+l];
  325. novelty_value += (k_val * ssm_val);
  326. }
  327.  
  328. }
  329. }
  330. float novelty_time = n*0.05;
  331. novelty_function.push_back({novelty_time, novelty_value});
  332. }
  333.  
  334. auto end_filter = std::chrono::system_clock::now();
  335.  
  336. auto start_file_novelty = std::chrono::system_clock::now();
  337. auto end_file_novelty = std::chrono::system_clock::now();
  338.  
  339. float novelty_middle = 0;
  340. for (int i = 0; i < novelty_function.size(); i++){
  341. novelty_middle += novelty_function[i].value;
  342. }
  343. novelty_middle /= novelty_function.size();
  344. novelty_middle *= 1.1;
  345. std::cout << "NOVELTY MIDDLE: " << novelty_middle << std::endl;
  346.  
  347. std::cout << "--- SEGMENT CHANGES ---" << std::endl;
  348. for (int i = 1; i < novelty_function.size()-1; i++){
  349. if (novelty_function[i].value >= novelty_middle
  350. && novelty_function[i-1].value < novelty_function[i].value
  351. && novelty_function[i+1].value < novelty_function[i].value ){
  352. segments.push_back({novelty_function[i].time, novelty_function[i].value});
  353. std::cout << "time: " << novelty_function[i].time << std::endl;
  354. }
  355. }
  356.  
  357. bool multiple_peaks = false;
  358. std::vector<int> indexes_of_multiple_peaks;
  359. for (int i = 0; i < segments.size()-1; i++) {
  360.  
  361. if((abs(segments[i+1].time - segments[i].time)) < 2.0 && multiple_peaks) {
  362. indexes_of_multiple_peaks.push_back(i);
  363. }
  364.  
  365. if ((abs(segments[i+1].time - segments[i].time)) < 2.0 && !multiple_peaks) {
  366. multiple_peaks = true;
  367. indexes_of_multiple_peaks.push_back(i);
  368. }
  369.  
  370. if(((abs(segments[i+1].time - segments[i].time)) >= 2.0 || i == segments.size() - 2) && multiple_peaks) {
  371. multiple_peaks = false;
  372. float max_value_of_multiple_peaks = 0;
  373. int index_of_max_value_of_multiple_peaks = 0;
  374. // find out which index has the highest value
  375. for(int i = 0; i < indexes_of_multiple_peaks.size(); i++) {
  376. if(segments[indexes_of_multiple_peaks[i]].value >= max_value_of_multiple_peaks) {
  377. max_value_of_multiple_peaks = segments[indexes_of_multiple_peaks[i]].value;
  378. index_of_max_value_of_multiple_peaks = indexes_of_multiple_peaks[i];
  379. }
  380. }
  381. // remove the index of the value we want to keep
  382. indexes_of_multiple_peaks.erase(std::remove(indexes_of_multiple_peaks.begin(), indexes_of_multiple_peaks.end(), max_value_of_multiple_peaks), indexes_of_multiple_peaks.end());
  383.  
  384. for(int j = indexes_of_multiple_peaks.size() - 1; j >= 0; j--) {
  385. std::cout << "to be erased: " << segments[indexes_of_multiple_peaks[j]].time << std::endl;
  386. for(int k = 0; k < segments.size(); k++) {
  387. if(indexes_of_multiple_peaks[j] == k) {
  388. segments.erase(segments.begin() + k);
  389. }
  390. }
  391. }
  392. i = 0;
  393. indexes_of_multiple_peaks.clear();
  394. }
  395. }
  396.  
  397. std::cout << "--- SEGMENTS JOHANNES STYLE ---" << std::endl;
  398. for (auto tvf:segments){
  399. std::cout << "time: " << tvf.time << std::endl;
  400. }
  401.  
  402.  
  403.  
  404.  
  405. if (FILEPRINT == true) {
  406. FILE *fp_novelty = std::fopen("/Users/stevendrewers/CLionProjects/Sound-to-Light-2.0/CSV/novelty.csv", "w");
  407.  
  408. for (int i = 0; i < novelty_function.size(); i++) {
  409. fprintf(fp_novelty, "%f, %f\n", novelty_function[i].time, novelty_function[i].value);
  410. }
  411. fclose(fp_novelty);
  412. }
  413.  
  414.  
  415. // #########################
  416. // ## 6. CLUSTER SEGMENTS ##
  417. // #########################
  418.  
  419. // PRINT COMPUTATION TIME
  420. auto end = std::chrono::system_clock::now();
  421. std::chrono::duration<double> elapsed_seconds = end-start;
  422. std::chrono::duration<double> elapsed_seconds_audiobin = end_audiobin-start_audiobin;
  423. std::chrono::duration<double> elapsed_seconds_distance = end_distance-start_distance;
  424. std::chrono::duration<double> elapsed_seconds_file_ssm = end_file_ssm-start_file_ssm;
  425. std::chrono::duration<double> elapsed_seconds_kernel = end_kernel-start_kernel;
  426. std::chrono::duration<double> elapsed_seconds_filter = end_filter-start_filter;
  427. std::chrono::duration<double> elapsed_seconds_file_novelty = end_file_novelty-start_file_novelty;
  428.  
  429.  
  430. std::time_t end_time = std::chrono::system_clock::to_time_t(end);
  431.  
  432. std::cout << "finished segmentation at " << std::ctime(&end_time)
  433. << "elapsed time: " << elapsed_seconds.count() << "s\n"
  434. << "elapsed_seconds_audiobin: " << elapsed_seconds_audiobin.count() << "s\n"
  435. << "elapsed_seconds_distance: " << elapsed_seconds_distance.count() << "s\n"
  436. << "elapsed_seconds_file_ssm: " << elapsed_seconds_file_ssm.count() << "s\n"
  437. << "elapsed_seconds_kernel: " << elapsed_seconds_kernel.count() << "s\n"
  438. << "elapsed_seconds_filter: " << elapsed_seconds_filter.count() << "s\n"
  439. << "elapsed_seconds_file_novelty: " << elapsed_seconds_file_novelty.count() << "s\n"
  440. ;
  441.  
  442.  
  443. // RETURN SEGMENTS
  444.  
  445. return segments;
  446. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement