Advertisement
Guest User

Untitled

a guest
Apr 18th, 2019
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.25 KB | None | 0 0
  1. import os
  2. import pickle
  3. from glob import iglob
  4. import numpy as np
  5. import librosa
  6.  
  7. DATA_AUDIO_DIR = './audio'
  8. TARGET_SR = 8000
  9. OUTPUT_DIR = './output'
  10. OUTPUT_DIR_TRAIN = os.path.join(OUTPUT_DIR, 'train')
  11. OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR, 'test')
  12. AUDIO_LENGTH = 10000
  13.  
  14. class_ids = {
  15. 'normal': 0,
  16. 'murmur': 1,
  17. 'extrahls': 2,
  18. 'artifact': 3,
  19. 'unlabelled': 4,
  20. }
  21.  
  22.  
  23. def extract_class_id(wav_filename):
  24. if 'normal' in wav_filename:
  25. return class_ids.get('normal')
  26. elif 'murmur' in wav_filename:
  27. return class_ids.get('murmur')
  28. elif 'extrahls' in wav_filename:
  29. return class_ids.get('extrahls')
  30. elif 'artifact' in wav_filename:
  31. return class_ids.get('artifact')
  32. elif 'unlabelled' in wav_filename:
  33. return class_ids.get('unlabelled')
  34. else:
  35. return class_ids.get('unlabelled')
  36.  
  37.  
  38. def read_audio_from_filename(filename, target_sr):
  39. audio, _ = librosa.load(filename, sr=target_sr, mono=True)
  40. audio = audio.reshape(-1, 1)
  41. return audio
  42.  
  43.  
  44. def convert_data():
  45. for i, wav_filename in enumerate(iglob(os.path.join(DATA_AUDIO_DIR, '**/**.wav'), recursive=True)):
  46. class_id = extract_class_id(wav_filename)
  47. audio_buf = read_audio_from_filename(wav_filename, target_sr=TARGET_SR)
  48. # normalize mean 0, variance 1
  49. audio_buf = (audio_buf - np.mean(audio_buf)) / np.std(audio_buf)
  50. original_length = len(audio_buf)
  51. print(i, wav_filename, original_length, np.round(np.mean(audio_buf), 4), np.std(audio_buf))
  52. if original_length < AUDIO_LENGTH:
  53. audio_buf = np.concatenate((audio_buf, np.zeros(shape=(AUDIO_LENGTH - original_length, 1))))
  54. print('PAD New length =', len(audio_buf))
  55. elif original_length > AUDIO_LENGTH:
  56. audio_buf = audio_buf[0:AUDIO_LENGTH]
  57. print('CUT New length =', len(audio_buf))
  58.  
  59. output_folder = OUTPUT_DIR_TRAIN
  60. if i // 50 == 0:
  61. output_folder = OUTPUT_DIR_TEST
  62.  
  63. output_filename = os.path.join(output_folder, str(i) + '.pkl')
  64.  
  65. out = {'class_id': class_id,
  66. 'audio': audio_buf,
  67. 'sr': TARGET_SR}
  68. with open(output_filename, 'wb') as w:
  69. pickle.dump(out, w)
  70.  
  71.  
  72. if __name__ == '__main__':
  73. convert_data()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement