Advertisement
Guest User

Untitled

a guest
Apr 18th, 2019
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.46 KB | None | 0 0
  1. import os
  2. import pickle
  3. from glob import iglob
  4. from shutil import rmtree
  5. import numpy as np
  6. from model_data import read_audio_from_filename
  7.  
  # Root folder that is searched recursively for input .wav files.
  DATA_AUDIO_DIR = './audio'
  # Sample rate requested when loading audio; also stored with every sample.
  TARGET_SR = 8000
  # Output folders for the pickled samples.
  OUTPUT_DIR = './output'
  OUTPUT_DIR_TRAIN = os.path.join(OUTPUT_DIR, 'train')
  OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR, 'test')
  # Every clip is zero-padded or truncated to exactly this many samples.
  AUDIO_LENGTH = 10000
  14.  
  15.  
  def mkdir_p(path):
      """Create ``path`` and any missing parent directories (like ``mkdir -p``).

      A directory that already exists is not an error.

      Args:
          path: Directory path to create.

      Raises:
          OSError: If the directory cannot be created, including when
              ``path`` already exists but is not a directory.
      """
      # exist_ok=True tolerates an existing directory but still raises when
      # the path exists as a non-directory.
      os.makedirs(path, exist_ok=True)
  25.  
  26.  
  def del_folder(path):
      """Recursively delete the directory tree at ``path``, best effort.

      Filesystem errors (for example, the folder not existing) are ignored so
      the function can be used as an unconditional cleanup step.

      Args:
          path: Directory to remove.
      """
      try:
          rmtree(path)
      except OSError:
          # Best-effort cleanup: a missing or undeletable folder is not fatal.
          # Only filesystem errors are swallowed, so Ctrl-C and programming
          # errors still surface.
          pass
  32.  
  33.  
  # Start from empty output folders: wipe anything left from a previous run,
  # then recreate both directories. NOTE: this runs at import time.
  del_folder(OUTPUT_DIR_TRAIN)
  del_folder(OUTPUT_DIR_TEST)
  mkdir_p(OUTPUT_DIR_TRAIN)
  mkdir_p(OUTPUT_DIR_TEST)
  38.  
  # Integer label assigned to each class keyword searched for in wav paths.
  class_ids = {
      'normal': 0,
      'murmur': 1,
      'extrahls': 2,
      'artifact': 3,
      'unlabelled': 4,
  }


  def extract_class_id(wav_filename):
      """Return the integer class id inferred from a wav file path.

      The path is searched (case-sensitively, as a plain substring) for the
      class keywords in a fixed priority order; the first keyword found wins.
      Paths containing none of the keywords are treated as ``'unlabelled'``.

      Args:
          wav_filename: Path or file name of the wav file.

      Returns:
          The matching value from ``class_ids``.
      """
      # Order matters: a path containing several keywords gets the first one.
      for keyword in ('normal', 'murmur', 'extrahls', 'artifact'):
          if keyword in wav_filename:
              return class_ids.get(keyword)
      # Explicitly 'unlabelled' files and unrecognized names share one label.
      return class_ids.get('unlabelled')
  61.  
  62.  
  def convert_data():
      """Convert every wav file under ``DATA_AUDIO_DIR`` into a pickled sample.

      For each file found by the recursive glob:

      1. infer the class id from the file path,
      2. load the audio at ``TARGET_SR``,
      3. normalize it to zero mean and unit variance,
      4. zero-pad or truncate it to exactly ``AUDIO_LENGTH`` samples,
      5. pickle ``{'class_id', 'audio', 'sr'}`` to ``<index>.pkl``.

      All samples are written to ``OUTPUT_DIR_TRAIN``; nothing is written to
      ``OUTPUT_DIR_TEST`` by this function.
      """
      pattern = os.path.join(DATA_AUDIO_DIR, '**/**.wav')
      for i, wav_filename in enumerate(iglob(pattern, recursive=True)):
          class_id = extract_class_id(wav_filename)
          audio_buf = read_audio_from_filename(wav_filename, target_sr=TARGET_SR)

          # normalize mean 0, variance 1
          mean = np.mean(audio_buf)
          std = np.std(audio_buf)
          audio_buf = audio_buf - mean
          if std > 0:
              audio_buf = audio_buf / std
          # else: a constant (e.g. silent) clip has zero spread; dividing would
          # fill it with NaN/inf, so it is left zero-centred instead.

          original_length = len(audio_buf)
          print(i, wav_filename, original_length, np.round(np.mean(audio_buf), 4), np.std(audio_buf))

          if original_length < AUDIO_LENGTH:
              # Pad along the first axis only; trailing dimensions follow the
              # buffer so both (N,) and (N, channels) inputs concatenate cleanly.
              pad_shape = (AUDIO_LENGTH - original_length,) + audio_buf.shape[1:]
              audio_buf = np.concatenate((audio_buf, np.zeros(shape=pad_shape)))
              print('PAD New length =', len(audio_buf))
          elif original_length > AUDIO_LENGTH:
              audio_buf = audio_buf[0:AUDIO_LENGTH]
              print('CUT New length =', len(audio_buf))

          output_folder = OUTPUT_DIR_TRAIN
          output_filename = os.path.join(output_folder, str(i) + '.pkl')

          out = {
              'class_id': class_id,
              'audio': audio_buf,
              'sr': TARGET_SR,
          }
          with open(output_filename, 'wb') as w:
              pickle.dump(out, w)
  86.  
  87.  
  # Run the conversion only when executed as a script, not when imported.
  if __name__ == '__main__':
      convert_data()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement