Guest User

Untitled

a guest
Oct 16th, 2019
100
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  def combine(in_root, out_root, processes=10):
      """Combine the WAV parts of every session under ``in_root`` in parallel.

      Each sub-directory of ``in_root`` is treated as one session and is handed
      to ``combine_in_session`` in a worker process; results are written below
      ``out_root`` in a directory named after the session.

      Args:
          in_root: Directory whose sub-directories are the sessions.
          out_root: Directory that receives one output directory per session.
          processes: Upper bound on the number of worker processes
              (defaults to 10, the previously hard-coded value).
      """
      # Only directories can hold session parts; plain files sitting in
      # ``in_root`` would otherwise just produce empty output directories.
      all_sessions = sorted(
          name for name in os.listdir(in_root)
          if os.path.isdir(os.path.join(in_root, name))
      )
      if not all_sessions:
          # Nothing to do, so do not start a pool at all.
          return

      # ``Pool.map`` passes a single argument, so pack the three values the
      # worker needs into one tuple per session.
      map_args = [(out_root, session, in_root) for session in all_sessions]

      # Never start more workers than there are sessions to process.
      with multiprocessing.Pool(min(processes, len(all_sessions))) as pool:
          pool.map(combine_in_session, map_args)
  8.  
  def _write_batch(store_dir, batch_num, chunks, samplerate):
      """Concatenate ``chunks`` and write them to ``split_<batch_num>.wav``."""
      sf.write(file=os.path.join(store_dir, 'split_{}.wav'.format(batch_num)),
               data=np.concatenate(chunks, axis=0), samplerate=samplerate)


  def combine_in_session(args):
      """Merge the WAV parts of one session into longer ``split_<n>.wav`` files.

      Parts are appended to the current batch in sorted filename order. Once
      the batch has accumulated at least a threshold duration, it is written
      out and a new batch is started. The threshold is an integer number of
      seconds drawn once per session from 20..29. Whatever remains after the
      last part is written as a final, possibly shorter, file.

      Args:
          args: Sequence ``(out_root, session, in_root)``. It is a single
              argument so the function can be used with ``Pool.map``.
      """
      out_root, session, in_root = args[:3]

      store_dir = os.path.join(out_root, session)
      os.makedirs(store_dir, exist_ok=True)

      # glob returns files in arbitrary order; sort so that the concatenation
      # order is reproducible from run to run.
      all_parts = sorted(glob.glob(os.path.join(in_root, session, '*.wav')))

      # Use a freshly seeded generator: forked pool workers inherit a copy of
      # the parent's global NumPy random state and would otherwise all draw
      # the same values.
      max_len = np.random.RandomState().randint(low=20, high=30)

      batch = []          # sample arrays waiting to be written
      batch_len = 0.0     # accumulated duration of ``batch`` in seconds
      batch_rate = None   # sample rate shared by the arrays in ``batch``
      batch_num = 0

      for part in all_parts:
          data, rate = sf.read(part)
          if len(data) == 0:
              # An empty part contributes no samples and no duration.
              continue

          # Flush when the batch is long enough. Also flush when the sample
          # rate changes, because one output file can only carry one rate.
          if batch and (batch_len >= max_len or rate != batch_rate):
              _write_batch(store_dir, batch_num, batch, batch_rate)
              batch = []
              batch_len = 0.0
              batch_num += 1

          batch.append(data)
          batch_rate = rate
          # The first axis is the frame count for mono and multi-channel data.
          batch_len += len(data) / rate

      if batch:
          _write_batch(store_dir, batch_num, batch, batch_rate)
RAW Paste Data