SHARE
TWEET

Untitled

a guest Oct 16th, 2019 98 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  def combine(in_root, out_root):
      """Process every session found under ``in_root`` in parallel.

      Each entry returned by ``os.listdir(in_root)`` is treated as a session
      name and handed to ``combine_in_session`` on a pool of 10 worker
      processes.

      Args:
          in_root: Directory whose entries are the per-session input folders.
          out_root: Directory under which the per-session output is written.
      """
      # Pool.map passes a single argument per call, so each job is packed as
      # a tuple in the order combine_in_session reads it back.
      jobs = [(out_root, name, in_root) for name in os.listdir(in_root)]

      with multiprocessing.Pool(10) as pool:
          pool.map(combine_in_session, jobs)
  8.  
  def _write_batch(store_dir, batch_num, chunks, samplerate):
      """Concatenate ``chunks`` and save them as ``split_<batch_num>.wav``."""
      sf.write(file=os.path.join(store_dir, 'split_{}.wav'.format(batch_num)),
               data=np.concatenate(chunks), samplerate=samplerate)


  def combine_in_session(args):
      """Merge one session's wav parts into longer ``split_<n>.wav`` files.

      The ``*.wav`` files in ``<in_root>/<session>`` are read in sorted order
      and appended to the current batch. Before a part is added, the batch is
      written to ``<out_root>/<session>/split_<n>.wav`` if its accumulated
      duration has already reached the session's length threshold, so a batch
      may overshoot the threshold by up to one part. Whatever is left after
      the last part is written as a final file.

      Args:
          args: Sequence ``(out_root, session, in_root)``; packed into one
              argument so the function can be used with ``Pool.map``.
      """
      out_root, session, in_root = args[0], args[1], args[2]

      store_dir = os.path.join(out_root, session)
      os.makedirs(store_dir, exist_ok=True)

      # glob yields files in arbitrary directory order; sort so the
      # concatenation order is reproducible between runs.
      all_parts = sorted(glob.glob(os.path.join(in_root, session, '*.wav')))

      # Length threshold in seconds, an integer in [20, 30).
      # NOTE(review): drawn once per session and reused for every batch; if a
      # fresh threshold per batch was intended, re-draw after each flush.
      # Also, under a fork-based pool the workers inherit the parent's global
      # NumPy RNG state, so different workers may draw the same values --
      # confirm whether that matters for this data set.
      max_len = np.random.randint(low=20, high=30)

      chunks = []        # sample arrays belonging to the current batch
      batch_len = 0.0    # accumulated duration of the batch, in seconds
      batch_num = 0
      batch_rate = None  # sample rate shared by the parts in the batch

      for part in all_parts:
          data, rate = sf.read(part)
          if len(data) == 0:
              # Nothing to append; also keeps an all-empty session from
              # producing an empty output file.
              continue

          # Close the batch once it is long enough, or when the sample rate
          # changes, so audio with different rates is never mixed in one file.
          if chunks and (batch_len >= max_len or rate != batch_rate):
              _write_batch(store_dir, batch_num, chunks, batch_rate)
              chunks = []
              batch_len = 0.0
              batch_num += 1

          chunks.append(data)
          batch_rate = rate
          # Frames are along axis 0 (also for multi-channel data), and the
          # duration must use the file's own sample rate.
          batch_len += len(data) / rate

      if chunks:
          _write_batch(store_dir, batch_num, chunks, batch_rate)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top