Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def combine(in_root, out_root, num_workers=10):
    """Merge the WAV parts of every session under ``in_root`` in parallel.

    Each sub-directory of ``in_root`` is treated as one session and handed to
    ``combine_in_session``, which writes its merged files to
    ``out_root/<session>``.

    Args:
        in_root: Directory whose sub-directories are the sessions.
        out_root: Directory that receives one sub-directory per session.
        num_workers: Number of worker processes in the pool.
    """
    # Plain files sitting next to the session directories contain no parts;
    # skipping them avoids creating an empty output directory for each one.
    sessions = sorted(
        name for name in os.listdir(in_root)
        if os.path.isdir(os.path.join(in_root, name))
    )
    # Pool.map passes a single argument per task, so the three values are
    # packed into a tuple in the order combine_in_session unpacks them.
    map_args = [(out_root, session, in_root) for session in sessions]
    with multiprocessing.Pool(num_workers) as pool:
        pool.map(combine_in_session, map_args)
def _write_batch(store_dir, batch_num, chunks, samplerate):
    """Join ``chunks`` along the frame axis and write them as one WAV file."""
    sf.write(file=os.path.join(store_dir, 'split_{}.wav'.format(batch_num)),
             data=np.concatenate(chunks), samplerate=samplerate)


def combine_in_session(args):
    """Concatenate one session's WAV parts into longer ``split_<n>.wav`` files.

    Parts are appended to the current batch until its accumulated duration
    reaches a threshold (a random integer number of seconds in [20, 30),
    drawn once per session); the batch is then written out and a new one is
    started. Whatever remains after the last part is written as a final file.

    Args:
        args: ``(out_root, session, in_root)`` tuple. It is a single tuple
            because the function is used as a ``Pool.map`` worker. Input
            parts are read from ``in_root/<session>/*.wav`` and results are
            written to ``out_root/<session>``.
    """
    out_root, session, in_root = args
    store_dir = os.path.join(out_root, session)
    os.makedirs(store_dir, exist_ok=True)

    # glob yields files in arbitrary, filesystem-dependent order; sorting
    # makes the concatenation order reproducible.
    all_parts = sorted(glob.glob(os.path.join(in_root, session, '*.wav')))

    # NOTE(review): under the "fork" start method pool workers inherit the
    # parent's global NumPy RNG state, so different workers may draw the same
    # sequence of thresholds. Reseed per worker if that matters.
    current_max_len = np.random.randint(low=20, high=30)

    current_len = 0.0       # seconds of audio accumulated in the batch
    current_batch_num = 0   # index used in the next output file name
    current_batch = []      # arrays waiting to be joined and written
    batch_rate = None       # sample rate of the batch being assembled

    for part in all_parts:
        if current_len >= current_max_len:
            _write_batch(store_dir, current_batch_num, current_batch,
                         batch_rate)
            current_batch = []
            current_batch_num += 1
            current_len = 0.0

        data, rate = sf.read(part)
        if data.shape[0] == 0:
            # An empty part contributes no samples and no duration.
            continue
        if not current_batch:
            # Parts of one session are assumed to share a sample rate; the
            # first part of a batch decides the rate of the written file.
            batch_rate = rate
        # Keep the arrays as they are instead of converting every sample to
        # a Python float; they are joined once when the batch is written.
        current_batch.append(data)
        # Axis 0 is the frame axis for both mono (frames,) and multi-channel
        # (frames, channels) data, and the duration must use the file's own
        # sample rate rather than an assumed 8000 Hz.
        current_len += data.shape[0] / rate

    if current_batch:
        _write_batch(store_dir, current_batch_num, current_batch, batch_rate)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement