import os
import shutil
from multiprocessing import Process

import numpy as np

# Each uint16 sample carries one bit per logical channel.
CHANNELS = 16                  # bits (channels) per sample
SAMPLE_LENGTH = CHANNELS // 8  # bytes per uint16 sample
BUFFER = 256                   # samples buffered before each write
PROCNUM = 4                    # number of worker processes


def _bit_table():
    """Build a lookup table mapping a uint16 sample value to a 16-tuple
    of b'0'/b'1' bytes, most-significant bit first (index 0 == bit 15)."""
    return [
        tuple(b'1' if bit == '1' else b'0' for bit in "{:016b}".format(value))
        for value in range(2 ** CHANNELS)
    ]


def _flush(samples, out_list):
    """Transpose the buffered sample tuples and append each channel's bits
    to its output file."""
    for channel, bits in enumerate(zip(*samples)):
        out_list[channel].write(b''.join(bits))


def _work(data, vals, proc):
    """Demultiplex one slice of samples into per-channel bit-dump files
    named dir/largefile_dump_<channel>_<proc>.dat.

    Defined at module level (not as a closure inside fast()) so the target
    is picklable and works under the spawn/forkserver start methods, not
    only fork.
    """
    print("Process %d data size: %d" % (proc, len(data)))
    out_list = [
        open('dir/largefile_dump_%d_%d.dat' % (i, proc), 'wb')
        for i in range(CHANNELS)
    ]
    try:
        samples = []
        for sample in data:
            samples.append(vals[sample])
            # Flush every BUFFER samples exactly (the original flushed at
            # BUFFER + 1 because it tested the counter before incrementing).
            if len(samples) == BUFFER:
                _flush(samples, out_list)
                samples = []
        if samples:  # trailing partial buffer
            _flush(samples, out_list)
    finally:
        for f in out_list:
            f.close()
    print("Process %d is complete" % proc)


def fast():
    """Read largefile.dat (uint16 samples), split it across PROCNUM worker
    processes that demultiplex each sample's bits into per-channel dump
    files under ./dir, then merge the per-process dumps — in process
    order — into dir/channel_<n>.dat."""
    vals = _bit_table()

    data = np.fromfile('largefile.dat', dtype=np.uint16)
    size = len(data)
    print("Data size: %d" % size)

    os.makedirs('dir', exist_ok=True)  # output directory must exist

    samples_per_process = (size // SAMPLE_LENGTH) // PROCNUM
    procs = []
    for i in range(PROCNUM):
        d_size = SAMPLE_LENGTH * samples_per_process
        if i != PROCNUM - 1:
            d = data[i * d_size:(i + 1) * d_size]
        else:
            # The last worker takes the remainder of the data.
            d = data[i * d_size:]
        p = Process(target=_work, args=(d, vals, i))
        procs.append(p)
        p.start()

    # join() blocks until each child exits — no busy-wait polling needed.
    for p in procs:
        p.join()

    print("STARTING TO MERGE FILES!")
    for i in range(CHANNELS):
        # Match only this channel's dump files, and sort them by process
        # index so the channel stream is concatenated in sample order
        # (os.listdir returns entries in arbitrary order).
        files = sorted(
            ('dir/%s' % f for f in os.listdir('dir')
             if f.startswith('largefile_dump_%d_' % i)),
            key=lambda path: int(path.rsplit('_', 1)[1].split('.')[0]),
        )
        with open("dir/channel_%d.dat" % i, 'wb') as w:
            for fname in files:
                with open(fname, 'rb') as r:
                    shutil.copyfileobj(r, w, 10 * 1024 * 1024)


if __name__ == "__main__":
    # Guarded entry point: required so that spawn/forkserver children can
    # import this module without re-running the whole pipeline.
    fast()