Guest User

Untitled

a guest
May 25th, 2018
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.99 KB | None | 0 0
  1. import pyarrow as pa
  2. import os
  3. from csv import writer
  4. from sys import argv
  5. import numpy as np
  6.  
  7. def get_data(filepath):
  8. data = None
  9. with pa.memory_map(filepath, 'rb') as f:
  10. f.seek(0)
  11. buf = f.read_buffer()
  12.  
  13. data = pa.deserialize(buf)
  14. return data
  15.  
  16. def write_csv_iter(data, destination_path, file):
  17. if sum(data.shape) > 0:
  18. it = np.nditer(data, flags=['multi_index'])
  19. with open(destination_path, 'w') as writefile:
  20. writ = writer(writefile)
  21. while not it.finished:
  22. if it[0] != 0:
  23. writ.writerow(
  24. [it.multi_index[0],
  25. it.multi_index[1],
  26. it[0]])
  27. it.iternext()
  28. else:
  29. print("Skipping {} as it is empty".format(file))
  30.  
  31.  
  32. def write_csv_nonzero(data, destination_path, file):
  33.  
  34. indices = np.nonzero(data)
  35.  
  36. if np.count_nonzero(data) > 0:
  37. with open(destination_path, 'w') as writefile:
  38. writ = writer(writefile)
  39. for row, col in zip(indices[0], indices[1]):
  40. writ.writerow([row, col, data[row, col]])
  41. else:
  42. print("Skipping {} as it is empty".format(file))
  43.  
  44. def convert_pyarrow_to_csv(path):
  45.  
  46. if not os.path.exists(os.path.join(path, 'converted')):
  47. os.mkdir(os.path.join(path, 'converted'))
  48.  
  49. for file in os.listdir(path):
  50. destination_path = os.path.join(path, 'converted', os.path.splitext(file)[0] + '.csv')
  51. if not os.path.exists(destination_path) and file.startswith('output_'):
  52. print("Converting {}".format(file))
  53.  
  54. data = get_data(os.path.join(path, file))
  55.  
  56. print("Array {} has dimensions {}".format(file, data.shape))
  57.  
  58. write_csv_nonzero(data, destination_path, file)
  59. # write_csv_iter(data, destination_path, file)
  60.  
  61.  
  62. if __name__ == '__main__':
  63.  
  64. assert len(argv) == 2, "Usage: python process.py <path_to_results_dir>"
  65. convert_pyarrow_to_csv(argv[1])
Add Comment
Please, Sign In to add comment