Advertisement
Guest User

Untitled

a guest
Nov 21st, 2019
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.50 KB | None | 0 0
  1. import sys
  2. import numpy as np
  3. import scipy.signal
  4. import glob
  5. import matplotlib.pyplot as plt
  6. import os
  7. import random
  8. import sys
  9. from pathlib import Path
  10.  
  11.  
  12. import NumpyAvro as npAvro
  13.  
  14.  
  15. from sklearn.utils import shuffle
  16.  
  17. data_set_folder = "/data/integrals/realize-job-809888bc-2281-49e0-b65a-a3b23d9942a/"
  18. data_set_files = shuffle(glob.glob(data_set_folder + "*.ds"), random_state=seed)
  19.  
  20. # If data is still downloading use this to filter files that are not completely fetched
  21. expected_file_size = 7372941
  22. print(len(data_set_files))
  23.  
  24. #data_set_files = list(filter(lambda f: os.path.getsize(f) == expected_file_size, data_set_files))
  25. print(len(data_set_files))
  26.  
  27.  
  28.  
  29. data_set_folder = "/data/integrals/realize-job-809888bc-2281-49e0-b65a-a3b23d9942a/"
  30. #exp_data_folder = "/data/integrals/expdata/expdata/HIGHEST_PEAK_OUTSIDE_ADDED_IMP_REGIONS/expdata_16k/"
  31. #exp_data_folder = "/data/integrals/expdata/expdata/HIGHEST_PEAK_OUTSIDE_PREDEFINED_IMP_REGIONS/expdata_16k/"
  32. rlz_data_files = glob.glob(data_set_folder + "*.ds")
  33. rlz_data_files.sort()
  34. print(len(rlz_data_files))
  35. for file in rlz_data_files:
  36. print(file)
  37.  
  38.  
  39.  
  40. file_idx = 0
  41. #file_idx = random.randint(0, len(data_set_files)-1)
  42.  
  43. rlz_data_file = rlz_data_files[file_idx]
  44. data_set = npAvro.read_data_set(rlz_data_file)
  45. #keys = open("/data/integrals/expdata/expdata/HIGHEST_PEAK/expdata_keys/00.txt", "r").read().splitlines()
  46. #print(keys[1])
  47. print(file_idx, rlz_data_file)
  48. type(data_set[0]),data_set[0].shape,type(data_set[1]),data_set[1].shape
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement