Advertisement
Guest User

Untitled

a guest
Dec 15th, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.14 KB | None | 0 0
  1. import pprint
  2. import pandas as panda
  3. import statistics
  4.  
  5.  
  6. class DataManager:
  7.  
  8.     def __init__(self):
  9.         pass
  10.  
  11.     def data_loader(self):
  12.         self.add_first_line('iris.data', 'SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Class')
  13.         dataset = panda.read_csv('iris.data')
  14.         separated_data = {data_column: dataset[data_column].tolist() for data_column in dataset.columns}
  15.         self.rm_first_line('iris.data')
  16.         return separated_data
  17.  
  18.     def add_first_line(self, filename, line):
  19.         with open(filename, 'r+') as f_handler:
  20.             content = f_handler.read()
  21.             f_handler.seek(0, 0)
  22.             f_handler.write(line.rstrip('\r\n') + '\n' + content)
  23.  
  24.     def rm_first_line(self, filename):
  25.         with open(filename, 'r') as fin:
  26.             data = fin.read().splitlines(True)
  27.         with open(filename, 'w') as fout:
  28.             fout.writelines(data[1:])
  29.  
  30.     def prepare_context(self):
  31.         data = self.data_loader()
  32.         context = {}
  33.         for column_name, values in data.items():
  34.             column_context = {}
  35.             try:
  36.                 if type(values[0]) is str:
  37.                     mode_value = statistics.mode(values)
  38.  
  39.                     column_context['mode_value'] = mode_value
  40.                     column_context['freq_mode_value'] = values.count(mode_value)
  41.                 else:
  42.                     mean_value = statistics.mean(values)
  43.  
  44.                     column_context['mean_value'] = mean_value
  45.                     column_context['median_value'] = statistics.median(values)
  46.                     column_context['min_value'] = min(values)
  47.                     column_context['max_value'] = max(values)
  48.                     column_context['variance_value'] = statistics.pvariance(values, mean_value)
  49.             except statistics.StatisticsError as message:
  50.                 column_context['error'] = message
  51.             finally:
  52.                 context[column_name] = column_context
  53.         return context
  54.  
  55.  
  56. def main():
  57.     data_manager = DataManager()
  58.     pprint.pprint(data_manager.prepare_context(), width=1)
  59.  
  60.  
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement