Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pprint
- import pandas as panda
- import statistics
class DataManager:
    """Load the iris data set and compute per-column summary statistics."""

    # Column labels applied to the header-less data file, in file order.
    COLUMN_NAMES = (
        'SepalLengthCm',
        'SepalWidthCm',
        'PetalLengthCm',
        'PetalWidthCm',
        'Class',
    )

    def data_loader(self, filename='iris.data'):
        """Read the CSV data file into per-column lists.

        The column labels are handed to pandas directly instead of being
        written into the file as a temporary header line, so the file on
        disk is never modified and cannot be left with a stray header when
        parsing fails.

        Args:
            filename: Path of the header-less CSV file to read.

        Returns:
            A dict mapping each column name to the list of values read for
            that column.
        """
        dataset = panda.read_csv(
            filename,
            header=None,
            names=list(self.COLUMN_NAMES),
        )
        return {column: dataset[column].tolist() for column in dataset.columns}

    def add_first_line(self, filename, line):
        """Insert ``line`` as the new first line of ``filename``.

        Trailing carriage-return / newline characters on ``line`` are
        stripped and a single newline is appended before the existing
        content. ``data_loader`` does not call this method; it is kept as
        part of the class interface.
        """
        with open(filename, 'r+') as f_handler:
            content = f_handler.read()
            # Rewind so the new line plus the old content overwrite the file
            # from the start.
            f_handler.seek(0, 0)
            f_handler.write(line.rstrip('\r\n') + '\n' + content)

    def rm_first_line(self, filename):
        """Rewrite ``filename`` without its first line.

        ``data_loader`` does not call this method; it is kept as part of
        the class interface.
        """
        with open(filename, 'r') as fin:
            # keepends=True preserves each line's original terminator.
            data = fin.read().splitlines(True)
        with open(filename, 'w') as fout:
            fout.writelines(data[1:])

    def prepare_context(self):
        """Build summary statistics for every column of the data set.

        Returns:
            A dict mapping column name to the statistics dict produced by
            ``_summarize_column`` for that column's values.
        """
        return {
            column_name: self._summarize_column(values)
            for column_name, values in self.data_loader().items()
        }

    @staticmethod
    def _summarize_column(values):
        """Return the summary statistics for one column.

        A column whose first value is a string gets ``mode_value`` and
        ``freq_mode_value``. Any other column gets ``mean_value``,
        ``median_value``, ``min_value``, ``max_value`` and
        ``variance_value`` (population variance). When the ``statistics``
        module rejects the data, the raised ``StatisticsError`` is stored
        under ``error`` next to whatever was computed before the failure.
        """
        summary = {}
        try:
            # An empty column takes the numeric branch, where
            # statistics.mean raises StatisticsError; indexing values[0]
            # on it would raise an unhandled IndexError instead.
            if values and isinstance(values[0], str):
                mode_value = statistics.mode(values)
                summary['mode_value'] = mode_value
                summary['freq_mode_value'] = values.count(mode_value)
            else:
                mean_value = statistics.mean(values)
                summary['mean_value'] = mean_value
                summary['median_value'] = statistics.median(values)
                summary['min_value'] = min(values)
                summary['max_value'] = max(values)
                # Reuse the mean so pvariance does not recompute it.
                summary['variance_value'] = statistics.pvariance(
                    values, mean_value
                )
        except statistics.StatisticsError as message:
            summary['error'] = message
        return summary
def main():
    """Compute the data set's per-column statistics and pretty-print them."""
    report = DataManager().prepare_context()
    # width=1 makes pprint wrap the output as aggressively as it can.
    pprint.pprint(report, width=1)


# Run the report only when this file is executed as a script.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement