Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import random
- import names
- from matplotlib import pyplot as plt
def roundGrade(grades):
    """Snap every grade in *grades* to the closest value on the grading scale.

    The scale is 12, 10, 7, 4, 2, 0, -3. When a grade lies exactly halfway
    between two scale values, the one listed first (the higher one) is used.

    Returns a new list with one rounded grade per input grade, in input order.
    """
    scale = [12, 10, 7, 4, 2, 0, -3]

    def nearest(value):
        # min() returns the first minimal element, so the order of `scale`
        # decides how ties are broken.
        return min(scale, key=lambda step: abs(step - value))

    return [nearest(value) for value in grades]
def computeFinalGrades(grades):
    """Compute one final grade per student from a CSV file of assignment grades.

    The first two columns of the file are treated as identifying columns and
    every column from the third onward as an assignment grade.

    Rules:
      * One assignment: the final grade is that assignment's grade, unrounded.
      * Two or more assignments: the lowest grade is dropped, the rest are
        averaged, and the mean is rounded to the grading scale with
        ``roundGrade``.
      * A student with -3 in any assignment gets -3 as the final grade,
        whatever the other rules produce.

    Parameters:
        grades: path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        pandas.Series named ``'Final'`` with one entry per row of the file.

    Raises:
        ValueError: if the file has no assignment columns.
    """
    data = pd.read_csv(grades)

    # Only the assignment columns take part in the arithmetic; the identifying
    # columns must not leak into the sum or the minimum.
    assignments = data.iloc[:, 2:]
    n_assignments = assignments.shape[1]
    if n_assignments == 0:
        raise ValueError("grade file contains no assignment columns")

    if n_assignments == 1:
        data['Final'] = assignments.iloc[:, 0]
    else:
        # Sum minus the minimum is the sum of the grades that remain after
        # dropping the single lowest one.
        kept_total = assignments.sum(axis=1) - assignments.min(axis=1)
        data['Final'] = roundGrade(kept_total / (n_assignments - 1))

    # A -3 in any assignment overrides the computed grade.
    has_minus_three = (assignments == -3).any(axis=1)
    data.loc[has_minus_three, 'Final'] = -3

    return data['Final']
def generate_test_data(assignments, entries, grade_errors, filename='data.csv'):
    """Write a CSV file of randomly generated student grades for testing.

    The file has a header ``StudentID,Name,Assignment1..AssignmentN`` followed
    by one row per student. Each grade is drawn at random from a pool made of
    the grading scale (12, 10, 7, 4, 2, 0, -3) plus ``grade_errors`` extra
    values drawn uniformly from [-3, 12] and rounded to one decimal. The extra
    values are meant to be off-scale, although a draw can happen to coincide
    with a scale value. Student IDs are drawn independently, so duplicates are
    possible.

    Parameters:
        assignments: number of assignment columns to generate.
        entries: number of student rows to generate.
        grade_errors: number of random extra values added to the grade pool.
        filename: path of the file to write; defaults to ``'data.csv'``.
    """
    grade_pool = [12, 10, 7, 4, 2, 0, -3]
    grade_pool.extend(
        round(random.uniform(-3, 12), 1) for _ in range(grade_errors)
    )

    header = ["StudentID", "Name"]
    header.extend(f"Assignment{n}" for n in range(1, assignments + 1))

    with open(filename, 'w', encoding='utf-8') as file:
        file.write(",".join(header) + "\n")
        for _ in range(entries):
            name = names.get_full_name()
            number = f"s{int(random.uniform(103456, 154321))}"
            grades = [str(random.choice(grade_pool)) for _ in range(assignments)]
            # Joining the whole row at once keeps the field count equal to the
            # header's, even when there are no assignment columns.
            file.write(",".join([number, name, *grades]) + "\n")
# Generate the sample file: 3 assignments, 50 student rows, and 10 random
# extra values added to the grade pool.
generate_test_data(assignments=3, entries=50, grade_errors=10)
def check_data(filename):
    """Validate a grade CSV file for duplicate student IDs and off-scale grades.

    The first two columns are treated as identifying columns (the file must
    have a ``StudentID`` column) and every column from the third onward as an
    assignment grade. A grade is valid when it is one of
    12, 10, 7, 4, 2, 0, -3.

    Parameters:
        filename: path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A ``(ok, payload)`` tuple:
          * ``(False, ids)`` when the number of distinct student IDs differs
            from the number of rows; ``ids`` is the list of IDs that occur
            more than once.
          * ``(False, frame)`` when any cell is missing or any grade is off
            the scale; ``frame`` holds the identifying columns, the grades
            with invalid ones replaced by NaN, and a ``'NaNs'`` column
            counting the missing cells in each row.
          * ``(True, frame)`` otherwise, where ``frame`` has the same layout
            and every ``'NaNs'`` entry is 0.
    """
    grade_list = [12, 10, 7, 4, 2, 0, -3]
    df = pd.read_csv(filename)

    # One group per distinct StudentID; fewer groups than rows means some ID
    # is repeated (or missing, since groupby drops missing keys).
    id_counts = df.groupby('StudentID')['StudentID'].agg('count')
    if len(df.index) != len(id_counts.index):
        return False, id_counts[id_counts > 1].index.tolist()

    # Blank out every grade that is not on the scale so it shows up as NaN.
    raw_grades = df.iloc[:, 2:]
    grades = raw_grades.where(raw_grades.isin(grade_list))

    result = pd.concat([df.iloc[:, :2], grades], axis=1)
    result['NaNs'] = result.isnull().sum(axis=1)

    if result.isnull().values.any():
        return False, result
    return True, result[result['NaNs'] <= 0]
# Validate the generated file, keeping both the status flag and the payload
# returned with it.
data_check, mydata = check_data(filename='data.csv')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement