Guest User

Untitled

a guest
May 20th, 2018
156
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.77 KB | None | 0 0
  1. import numpy as np
  2. from pandas import read_csv
  3. from math import floor, sqrt, exp
  4.  
  5.  
  6. def load_data(path_to_csv, has_header=True, has_rows_names=True):
  7. if has_header:
  8. header = 0
  9. else:
  10. header = None
  11. if has_rows_names:
  12. index = 0
  13. else:
  14. index = None
  15. data = read_csv(path_to_csv, header=header, index_col=index)
  16. data.fillna('', inplace=True)
  17. matrix = data.as_matrix()
  18. if has_header:
  19. if has_rows_names:
  20. return matrix, data.columns, data.index
  21. else:
  22. return matrix, data.columns
  23. else:
  24. if has_rows_names:
  25. return matrix, data.index
  26. else:
  27. return matrix
  28.  
  29.  
  30. def get_index_of_solvability(answers):
  31. num_students = answers.shape[0]
  32. num_questions = answers.shape[1]
  33.  
  34. solved = answers.sum(axis=0).astype(float)
  35. return np.array([solved[i] / num_students for i in range(num_questions)])
  36.  
  37.  
  38. def separate_students_answers(answers, group_size):
  39. solved = answers.sum(axis=1)
  40. sorted_indices = np.argsort(solved)
  41. sorted_answers = np.take(answers, sorted_indices, axis=0)
  42.  
  43. worst_answers = sorted_answers[0:group_size]
  44. best_answers = sorted_answers[-group_size:]
  45. return best_answers, worst_answers
  46.  
  47.  
  48. def get_index_of_dicrimination(answers):
  49. num_students = answers.shape[0]
  50. num_questions = answers.shape[1]
  51.  
  52. group_fraction = 0.27
  53. group_size = int(floor(num_students * group_fraction))
  54.  
  55. best, worst = separate_students_answers(answers, group_size)
  56. best_solved = best.sum(axis=0).astype(float)
  57. worst_solved = worst.sum(axis=0).astype(float)
  58. best_indices = np.array([best_solved[i] / group_size for i in range(num_questions)])
  59. worst_indices = np.array([worst_solved[i] / group_size for i in range(num_questions)])
  60. return best_indices - worst_indices
  61.  
  62.  
  63. def get_questions_statistics(answers, questions):
  64. index_of_solvability = get_index_of_solvability(answers)
  65. index_of_discrimination = get_index_of_dicrimination(answers)
  66.  
  67. useless_questions = questions[index_of_discrimination < 0.2]
  68. significant_questions = questions[index_of_discrimination > 0.4]
  69. difficult_questions = questions[index_of_solvability <= 0.1]
  70. easy_questions = questions[index_of_solvability >= 0.9]
  71.  
  72. return useless_questions, significant_questions, difficult_questions, easy_questions
  73.  
  74.  
  75. def get_shared_answers(answers1, answers2):
  76. shared_correct_mask = np.array([answers1[i] == answers2[i] and answers1[i] == 1 for i in range(len(answers1))])
  77. shared_wrong_mask = np.array([answers1[i] == answers2[i] and answers1[i] == 0 for i in range(len(answers1))])
  78. return np.array([np.sum(shared_correct_mask), np.sum(shared_wrong_mask)])
  79.  
  80.  
  81. def get_neighbors_coefficient(seating_plan, student1, student2):
  82. loc1 = np.where(seating_plan == student1)
  83. loc2 = np.where(seating_plan == student2)
  84. distance = sqrt((loc1[0][0] - loc2[0][0]) ** 2 + (loc1[1][0] - loc2[1][0]) ** 2)
  85. return 1 - 1 / (1 + exp(6 - 1.2 * distance))
  86.  
  87.  
  88. def detect_possible_cheating(answers, students, seating_plan):
  89. num_questions = answers.shape[1]
  90. possible_cheaters = []
  91. for i in range(len(students)):
  92. for j in range(i + 1, len(students)):
  93. left = students[i]
  94. right = students[j]
  95. answers_left = answers[i]
  96. answers_right = answers[j]
  97. shared = get_shared_answers(answers_left, answers_right)
  98. total_shared = shared[0] + shared[1]
  99. a_lot_shared = float(total_shared) / num_questions >= 0.9
  100. too_many_shared_wrong = float(shared[1]) / num_questions >= 0.2
  101. all_shared = total_shared == num_questions
  102. has_shared_wrong = shared[1] > 0
  103. if (a_lot_shared and too_many_shared_wrong) or (all_shared and has_shared_wrong):
  104. coeff = get_neighbors_coefficient(seating_plan, left, right)
  105. if coeff > 0.5:
  106. value = coeff * (total_shared / num_questions)
  107. possible_cheaters.append("{}, {}: {}".format(left, right, value))
  108.  
  109. return possible_cheaters
  110.  
  111.  
  112. def analyze_test(answers_file_name, seating_plan_file_name):
  113. answers, questions, students = load_data(answers_file_name)
  114. seating_plan = load_data(seating_plan_file_name, False, False)
  115.  
  116. useless, significant, difficult, easy = get_questions_statistics(answers, questions)
  117. print("Useless questions: {}\nSignificant questions: {}\nDifficult questions: {}\nEasy questions: {}"
  118. .format(", ".join(useless), ", ".join(significant), ", ".join(difficult), ", ".join(easy)))
  119. print("")
  120. possible_cheaters = detect_possible_cheating(answers, students, seating_plan)
  121. print("Possible cheaters (with coefficients): \n{}".format("\n".join(possible_cheaters)))
  122.  
  123.  
  124. analyze_test("data/2018-04-16_20-12-08.answ.csv", "data/2018-04-16_20-12-08.seat.csv")
Add Comment
Please, Sign In to add comment