Advertisement
Guest User

Untitled

a guest
May 5th, 2018
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.27 KB | None | 0 0
  1. '''
  2. Created on Aug 2, 2016
  3.  
  4. @author: Angus
  5. '''
  6.  
  7. import cv2
  8. import mysql.connector, os, json, gzip, ConfigParser, sys
  9. from translation.CourseRecordRemoval import RemoveCourseRecords
  10. from translation.Functions import ExtractCourseInformation, getDayDiff, getNextDay
  11. from translation.LearnerMode import learner_mode, sessions
  12. from translation.VideoMode import video_interaction
  13. from translation.QuizMode import quiz_mode, quiz_sessions
  14. from translation.ForumMode import forum_interaction, forum_sessions
  15. #from translation.SurveyMode import survey_mode
  16.  
  17.  
  18. def main(argv):
  19.  
  20. # Read configs
  21. config = ConfigParser.ConfigParser()
  22. config.read("config")
  23.  
  24. # All the configs are read as string
  25. course_log_path = config.get("data", "path")
  26. remove_filtered_logs = config.get("data", "remove_filtered_logs")
  27. log_update_list = json.loads(config.get("data", "log_update_list"))
  28. metadata_update_list = json.loads(config.get("data", "metadata_update_list"))
  29. #survey_update_map = json.loads(config.get("data", "survey_update_map"))
  30.  
  31. user = config.get("mysqld", "user")
  32. password = config.get("mysqld", "password")
  33. host = config.get("mysqld", "host")
  34. database = config.get("mysqld", "database")
  35.  
  36. # Database
  37. connection = mysql.connector.connect(user=user, password=12345678, host=host, database="DelftX Database", charset='utf8mb4')
  38. cursor = connection.cursor()
  39.  
  40. # Delete relevant records before updating the database
  41. print "Removing log records..."
  42. for course_code in log_update_list:
  43. print str("\t" + course_code)
  44. RemoveCourseRecords(course_log_path, course_code, "log", cursor)
  45. print "Removing metadata records..."
  46. for course_code in metadata_update_list:
  47. print str("\t" + course_code)
  48. RemoveCourseRecords(course_log_path, course_code, "metadata", cursor)
  49. # print "Removing survey records..."
  50. # for course_code in survey_update_map.keys():
  51. # print str("\t" + course_code)
  52. # RemoveCourseRecords(course_log_path, course_code, "survey", cursor)
  53.  
  54.  
  55.  
  56. folders = os.listdir(course_log_path)
  57. for folder in folders:
  58. if folder != "daily_logs":
  59.  
  60. # Only for Mac OS
  61. if folder == ".DS_Store":
  62. continue
  63.  
  64. course_code = folder
  65.  
  66. print "Processing\t" + course_code
  67.  
  68. # A file named "course_processing_tracker" (JSON format) is created
  69. # for each course to keep track of the processing files
  70. tracker_path = str(course_log_path + course_code + "/course_processing_tracker")
  71. if not os.path.exists(tracker_path):
  72.  
  73. output_file = open(tracker_path, "w")
  74. tracker_map = {}
  75.  
  76. # This value is used to keep track of the processing status for the course' daily log files,
  77. # i.e., "False" (not finished yet) and "True" (finished)
  78. tracker_map["status"] = False
  79.  
  80. tracker_map["processed_dates"] = []
  81. tracker_map["num_processed_dates"] = 0
  82. output_file.write(json.dumps(tracker_map))
  83. output_file.close()
  84. #
  85. # Read the "course_processing_tracker" file
  86. input_file = open(tracker_path, "r")
  87. tracker_map = json.loads(input_file.read())
  88. input_file.close()
  89.  
  90. metadata_path = str(course_log_path + course_code + "/metadata/")
  91.  
  92. # Determine whether the course_structure file is present
  93. mark = False
  94. files = os.listdir(metadata_path)
  95. for file in files:
  96. if "course_structure" in file:
  97. mark = True
  98. break
  99. if not mark:
  100. print "The course structure file is missing.\n"
  101. continue
  102.  
  103. # Learner mode
  104. if course_code in metadata_update_list:
  105. print "Learner Mode processing..."
  106. learner_mode(metadata_path, course_code, cursor)
  107. #
  108. # # # Survey mode
  109. # # survey_path = str(course_log_path + course_code + "/surveys/")
  110. # # if course_code in survey_update_map.keys():
  111. # # print "Survey Mode processing..."
  112. # # pre_id_index = int(survey_update_map[course_code][0])
  113. # # post_id_index = int(survey_update_map[course_code][1])
  114. # # survey_mode(metadata_path, survey_path, cursor, pre_id_index, post_id_index)
  115. #
  116. if tracker_map["status"]:
  117. print
  118. continue
  119. #
  120. # Retrieve the start/end date of the course
  121. course_metadata_map = ExtractCourseInformation(metadata_path)
  122. course_id = course_metadata_map["course_id"]
  123. start_date = course_metadata_map["start_date"]
  124. end_date = course_metadata_map["end_date"]
  125.  
  126. current_date = start_date
  127. while current_date <= end_date:
  128.  
  129. current_date_string = str(current_date)[0:10]
  130. if current_date_string not in tracker_map["processed_dates"]:
  131.  
  132. daily_log_file = str("delftx-edx-events-" + current_date_string + ".log.gz")
  133. if os.path.exists(str(course_log_path + "/daily_logs/" + daily_log_file)):
  134.  
  135. print daily_log_file
  136.  
  137. # Decompress log files
  138. unzip_file_path = str(course_log_path + course_code + "/unzip_daily_logs/")
  139. if not os.path.exists(unzip_file_path):
  140. os.mkdir(unzip_file_path)
  141.  
  142. output_path = str(unzip_file_path + daily_log_file[0:-3])
  143.  
  144. if not os.path.exists(output_path):
  145. output_file = open(output_path, 'w')
  146. with gzip.open(str(course_log_path + "/daily_logs/" + daily_log_file), 'r') as f:
  147. for line in f:
  148. jsonObject = json.loads(line)
  149. if course_id in jsonObject["context"]["course_id"]:
  150. output_file.write(line)
  151. output_file.close()
  152.  
  153. daily_log_path = output_path
  154. #
  155. # Video_interaction table
  156. # print "1.\t Video_interaction table processing..."
  157. remaining_video_interaction_log_path = course_log_path + course_code + "/remaining_video_interaction_logs"
  158. video_interaction(metadata_path, daily_log_path, remaining_video_interaction_log_path, cursor)
  159. #
  160. # Quiz mode
  161. # print "2.\t Quiz mode processing..."
  162. quiz_mode(daily_log_path, cursor)
  163. #
  164. # Quiz_sessions table
  165. # print "3.\t Quiz_sessions table processing..."
  166. remaining_quiz_session_log_path = course_log_path + course_code + "/remaining_quiz_session_logs"
  167. quiz_sessions(metadata_path, daily_log_path, remaining_quiz_session_log_path, cursor)
  168. #
  169. # Forum_interaction table
  170. # print "4.\t Forum_interaction table processing..."
  171. forum_interaction(metadata_path, daily_log_path, cursor)
  172. #
  173. # Forum_sessions table
  174. # print "5.\t Forum_sessions table processing..."
  175. remaining_forum_session_log_path = course_log_path + course_code + "/remaining_forum_session_logs"
  176. forum_sessions(metadata_path, daily_log_path, remaining_forum_session_log_path, cursor)
  177.  
  178. # Sessions table
  179. # print "6.\t Sessions table processing..."
  180. remaining_session_log_path = course_log_path + course_code + "/remaining_session_logs"
  181. sessions(metadata_path, daily_log_path, remaining_session_log_path, cursor)
  182.  
  183. tracker_map["processed_dates"].append(current_date_string)
  184. #
  185. current_date = getNextDay(current_date)
  186. #
  187. if len(tracker_map["processed_dates"]) == getDayDiff(start_date, end_date) + 1:
  188. tracker_map["status"] = True
  189.  
  190. if tracker_map["num_processed_dates"] != len(tracker_map["processed_dates"]):
  191. tracker_map["num_processed_dates"] = len(tracker_map["processed_dates"])
  192. output_file = open(tracker_path, "w")
  193. output_file.write(json.dumps(tracker_map))
  194. output_file.close()
  195.  
  196. # Delete the decompressed files
  197. if remove_filtered_logs == "1":
  198. log_files = os.listdir(str(course_log_path + "/daily_logs/"))
  199. for log_file in log_files:
  200. os.remove(str(course_log_path + "/daily_logs/" + log_file))
  201.  
  202.  
  203.  
  204.  
  205.  
  206.  
  207.  
  208. # ###############################################################################
  209. if __name__ == '__main__':
  210.  
  211. configFile = sys.argv[1:]
  212. if len(configFile) == 0:
  213. configFile = 'config'
  214. main(configFile)
  215.  
  216. print "All finished."
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement