Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''
- Created on Aug 2, 2016
- @author: Angus
- '''
- import cv2
- import mysql.connector, os, json, gzip, ConfigParser, sys
- from translation.CourseRecordRemoval import RemoveCourseRecords
- from translation.Functions import ExtractCourseInformation, getDayDiff, getNextDay
- from translation.LearnerMode import learner_mode, sessions
- from translation.VideoMode import video_interaction
- from translation.QuizMode import quiz_mode, quiz_sessions
- from translation.ForumMode import forum_interaction, forum_sessions
- #from translation.SurveyMode import survey_mode
def _load_tracker(tracker_path):
    """Return a course's processing tracker, creating the file on first use.

    The tracker is a JSON object with three keys: "status" (True once all
    daily logs of the course have been processed), "processed_dates" (list of
    date strings already handled) and "num_processed_dates".
    """
    if not os.path.exists(tracker_path):
        _save_tracker(tracker_path, {
            "status": False,
            "processed_dates": [],
            "num_processed_dates": 0,
        })
    with open(tracker_path, "r") as tracker_file:
        return json.loads(tracker_file.read())


def _save_tracker(tracker_path, tracker_map):
    """Overwrite the tracker file with the JSON form of tracker_map."""
    with open(tracker_path, "w") as tracker_file:
        tracker_file.write(json.dumps(tracker_map))


def _has_course_structure(metadata_path):
    """Tell whether metadata_path holds a file with "course_structure" in its name."""
    if not os.path.isdir(metadata_path):
        return False
    return any("course_structure" in name for name in os.listdir(metadata_path))


def _filter_daily_log(compressed_log_path, output_path, course_id):
    """Copy the events whose context course id contains course_id.

    Reads the gzipped daily log line by line (one JSON object per line) and
    writes the matching lines, unchanged, to output_path.
    """
    completed = False
    try:
        with gzip.open(compressed_log_path, 'r') as compressed_log:
            with open(output_path, 'w') as filtered_log:
                for line in compressed_log:
                    event = json.loads(line)
                    if course_id in event["context"]["course_id"]:
                        filtered_log.write(line)
        completed = True
    finally:
        # The caller treats an existing output file as a finished one, so a
        # half-written file must not survive a failure or an interruption.
        if not completed and os.path.exists(output_path):
            os.remove(output_path)


def _process_daily_log(course_dir, metadata_path, daily_log_path, cursor):
    """Feed one filtered daily log to every translation step, in order."""
    # Video_interaction table
    video_interaction(metadata_path, daily_log_path,
                      os.path.join(course_dir, "remaining_video_interaction_logs"), cursor)
    # Quiz mode
    quiz_mode(daily_log_path, cursor)
    # Quiz_sessions table
    quiz_sessions(metadata_path, daily_log_path,
                  os.path.join(course_dir, "remaining_quiz_session_logs"), cursor)
    # Forum_interaction table
    forum_interaction(metadata_path, daily_log_path, cursor)
    # Forum_sessions table
    forum_sessions(metadata_path, daily_log_path,
                   os.path.join(course_dir, "remaining_forum_session_logs"), cursor)
    # Sessions table
    sessions(metadata_path, daily_log_path,
             os.path.join(course_dir, "remaining_session_logs"), cursor)


def _process_course(course_log_path, course_code, metadata_update_list,
                    remove_filtered_logs, cursor):
    """Process the not-yet-handled daily logs of a single course folder."""
    print("Processing\t" + course_code)

    course_dir = os.path.join(course_log_path, course_code)
    daily_logs_dir = os.path.join(course_log_path, "daily_logs")
    # The trailing separator is kept because these two paths are extended by
    # plain string concatenation (here and, presumably, in the helpers).
    metadata_path = os.path.join(course_dir, "metadata", "")
    unzip_dir = os.path.join(course_dir, "unzip_daily_logs", "")

    # A file named "course_processing_tracker" (JSON format) is kept for each
    # course to remember which daily logs were already processed.
    tracker_path = os.path.join(course_dir, "course_processing_tracker")
    tracker_map = _load_tracker(tracker_path)

    if not _has_course_structure(metadata_path):
        print("The course structure file is missing.\n")
        return

    # Learner mode
    if course_code in metadata_update_list:
        print("Learner Mode processing...")
        learner_mode(metadata_path, course_code, cursor)

    # Every daily log of this course was already processed in an earlier run.
    if tracker_map["status"]:
        print("")
        return

    # Retrieve the start/end date of the course
    course_metadata_map = ExtractCourseInformation(metadata_path)
    course_id = course_metadata_map["course_id"]
    start_date = course_metadata_map["start_date"]
    end_date = course_metadata_map["end_date"]

    processed_dates = tracker_map["processed_dates"]
    current_date = start_date
    while current_date <= end_date:
        current_date_string = str(current_date)[0:10]
        if current_date_string not in processed_dates:
            daily_log_file = "delftx-edx-events-" + current_date_string + ".log.gz"
            compressed_log_path = os.path.join(daily_logs_dir, daily_log_file)
            if os.path.exists(compressed_log_path):
                print(daily_log_file)
                # Decompress the log, keeping only this course's events.
                if not os.path.exists(unzip_dir):
                    os.mkdir(unzip_dir)
                # Same file name without the ".gz" suffix.
                daily_log_path = unzip_dir + daily_log_file[0:-3]
                if not os.path.exists(daily_log_path):
                    _filter_daily_log(compressed_log_path, daily_log_path, course_id)
                _process_daily_log(course_dir, metadata_path, daily_log_path, cursor)
                processed_dates.append(current_date_string)
        current_date = getNextDay(current_date)

    # The course is finished once every day in [start_date, end_date] is done.
    if len(processed_dates) == getDayDiff(start_date, end_date) + 1:
        tracker_map["status"] = True
    if tracker_map["num_processed_dates"] != len(processed_dates):
        tracker_map["num_processed_dates"] = len(processed_dates)
        _save_tracker(tracker_path, tracker_map)

    # Delete the decompressed (course-filtered) files. The shared, compressed
    # "daily_logs" are left alone: they are the input for every other course.
    if remove_filtered_logs == "1" and os.path.isdir(unzip_dir):
        for log_file in os.listdir(unzip_dir):
            os.remove(os.path.join(unzip_dir, log_file))


def main(argv):
    """Translate the daily edX logs of every course folder into the database.

    argv is the path of the configuration file, or a list of such paths; when
    it is empty the file "config" in the working directory is read. The
    configuration must provide, in section "data", the options "path",
    "remove_filtered_logs", "log_update_list" and "metadata_update_list"
    (the latter two as JSON lists of course codes) and, in section "mysqld",
    the options "user", "password", "host" and "database".
    """
    # Read configs
    if isinstance(argv, (list, tuple)):
        config_files = list(argv)
    elif argv:
        config_files = [argv]
    else:
        config_files = []
    if not config_files:
        config_files = ["config"]
    config = ConfigParser.ConfigParser()
    config.read(config_files)

    # All the configs are read as string
    course_log_path = config.get("data", "path")
    remove_filtered_logs = config.get("data", "remove_filtered_logs")
    log_update_list = json.loads(config.get("data", "log_update_list"))
    metadata_update_list = json.loads(config.get("data", "metadata_update_list"))
    user = config.get("mysqld", "user")
    password = config.get("mysqld", "password")
    host = config.get("mysqld", "host")
    database = config.get("mysqld", "database")

    # Database: credentials come from the config file, never from literals.
    connection = mysql.connector.connect(user=user, password=password, host=host,
                                         database=database, charset='utf8mb4')
    try:
        cursor = connection.cursor()

        # Delete relevant records before updating the database
        print("Removing log records...")
        for course_code in log_update_list:
            print("\t" + course_code)
            RemoveCourseRecords(course_log_path, course_code, "log", cursor)
        print("Removing metadata records...")
        for course_code in metadata_update_list:
            print("\t" + course_code)
            RemoveCourseRecords(course_log_path, course_code, "metadata", cursor)

        for folder in os.listdir(course_log_path):
            # "daily_logs" holds the shared raw logs, not a course.
            if folder == "daily_logs":
                continue
            # Stray files such as ".DS_Store" (Mac OS) are not courses either.
            if not os.path.isdir(os.path.join(course_log_path, folder)):
                continue
            _process_course(course_log_path, folder, metadata_update_list,
                            remove_filtered_logs, cursor)
        # NOTE(review): nothing here calls connection.commit(); confirm that
        # the target tables or the server settings make the writes durable.
    finally:
        connection.close()
- # ###############################################################################
if __name__ == '__main__':
    # Hand main() the config file path(s) given on the command line, always
    # as a list; without arguments fall back to the file "config".
    configFile = sys.argv[1:] or ['config']
    main(configFile)
    print("All finished.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement