Advertisement
Guest User

MaXwEllDeN

a guest
Feb 27th, 2019
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.34 KB | None | 0 0
  1. """
  2. Crawler for fetching data from UFCG's Online Student Dashboard
  3. Author: Maxwell Albuquerque
  4. """
  5.  
  6. import re
  7. import bs4
  8. import random
  9. import requests
  10. from bs4 import BeautifulSoup
  11.  
# Base URL of the dashboard; every URI below is appended to it.
_URL = "https://pre.ufcg.edu.br:8443/ControleAcademicoOnline"
# Endpoint the login form data is POSTed to.
_URI_LOGIN = "/Controlador"
# Page listing the student's current classes (read by get_courses).
_URI_CLASSES = "/Controlador?command=AlunoTurmasListar"
# Page listing the offered courses (read by get_offered_courses).
_URI_OFFERED_COURSES = "/Controlador?command=AlunoDisciplinasOfertadas"
# NOTE(review): not referenced anywhere in this file -- presumably the
# enrollment form; confirm before relying on it.
_URI_REGISTER_COURSES = "/Controlador?command=AlunoMatriculaGetForm"
  17.  
  18. class Course:
  19.     __uri = None
  20.     __name = None
  21.     __absences = None
  22.     __absences_limit = None
  23.     __scores = []
  24.  
  25.     def __init__(self, course_uri, name, absences, absences_limit, scores):
  26.         self.__uri = course_uri
  27.         self.__name = name
  28.         self.__absences = absences
  29.         self.__absences_limit = absences_limit
  30.         self.__scores = scores
  31.  
  32.     def get_name(self):
  33.         return self.__name
  34.  
  35.     def get_scores(self):
  36.         return self.__scores
  37.    
  38.     def get_absences(self):
  39.         return self.__absences
  40.  
  41.     def get_absences_limit(self):
  42.         return self.__absences_limit
  43.  
  44.     def get_summary(self):
  45.         message = "📖 %s\n\n" % self.get_name()
  46.         scores = self.get_scores()
  47.  
  48.         if len(scores["scores"]) > 0:
  49.             for index in range(len(scores["scores"])):
  50.                 text = "Nota %s - *%s* - _Peso %0.2f_.\n"
  51.                 score = ""
  52.  
  53.                 if scores["scores"][index] == -1:
  54.                     score = "indisponível"
  55.                 else:
  56.                     score = "%0.2f" % scores["scores"][index]
  57.  
  58.                 message += text % (index + 1, score, scores["weights"][index])
  59.  
  60.             message += "\nMédia parcial: *%0.2f*" % (scores["average"])
  61.         else:
  62.             message += "Ainda não foi registrada nenhuma nota dessa disciplina.\n"
  63.  
  64.         message += "\nFaltas registradas: %s/%s"
  65.         message = message % (self.get_absences(), self.get_absences_limit())
  66.  
  67.         return message
  68.  
  69. class AcademicPanel:
  70.     __registration_id = None
  71.     __password = None
  72.     __session = None
  73.     _user_name = None
  74.  
  75.     def __init__(self, registration_id, password):
  76.         self.__registration_id = registration_id
  77.         self.__password = password
  78.  
  79.         self.__update_session()
  80.  
  81.     def __update_session(self):
  82.         """Authenticates in website and gets a new
  83.        session object.
  84.        """
  85.         self.__session = requests.Session()
  86.  
  87.         data_frame = {
  88.             "login": self.__registration_id,
  89.             "senha": self.__password,
  90.             "command": "AlunoLogin"
  91.         }
  92.  
  93.         self.__session.post(_URL + _URI_LOGIN, data=data_frame)
  94.  
  95.     def is_logged_in(self):
  96.         request = self.__session.get(_URL)
  97.         soup = BeautifulSoup(request.text, "html.parser")
  98.  
  99.         user = soup.find("div", {"class": "col-sm-9 col-xs-7"})
  100.         return user is not None
  101.  
  102.     def __get_course_absences(self, course_uri):
  103.         url = _URL + "/" + course_uri
  104.         url = url.replace("AlunoTurmaNotas", "AlunoTurmaFrequencia")
  105.        
  106.         request = self.__session.get(url)
  107.         soup = BeautifulSoup(request.text, "html.parser")
  108.        
  109.         table = soup.find("table")
  110.         table_thead = table.thead.tr
  111.  
  112.         row = list(table_thead)
  113.         offset = -1
  114.  
  115.         absences_limit = 0
  116.  
  117.         for index in range(7, len(row)):
  118.             item = row[index]
  119.             offset += 1            
  120.             if type(item) is not bs4.element.NavigableString:
  121.                 if re.match(r"Total", item.get_text()) is not None:
  122.                     limit_str = re.findall(r"\d+", item["title"])[0]
  123.                     absences_limit = int(limit_str)
  124.                     break
  125.  
  126.         table_tr = table.tbody.tr
  127.         row = list(table_tr)
  128.  
  129.         table_tbody = table.tbody.tr
  130.         row = list(table_tbody)
  131.  
  132.         absences = int(row[7 + offset].get_text())
  133.  
  134.         return absences, absences_limit
  135.  
  136.     def __get_course_scores(self, course_uri):
  137.         data = {
  138.             "weights": [],
  139.             "scores": [],
  140.             "average": -1.0,
  141.             "final_exam": -1.0,
  142.             "final_average": -1.0
  143.         }
  144.  
  145.         request = self.__session.get(_URL + "/" + course_uri)
  146.         soup = BeautifulSoup(request.text, "html.parser")
  147.        
  148.         table = soup.find("table")
  149.         table_thead = table.thead.tr
  150.        
  151.         for item in list(table_thead):
  152.             if type(item) is not bs4.element.NavigableString:
  153.                 weights = re.findall(r"P = \d+", item.get_text())
  154.  
  155.                 if len(weights) > 0:
  156.                     weight_str = re.findall(r"\d+", weights[0])[0]
  157.                     weight = float(weight_str)
  158.                     data["weights"].append(weight)
  159.  
  160.         table_tr = table.tbody.tr
  161.         row = list(table_tr)
  162.  
  163.         score_amount = len(data["weights"])
  164.  
  165.         for score_index in range(score_amount):
  166.             score_string = row[7 + 2 * score_index].get_text()
  167.  
  168.             score = -1.0
  169.  
  170.             if score_string != "":
  171.                 score = float(row[7 + 2 * score_index].get_text())
  172.  
  173.             data["scores"].append(score)
  174.  
  175.         average_index = 7 + 2 * score_amount
  176.         average_str = row[average_index].get_text().replace(",", ".")
  177.  
  178.         if average_str != "":
  179.             data["average"] = float(average_str)
  180.  
  181.         final_exam_index = average_index + 2
  182.         final_exam_str = row[final_exam_index].get_text().replace(",", ".")
  183.  
  184.         if final_exam_str != "":
  185.             data["final_exam"] = float(final_exam_str)
  186.  
  187.         final_average_index = final_exam_index + 2
  188.         final_average_str = row[final_average_index].get_text().replace(",", ".")
  189.  
  190.         if final_average_str != "":
  191.             data["final_average"] = float(final_average_str)
  192.  
  193.         return data
  194.  
  195.     def get_courses(self):
  196.         courses = []
  197.  
  198.         request = self.__session.get(_URL + _URI_CLASSES)
  199.         soup = BeautifulSoup(request.text, "html.parser")
  200.  
  201.         table_body = soup.find("table").tbody
  202.  
  203.         for item in list(table_body):
  204.             if type(item) is not bs4.element.NavigableString:
  205.                 children = list(item.children)
  206.                
  207.                 name = children[5].get_text()
  208.  
  209.                 # Removing breaklines in course's name
  210.                 name = name[3:]
  211.                 name = name[: len(name) - 1]
  212.  
  213.                 # Making name pretty
  214.                 name = prettify_name(name)
  215.                 course_uri = children[5].a["href"]
  216.                
  217.                 scores = self.__get_course_scores(course_uri)
  218.                 absences, absences_limit = self.__get_course_absences(course_uri)
  219.  
  220.                 courses.append(Course(course_uri, name, absences, absences_limit, scores))
  221.  
  222.         return courses
  223.  
  224.     def get_offered_courses(self, allow_blocked=True, semester=None):
  225.         request = self.__session.get(_URL + _URI_OFFERED_COURSES)
  226.         soup = BeautifulSoup(request.text, "html.parser")
  227.  
  228.         table_body = soup.find("table").tbody
  229.        
  230.         courses = {}
  231.         used_hashs = []
  232.  
  233.         for item in list(table_body):
  234.             if type(item) is not bs4.element.NavigableString:
  235.                 children = list(item.children)
  236.  
  237.                 class_name = children[5].get_text().lower()
  238.                 class_name = class_name.replace("\r", "")                
  239.                 is_blocked = False
  240.  
  241.                 if len(re.findall(r"\(bloqueada(.*)\)", class_name)) > 0:
  242.                     is_blocked = True
  243.                     class_name = class_name.split("\n")[1]
  244.  
  245.                 if (is_blocked and not allow_blocked):
  246.                     continue
  247.  
  248.                 aux = children[3].get_text().split("-")
  249.                 class_code = int(aux[0])    
  250.                 class_number = int(aux[1])
  251.                 class_semester = list(children[1].children)[0]
  252.  
  253.                 if is_number(class_semester):
  254.                     class_semester = int(class_semester)
  255.                 else:
  256.                     class_semester = 0
  257.  
  258.                 if semester is not None and class_semester != semester:
  259.                     continue
  260.  
  261.                 class_name = class_name.replace("\n", "")
  262.                 class_name = prettify_name(class_name)
  263.  
  264.                 # Getting schedule
  265.                 aux = children[7].get_text().split("\n")
  266.                 class_schedule = []
  267.  
  268.                 for row in aux:
  269.                     template = {"day": 0, "from": "", "to": "", "place": ""}
  270.                     if len(row) > 0 and row[0].isdigit():
  271.                         template["day"] = int(row[0])
  272.                         template["from"] = row[2:7]
  273.                         template["to"] = row[8:13]
  274.                         template["place"] = re.findall(r"\((.*)\)", row)[0]
  275.  
  276.                     if template["day"] == 0:
  277.                         continue
  278.                     else:
  279.                         class_schedule.append(template)
  280.  
  281.                 if len(class_schedule) == 0:
  282.                     continue
  283.  
  284.                 class_hash = generate_unused_hash(used_hashs)
  285.                 template = {
  286. #                    "number": class_number,
  287.                     "schedule": class_schedule,
  288.                     "blocked": is_blocked,
  289.                     "class_hash": class_hash
  290.                 }
  291.  
  292.                 if not class_code in courses:
  293.                     courses[class_code] = {
  294.                         "code": class_code,
  295.                         "name": class_name,
  296.                         "semester": class_semester,
  297.                         "classes": {}
  298.                     }
  299.  
  300.                 courses[class_code]["classes"][class_number] = template
  301.                 used_hashs.append(class_hash)
  302.  
  303.         return courses
  304.  
  305. def generate_unused_hash(used_hashs):
  306.     hash = random.getrandbits(64)
  307.  
  308.     while hash in used_hashs:
  309.         hash = random.getrandbits(64)
  310.  
  311.     return hash
  312.  
  313. def prettify_name(name):
  314.     new_name = ""
  315.     last = " "
  316.    
  317.     for letter in name.lower():
  318.         if last == " " or (letter == "i" and last == "I") or last == "(":
  319.             new_name += letter.upper()
  320.             last = letter.upper()
  321.         else:
  322.             new_name += letter
  323.             last = letter
  324.  
  325.     return new_name
  326.  
  327. def is_number(value):
  328.     try:
  329.         int(value)
  330.         return True
  331.     except:
  332.         return False
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement