Advertisement
Guest User

prase.py

a guest
Jan 20th, 2020
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.28 KB | None | 0 0
  1. from lxml import etree
  2.  
  3. def open_html(filepath):
  4.     with open(filepath, "r", encoding="utf-8") as f:
  5.         html = f.read()
  6.     return html
  7.  
  8. def prase(html_str):
  9.     html = etree.HTML(html_str)
  10.     table = html.xpath("/html/body/form/table[@id='T1']/tr")[2:]
  11.  
  12.     course = []
  13.     semester = ""
  14.     for tr in table:
  15.         if (len(tr.xpath("td"))) == 1:
  16.             if len(tr.xpath("td/div")) == 1:
  17.                 semester = tr.xpath("td/div/text()")[0]
  18.         else:
  19.             if len(tr.xpath("td/div/font")) > 0:
  20.                 a_course = {}
  21.                 a_course['id'] = tr.xpath("td/div/font/text()")[0]
  22.                 a_course['name'] = tr.xpath("td[2]/div/font/text()")[0]
  23.                 a_course['grade'] = tr.xpath("td[3]/div/font/text()")[0]
  24.                 a_course['credit'] = tr.xpath("td[4]/div/font/text()")[0]
  25.                 a_course['grade_point'] = tr.xpath("td[5]/div/font/text()")[0]
  26.                 a_course['is_passed'] = tr.xpath("td[6]/div/font/text()")[0]
  27.                 a_course['is_normal'] = tr.xpath("td[7]/div/font/text()")[0]
  28.                 if  len(tr.xpath("td[8]/div/font/text()")) == 1:
  29.                     a_course['is_elective'] = tr.xpath("td[8]/div/font/text()")[0]
  30.                 else:
  31.                     a_course['is_elective'] = tr.xpath("td[8]/div/font/font/text()")[0]
  32.  
  33.                 a_course['date'] = tr.xpath("td[9]/div/font/text()")[0]
  34.                 a_course['semester'] = semester
  35.                 course.append(a_course)
  36.  
  37.     student = {}
  38.  
  39.     informations = html.xpath("/html/body/form/table[@id='T1']/tr[1]/td/font/text()")
  40.     student['id'] = informations[1].strip()
  41.     student['name'] = informations[2].strip()
  42.     student['department']= informations[3].strip()
  43.     student['major']= informations[4].strip()
  44.  
  45.     grades = html.xpath("/html/body/form/table[@id='T1']/tr[2]/td")[0]
  46.     student['all_grade'] = grades.xpath("font[1]/text()")[0]
  47.     student['all_credit'] = grades.xpath("font[2]/text()")[0][1:]
  48.     student['unpassed_credit'] = grades.xpath("font[3]/text()")[0]
  49.     student['unpassed_count'] = grades.xpath("font[4]/text()")[0]
  50.     student['course'] = course
  51.  
  52.     return student
  53.    
  54.  
  55. if __name__ == "__main__":
  56.     print(
  57.         prase(
  58.             open_html("./1754200.html")
  59.         )
  60.     )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement