Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from lxml import etree
def open_html(filepath, encoding="utf-8"):
    """Read a saved HTML page from disk and return its text.

    Args:
        filepath: Path of the HTML file to read.
        encoding: Text encoding used to decode the file. Defaults to
            ``"utf-8"``; pass another codec name for pages saved in a
            different encoding.

    Returns:
        The full decoded contents of the file as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the bytes are not valid in ``encoding``.
    """
    with open(filepath, "r", encoding=encoding) as f:
        return f.read()
def _first(node, path):
    """Return the first result of evaluating XPath ``path`` on ``node``.

    Raises:
        IndexError: If the query matches nothing.
    """
    return node.xpath(path)[0]


def _parse_course_row(tr, semester):
    """Build the dict for one course row, tagged with ``semester``."""
    course = {
        # Unindexed ``td``: takes the first matching text node in the row.
        'id': _first(tr, "td/div/font/text()"),
        'name': _first(tr, "td[2]/div/font/text()"),
        'grade': _first(tr, "td[3]/div/font/text()"),
        'credit': _first(tr, "td[4]/div/font/text()"),
        'grade_point': _first(tr, "td[5]/div/font/text()"),
        'is_passed': _first(tr, "td[6]/div/font/text()"),
        'is_normal': _first(tr, "td[7]/div/font/text()"),
    }
    # Column 8 is read directly from <font> when that yields exactly one
    # text node; otherwise the text is taken from a nested <font>.
    elective_texts = tr.xpath("td[8]/div/font/text()")
    if len(elective_texts) == 1:
        course['is_elective'] = elective_texts[0]
    else:
        course['is_elective'] = _first(tr, "td[8]/div/font/font/text()")
    course['date'] = _first(tr, "td[9]/div/font/text()")
    course['semester'] = semester
    return course


def _parse_student_header(html):
    """Read the student fields from the first two rows of table ``T1``."""
    info = html.xpath("/html/body/form/table[@id='T1']/tr[1]/td/font/text()")
    # Text node 0 of the first row is skipped; nodes 1-4 are used.
    student = {
        'id': info[1].strip(),
        'name': info[2].strip(),
        'department': info[3].strip(),
        'major': info[4].strip(),
    }
    totals = html.xpath("/html/body/form/table[@id='T1']/tr[2]/td")[0]
    student['all_grade'] = _first(totals, "font[1]/text()")
    # The first character of this text is dropped (presumably a leading
    # separator or symbol in the page -- TODO confirm against a sample).
    student['all_credit'] = _first(totals, "font[2]/text()")[1:]
    student['unpassed_credit'] = _first(totals, "font[3]/text()")
    student['unpassed_count'] = _first(totals, "font[4]/text()")
    return student


def prase(html_str: str) -> dict:
    """Parse a transcript HTML page into a nested student dict.

    The page is expected to contain ``/html/body/form/table[@id='T1']``.
    Its first two rows supply the student fields; every later row is
    either a single-cell row (which updates the current semester label
    when it holds exactly one ``div``) or a multi-cell course row.

    Args:
        html_str: HTML source of the page.

    Returns:
        A dict with the keys ``id``, ``name``, ``department``, ``major``,
        ``all_grade``, ``all_credit``, ``unpassed_credit``,
        ``unpassed_count`` and ``course``. ``course`` is a list of dicts
        with the keys ``id``, ``name``, ``grade``, ``credit``,
        ``grade_point``, ``is_passed``, ``is_normal``, ``is_elective``,
        ``date`` and ``semester``. All leaf values are strings.

    Raises:
        IndexError: If an expected row, cell or text node is missing.
    """
    html = etree.HTML(html_str)
    # Skip the two header rows; they are handled by _parse_student_header.
    rows = html.xpath("/html/body/form/table[@id='T1']/tr")[2:]
    courses = []
    semester = ""
    for tr in rows:
        # NOTE(review): nesting reconstructed from a paste that had lost
        # its indentation; the course branch is taken only for rows with
        # a cell count other than one -- confirm against the original.
        if len(tr.xpath("td")) == 1:
            if len(tr.xpath("td/div")) == 1:
                semester = _first(tr, "td/div/text()")
        elif len(tr.xpath("td/div/font")) > 0:
            courses.append(_parse_course_row(tr, semester))
    student = _parse_student_header(html)
    student['course'] = courses
    return student
if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default
    # input file; with no argument the original hard-coded path is used.
    html_path = sys.argv[1] if len(sys.argv) > 1 else "./1754200.html"
    print(prase(open_html(html_path)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement