Advertisement
collinsanele

current_code

Sep 2nd, 2019
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.24 KB | None | 0 0
  1. from lxml import html
  2. from bs4 import BeautifulSoup
  3. import json
  4.  
  5.  
  6. """ NB: for only 5397124106352075058.htm """
  7.  
  8.  
  9.  
  10. def html2json(path):
  11. results = []
  12. container = []
  13. with open(path) as f:
  14. content = f.read()
  15.  
  16. soup = BeautifulSoup(content, "lxml")
  17. paras = soup.findAll("p", {"align":"JUSTIFY"})
  18. indexes = []
  19. for i,item in enumerate(paras):
  20. if "Sol" in item.text:
  21. indexes.append(i)
  22.  
  23. for i,item in enumerate(indexes):
  24. try:
  25. container.append(paras[indexes[i]:indexes[i+1]])
  26. except IndexError:
  27. pass
  28.  
  29.  
  30. qnums = []
  31. c_options = []
  32. str_container = []
  33.  
  34. for i, item in enumerate(container):
  35. if not item[0].findAll("b"):
  36. del container[i]
  37.  
  38.  
  39. for item in container:
  40. try:
  41. target = item[0].findAll("b")
  42. except Exception as e:
  43. print(e)
  44. pass
  45.  
  46. try:
  47. qn = target[0].text.split()[0].strip()
  48. qnums.append(qn)
  49. except Exception:
  50. pass
  51.  
  52. try:
  53. if len(target) == 2:
  54. c_option = target[1].text
  55. c_options.append(c_option.strip())
  56. elif len(target) == 1:
  57. c_option = target[0].text.split()[1]
  58. c_options.append(c_option.strip())
  59.  
  60. except IndexError:
  61. c_option = " "
  62. c_options.append(c_option)
  63. pass
  64.  
  65.  
  66. #container
  67. for item in container:
  68. try:
  69. trash = item[0].find("b").decompose()
  70. except:
  71. pass
  72.  
  73.  
  74. str_container = [str(item).replace("[", "").replace('<p align="JUSTIFY">', "").replace("</p>", "").replace(",", "").replace("]", "") for item in container]
  75.  
  76. for x in range(len(qnums)):
  77. obj = {"Question Number":"", "Correct Option":"", "Solution":""}
  78. obj["Question Number"] = qnums[x]
  79. obj["Correct Option"] = c_options[x]
  80. obj["Solution"] = str_container[x]
  81.  
  82. results.append(obj)
  83.  
  84. final = {"results":results}
  85.  
  86.  
  87. #Saves result as json in the current working directory
  88. with open("539.json", "w") as f:
  89. json.dump(final, f)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement