28/1:
from BeautifulSoup import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        print link['href']
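(The cells below iterate toward the bs4 / Python 3 form of this snippet. For reference, a minimal working sketch of where they end up, assuming BeautifulSoup 4 is installed; parse_only is the bs4 spelling of the old parseOnlyThese keyword:)

from bs4 import BeautifulSoup, SoupStrainer

html = open('infs1200.html', 'r').read()

# Parse only the <a> tags, then keep those that actually carry an href.
for link in BeautifulSoup(html, 'html.parser', parse_only=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])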
28/2:
from BeautifulSoup import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])
28/3:
from BeautifulSoup import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])
28/4:
from BeautifulSoup4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])
28/5:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])
28/6:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parse_only=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])
28/7:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])
28/8:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parse_only=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])
28/9:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, "html.parser"):
    if link.has_attr('href'):
        print(link['href'])
28/10:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])
29/1:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        print(link['href'])
29/2:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()
links = []

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        links.append(link['href'])
29/3:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()
links = []

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        links.append(link['href'])
29/4:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()
links = []

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        links.append(link['href'])
29/5:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()
links = []

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        links.append(link['href'])
links = [link for link in links if link[:30] == "https://uq.rl.talis.com/lists/"]
29/6: links
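(A note on the filter in 29/5: slicing to a hard-coded width only stays correct while the width matches the literal. str.startswith expresses the same test with no magic number:)

prefix = "https://uq.rl.talis.com/lists/"
links = [link for link in links if link.startswith(prefix)]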
29/7:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()
links = []

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        links.append(link['href'])
link = [link for link in links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
29/8: link
29/9:
from bs4 import BeautifulSoup, SoupStrainer
f = open('infs1200.html','r').read()
page_links = []

for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
    if link.has_attr('href'):
        page_links.append(link['href'])
link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
29/10: link
29/11:
from bs4 import BeautifulSoup, SoupStrainer
import os

for i in os.listdir(os.getcwd()):
    f = open(i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
    print(link)
29/12:
from bs4 import BeautifulSoup, SoupStrainer
import os

for i in os.listdir(os.getcwd()):
    print(i)
    f = open(i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
    print(link)
29/13:
from bs4 import BeautifulSoup, SoupStrainer
import os

for i in os.listdir(os.getcwd()):
    print(i)
    f = open(i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    print(page_links)
    link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
    print(link)
29/14:
from bs4 import BeautifulSoup, SoupStrainer
import os

for i in os.listdir(os.getcwd()):
    print(i)
    f = open(i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    print(page_links)
    link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"]
    if len(link) > 0:
        print(link[0])
29/15:
from bs4 import BeautifulSoup, SoupStrainer
import os

for i in os.listdir(os.getcwd()):
    print(i)
    f = open(i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    #print(page_links)
    link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"]
    if len(link) > 0:
        print(link[0])
29/16:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir(os.getcwd()):
    f = open(i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    #print(page_links)
    link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"]
    if len(link) > 0:
        links.append(link[0])
29/17:
with open('../new', mode='wt', encoding='utf-8') as myfile:
    myfile.write('\n'.join(lines))
29/18:
with open('../new', mode='wt', encoding='utf-8') as myfile:
    myfile.write('\n'.join(links))
29/19: os.listdir()
29/20: os.di()
29/21: os.dir()
29/22: os.pwd()
29/23: os.path
29/24: os.getcwd()
29/25: os.chdir('..')
29/26: os.getcwd()
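(os.di(), os.dir() and os.pwd() don't exist and raise AttributeError; as these lines settle on, os.getcwd() reads the working directory and os.chdir() changes it.)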
29/27: f = open('rip2/0A3C88A9-61D3-82EC-8861-3A4FBD03BABC.html','r').read()
29/28: f
29/29:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open(i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    #print(page_links)
    link = [link for link in page_links if link[:35] == "https://uq.rl.talis.com/courses/"]
    if len(link) > 0:
        links.append(link[0])
29/30:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    #print(page_links)
    link = [link for link in page_links if link[:35] == "https://uq.rl.talis.com/courses/"]
    if len(link) > 0:
        links.append(link[0])
29/31:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    #print(page_links)
    link = [link for link in page_links if link[:35] == "https://uq.rl.talis.com/courses/"]
    if len(link) > 0:
        print(link[0])
29/32:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    #print(page_links)
    link = [link for link in page_links if link[:35] == "https://uq.rl.talis.com/courses/"]
    if len(link) > 0:
        print(link[0])
    else:
        print(link)
29/33:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    #print(page_links)
    link = [link for link in page_links if link[:33] == "https://uq.rl.talis.com/courses/"]
    if len(link) > 0:
        print(link[0])
    else:
        print(link)
29/34:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    print(page_links)
    link = [link for link in page_links if link[:33] == "https://uq.rl.talis.com/courses/"]
    if len(link) > 0:
        print(link[0])
    else:
        print(link)
29/35:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string
    print(title)

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    print(page_links)
    link = [link for link in page_links if link[:33] == "https://uq.rl.talis.com/courses/"]
    if len(link) > 0:
        print(link[0])
    else:
        print(link)
29/36:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    print(page_links)
    link = [link for link in page_links if link[:33] == "https://uq.rl.talis.com/courses/"]
    if len(link) > 0:
        print(link[0])
    else:
        print(link)
29/37:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("p", { "class" : "itemBibData" })
29/38:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("p", { "class" : "itemBibData" })
    print(paragraphs[0])
29/39:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects =
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
29/40:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects =
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
29/41:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects =
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
29/42:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
29/43:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    print(page_links)
29/44:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])
    print(page_links)
29/45:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])

    link = [link for link in page_links if link[:31] == "https://uq.rl.talis.com/items/"]

    print(link)
29/46:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])

    link = [link for link in page_links if link[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/"]

    print(link)
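(This len()-based slice is the fix for the dead filter in 29/29-29/34: "https://uq.rl.talis.com/courses/" is 32 characters, so link[:35] and link[:33] could never equal it, and the [:31] slice in 29/45 had the same problem against the 30-character items/ prefix. Computing the width with len(), or simply using link.startswith(...), removes the off-by-N entirely.)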
29/47:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])

    link = [link for link in page_links if link[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/"]

    print(link)
29/48:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    page_links = []

    for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
        if link.has_attr('href'):
            page_links.append(link['href'])

    page_links = [link for link in page_links if link[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/"]

    for link in page_links:
        links.append(link)
    print(i)
29/49: len(links)
29/50:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = findAll("div", { "class" : "item" })
    print(paragraphs)
29/51:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("div", { "class" : "item" })
    print(paragraphs)
29/52:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
    print(paragraphs)
29/53:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
    print('~~~~~~~~~~~~~~~~~~')
    print(paragraphs)
    print('~~~~~~~~~~~~~~~~~~')
29/54:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
    print('~~~~~~~~~~~~~~~~~~')
    print(paragraphs[0])
    print('~~~~~~~~~~~~~~~~~~')
29/55:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
29/56:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        if len([if type == "Book" for type in BeautifulSoup(paragraph).findAll("span", {"class" : "resourceType label"})]) > 0:
            print("yes")
29/57:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        if len([type for type in BeautifulSoup(paragraph).findAll("span", {"class" : "resourceType label"}) if type == "Book"]) > 0:
            print("yes")
29/58:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        if len([type for type in BeautifulSoup(paragraph).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"]) > 0:
            print("yes")
29/59:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        if len([type for type in BeautifulSoup(strparagraph)).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"]) > 0:
            print("yes")
29/60:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"]) > 0:
            print("yes")
29/61:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"]))
29/62:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"])
29/63:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})])
29/64:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}).contents])
29/65:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}).contents])
29/66:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents])
29/67:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents[0]])
29/68:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        print([paragraph for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"])
29/69:
from bs4 import BeautifulSoup, SoupStrainer
import os

links = []
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    for paragraph in paragraphs:
        if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
            print("yes")
        else:
            print("no")
29/70:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    books = []
    for paragraph in paragraphs:
        if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
            books.append(paragraph)

    print(books)
    subjects[title] = books
29/71:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    books = []
    for paragraph in paragraphs:
        if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
            books.append(paragraph)

    print(i)
    subjects[title] = books
29/72:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    books = []
    for paragraph in paragraphs:
        if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}) if len(type) > 0 and type[0].contents == "Book"]) > 0:
            books.append(paragraph)

    print(i)
    subjects[title] = books
29/73:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    books = []
    for paragraph in paragraphs:
        try:
            if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
                books.append(paragraph)
        except:
            pass

    print(books)
    subjects[title] = books
29/74:
from bs4 import BeautifulSoup, SoupStrainer
import os

subjects = {}
for i in os.listdir('rip2'):
    f = open('rip2/' + i,'r').read()
    page_links = []

    title = BeautifulSoup(f).title.string.split(' ')[0]
    print(title)

    paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })

    books = []
    for paragraph in paragraphs:
        try:
            if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
                books.append(paragraph)
        except:
            pass

    print(i)
    subjects[title] = books
29/75: subjects
29/76: subjects["infs1200"]
29/77: len(subjects)
29/78:
for key, value in dict.iteritems():
    temp = [key,value]
    print(temp)
29/79:
for key, value in subjects.iteritems():
    temp = [key,value]
    print(temp)
29/80:
for key, value in subjects.items():
    temp = [key,value]
    print(temp)
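(dict.iteritems() is Python 2 only, and plain dict is the type, not the variable; subjects.items(), as 29/80 lands on, is the Python 3 call.)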
29/81:
for key, value in subjects.items():
    temp = [key,value]
    print(key)
29/82: subjects["INFS1200"]
29/83: len(subjects)
29/84: import json
29/85:
with open('json.json','w') as js:
    js.write(json.dumps(subjects))
29/86:
with open('json.json','w') as js:
    json.dumps(subjects,js)
29/87: type(subjects)
29/88:
with open('json.json','w') as js:
    json.dump(subjects,js)
29/89:
with open('json.json','w') as js:
    json.dump(dict(subjects),js)
29/90: len(subjects["LAWS1100"])
29/91:
import pickle

def save_obj(obj, name):
    with open('obj/' + name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open('obj/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)
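(save_obj assumes an obj/ directory already exists next to the notebook; os.makedirs('obj', exist_ok=True) would create it.)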
29/92: save_obj(subjects, "subjects_dump")
29/93: save_obj(subjects, "subjects_dump")
30/1: subjects
29/94: subjects["INFS1200"]
29/95: subjects[0]
29/96:
with open('json.json','w') as js:
    json.dump(dict(subjects),js)
29/97: type(subjects["INFS1200"])
29/98: type(subjects["INFS1200"][0])
29/99: str(subjects["INFS1200"][0])
29/100:
for key, value in subjects.items():
    temp = [key,value]
    print(key)
29/101:
for key, value in subjects.items():
    print(value)
29/102:
subj_url = {}
for key, value in subjects.items():
    urls = []
    print(value)
29/103:
subj_url = {}
for key, value in subjects.items():
    urls = []

    for paragraph in value:
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                print(link['href'])
29/104: new_subjects = {}
29/105:
for key, value in subjects.items():
    new_subjects[key] = [str(paragraph) for paragraph in value]
29/106:
with open('json.json','w') as js:
    json.dump(dict(new_subjects),js)
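(The dumps in 29/85-29/96 fail because the dict values are bs4 Tag objects, which aren't JSON-serializable; converting each paragraph to str first, as 29/105 does, is what lets json.dump succeed.)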
31/1: import json
31/2:
with open('json.json','r') as js:
    json.load(js)
31/3: import os
31/4: os.getcwd()
32/1:
with open('json.json','r') as js:
    json.load(js)
32/2: import json
32/3:
with open('json.json','r') as js:
    json.load(js)
32/4:
subjects = {}
with open('json.json','r') as js:
    subjects = json.load(js)
32/5: subjects
32/6: subjects["INFS2200"]
32/7: from bs4 import BeautifulSoup
32/8:
for key, value in subjects.items():
    new_subjects[key] = [str(paragraph) for paragraph in value]
32/9:
new_subjects = {}
for key, value in subjects.items():
    new_subjects[key] = [str(paragraph) for paragraph in value]
32/10:
subj_url = {}
for key, value in subjects.items():
    urls = []

    for paragraph in value:
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                print(link['href'])
32/11:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = []

    for paragraph in value:
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                print(link['href'])
32/12:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = []

    for paragraph in value:
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href'])
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls.append(url)
    print(key)
    print(urls)
32/13:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = []

    for paragraph in value:
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls.append(url)
    print(key)
    print(urls)
32/14:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = []

    for paragraph in value:
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls.append(url)
    print(key)
    print(urls)
32/15: subjects["INFS2200"]
32/16: "Required" in subjects["INFS2200"][0]
32/17: "Required" in subjects["INFS2200"][1]
32/18: BeautifulSoup(subjects["INFS2200"][0])
32/19: BeautifulSoup(subjects["INFS2200"][0], "html.parser")
32/20: BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")
32/21: BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong").contents
32/22: BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents
32/23: type(BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents)
32/24: BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents[0]
32/25:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        book_type = BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents[0]
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    print(key)
    print(urls)
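(Two problems in 32/25: urls[book_type].append raises KeyError until 32/26 adds the missing-key guard, and book_type is computed from subjects["INFS2200"][0] rather than paragraph, so every item gets the same label until 32/30 switches it.)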
32/26:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        book_type = BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents[0]
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    print(key)
    print(urls)
32/27:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        book_type = BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents[0]
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/28: subj_url
32/29: subj_url["INFS1200"]
32/30:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/31:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
        print(book_type)
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/32:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
        print(book_type)
        print(key)
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/33:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        print(paragraph)
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
        print(book_type)
        print(key)
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/34:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        print(paragraph)
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
        print(book_type)
        print(key)
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/35:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        print(key)
        print(paragraph)
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
        print(book_type)
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/36:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        print(key)
        print(paragraph)
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents
        if len(book_type) > 0:
            book_type = book_type[0]
        else:
            book_type = 'Required'
        print(book_type)
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/37:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        print(key)
        print(paragraph)
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
        if len(book_type) > 0:
            book_type = book_type[0].contents[0]
        else:
            book_type = 'Required'
        print(book_type)
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/38:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
        if len(book_type) > 0:
            book_type = book_type[0].contents[0]
        else:
            book_type = 'Required'
        print(book_type)
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
32/39:
from bs4 import SoupStrainer
subj_url = {}
for key, value in subjects.items():
    urls = {}

    for paragraph in value:
        book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
        if len(book_type) > 0:
            book_type = book_type[0].contents[0]
        else:
            book_type = 'Required'
        if book_type not in urls:
            urls[book_type] = []
        for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
            if link.has_attr('href'):
                url = link['href']
                if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
                    urls[book_type].append(url)
    subj_url[key] = urls
  1460. 32/40: subj_url["INFS1200"]
  1461. 32/41: subj_url["LAWS1100"]
  1462. 32/42:
  1463. from bs4 import SoupStrainer
  1464. subj_url = {}
  1465. for key, value in subjects.items():
  1466. urls = {}
  1467.  
  1468. for paragraph in value:
  1469. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1470. if len(book_type) > 0:
  1471. print("oh")
  1472. book_type = book_type[0].contents[0]
  1473. else:
  1474. print("boy")
  1475. book_type = 'Required'
  1476. if book_type not in urls:
  1477. urls[book_type] = []
  1478. for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
  1479. if link.has_attr('href'):
  1480. url = link['href']
  1481. if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
  1482. urls[book_type].append(url)
  1483. subj_url[key] = urls
  1484. 32/43:
  1485. from bs4 import SoupStrainer
  1486. subj_url = {}
  1487. for key, value in subjects.items():
  1488. urls = {}
  1489.  
  1490. for paragraph in value:
  1491. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1492. if len(book_type) > 0:
  1493. book_type = book_type[0].contents[0]
  1494. else:
  1495. book_type = 'Required'
  1496. if book_type not in urls:
  1497. urls[book_type] = []
  1498. for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
  1499. if link.has_attr('href'):
  1500. url = link['href']
  1501. if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
  1502. urls[book_type].append(url)
  1503. print(book_type)
  1504. subj_url[key] = urls
  1505. 32/44:
  1506. from bs4 import SoupStrainer
  1507. subj_url = {}
  1508. for key, value in subjects.items():
  1509. urls = {}
  1510.  
  1511. for paragraph in value:
  1512. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1513. if len(book_type) > 0:
  1514. book_type = book_type[0].contents[0]
  1515. else:
  1516. book_type = 'Required'
  1517. if book_type not in urls:
  1518. urls[book_type] = []
  1519. for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
  1520. if link.has_attr('href'):
  1521. url = link['href']
  1522. if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
  1523. urls[book_type].append(url)
  1524. print(urls)
  1525. subj_url[key] = urls
  1526. 32/45:
  1527. from bs4 import SoupStrainer
  1528. subj_url = {}
  1529. for key, value in subjects.items():
  1530. urls = {}
  1531.  
  1532. for paragraph in value:
  1533. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1534. if len(book_type) > 0:
  1535. book_type = book_type[0].contents[0]
  1536. else:
  1537. book_type = 'Required'
  1538. if book_type not in urls:
  1539. urls[book_type] = []
  1540. for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
  1541. if link.has_attr('href'):
  1542. url = link['href']
  1543. if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
  1544. urls[book_type].append(url)
  1545. subj_url[key] = urls
  1546. 32/46: subj_url["INFS1200"]
  1547. 32/47: subj_url["LAND3007"]
  1548. 32/48: subj_url["LAWS1112"]
  1549. 32/49: count = 0
32/50:
for key, value in subj_url:
    for key2, value2 in value:
        count+=len(value2)
32/51:
for key, value in subj_url.items():
    for key2, value2 in value:
        count+=len(value2)
32/52:
for key, value in subj_url.items():
    for key2, value2 in value.items():
        count+=len(value2)
32/53: count
32/54: import os
32/55: os.getcwd()
32/56: os.chdir('rip3')
32/57:
for key, value in subj_url:
    os.mkdir(key)
32/58:
for key, value in subj_url.items():
    os.mkdir(key)
32/59:
for key, value in subj_url.items():
    print(key)
    os.mkdir(key)
32/60:
for key, value in subj_url.items():
    print(key)
32/61: subj_url['']
32/62:
for key, value in subj_url.items():
    if key != '':
        os.mkdir(key)
32/63:
for key, value in subj_url.items():
    if key != '':
        for key2, value2 in value.items():
            os.mkdir('key/' + key2)
32/64:
for key, value in subj_url.items():
    if key != '':
        for key2, value2 in value.items():
            os.mkdir(key + '/' + key2)
32/65:
for key, value in subj_url.items():
    if key != '':
        for key2, value2 in value.items():
            print("wget " + value2 + '-O ' key + '/' + key2)
32/66:
for key, value in subj_url.items():
    if key != '':
        for key2, value2 in value.items():
            print("wget " + value2 + '-O '+ key + '/' + key2)
32/67:
for key, value in subj_url.items():
    if key != '':
        for key2, value2 in value.items():
            for url in value2:
                print("wget " + item + '-O '+ key + '/' + key2)
32/68:
for key, value in subj_url.items():
    if key != '':
        for key2, value2 in value.items():
            for url in value2:
                print("wget " + url + '-O '+ key + '/' + key2)
32/69:
for key, value in subj_url.items():
    if key != '':
        for key2, value2 in value.items():
            for url in value2:
                print("wget " + url + ' -O '+ key + '/' + key2)
32/70:
for key, value in subj_url.items():
    if key != '':
        for key2, value2 in value.items():
            for url in value2:
                print("wget " + url + ' -P '+ key + '/' + key2)
32/71:
for key, value in subj_url.items():
    if key != '':
        for key2, value2 in value.items():
            for url in value2:
                os.system("wget " + url + ' -P '+ key + '/' + key2)
  1634. 35/1:
  1635. subjects = {}
  1636. with open('json.json','r') as js:
  1637. subjects = json.load(js)
  1638. 35/2: os.chdir('git/uqexchange/
  1639. 35/3: os.chdir('git/uqexchange/')
  1640. 35/4: import os
  1641. 35/5: os.chdir('git/uqexchange/')
  1642. 35/6:
  1643. subjects = {}
  1644. with open('json.json','r') as js:
  1645. subjects = json.load(js)
  1646. 35/7: import json
  1647. 35/8:
  1648. subjects = {}
  1649. with open('json.json','r') as js:
  1650. subjects = json.load(js)
  1651. 35/9:
  1652. from bs4 import SoupStrainer
  1653. subj_url = {}
  1654. for key, value in subjects.items():
  1655. urls = {}
  1656.  
  1657. for paragraph in value:
  1658. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1659. if len(book_type) > 0:
  1660. book_type = book_type[0].contents[0]
  1661. else:
  1662. book_type = 'Required'
  1663. if book_type not in urls:
  1664. urls[book_type] = []
  1665. for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
  1666. if link.has_attr('href'):
  1667. url = link['href']
  1668. if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
  1669. urls[book_type].append(url)
  1670. subj_url[key] = urls
  1671. 35/10: from bs4 import BeautifulSoup
  1672. 35/11:
  1673. from bs4 import SoupStrainer
  1674. subj_url = {}
  1675. for key, value in subjects.items():
  1676. urls = {}
  1677.  
  1678. for paragraph in value:
  1679. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1680. if len(book_type) > 0:
  1681. book_type = book_type[0].contents[0]
  1682. else:
  1683. book_type = 'Required'
  1684. if book_type not in urls:
  1685. urls[book_type] = []
  1686. for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
  1687. if link.has_attr('href'):
  1688. url = link['href']
  1689. if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
  1690. urls[book_type].append(url)
  1691. subj_url[key] = urls
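# The same link extraction, sketched with bs4's current keyword
# parse_only (parseOnlyThese is the old BeautifulSoup 3 spelling, which
# bs4 still accepts with a deprecation warning) and str.startswith in
# place of the slice comparison used above:
from bs4 import BeautifulSoup, SoupStrainer

ITEM_PREFIX = "https://uq.rl.talis.com/items/"

def item_links(paragraph):
    # parse_only keeps just the <a> tags, as in the cells above
    soup = BeautifulSoup(paragraph, "html.parser",
                         parse_only=SoupStrainer('a'))
    return [a['href'] for a in soup.find_all('a')
            if a.has_attr('href') and a['href'].startswith(ITEM_PREFIX)]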
  1692. 36/1:
  1693. subjects = {}
  1694. with open('json.json','r') as js:
  1695. subjects = json.load(js)
  1696. 36/2: import json
  1697. 36/3:
  1698. subjects = {}
  1699. with open('json.json','r') as js:
  1700. subjects = json.load(js)
  1701. 36/4:
  1702. from bs4 import SoupStrainer
  1703. subj_url = {}
  1704. for key, value in subjects.items():
  1705. urls = {}
  1706.  
  1707. for paragraph in value:
  1708. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1709. if len(book_type) > 0:
  1710. book_type = book_type[0].contents[0]
  1711. else:
  1712. book_type = 'Required'
  1713. if book_type not in urls:
  1714. urls[book_type] = []
  1715. for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
  1716. if link.has_attr('href'):
  1717. url = link['href']
  1718. if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
  1719. urls[book_type].append(url)
  1720. subj_url[key] = urls
  1721. 36/5: from bs4 import BeautifulSoup
  1722. 36/6:
  1723. from bs4 import SoupStrainer
  1724. subj_isbn = {}
  1725. for key, value in subjects.items():
  1726. isbns = {}
  1727.  
  1728. for paragraph in value:
  1729. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1730. if len(book_type) > 0:
  1731. book_type = book_type[0].contents[0]
  1732. else:
  1733. book_type = 'Required'
1734. if book_type not in isbns:
1735. isbns[book_type] = []
1736.  
1737. print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "resourceType label"}))
  1738.  
  1739. # isbns[book_type].append(isbn)
  1740.  
  1741. # subj_url[key] = urls
  1742. 36/7:
  1743. from bs4 import SoupStrainer
  1744. subj_isbn = {}
  1745. for key, value in subjects.items():
  1746. isbns = {}
  1747.  
  1748. for paragraph in value:
  1749. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1750. if len(book_type) > 0:
  1751. book_type = book_type[0].contents[0]
  1752. else:
  1753. book_type = 'Required'
1754. if book_type not in isbns:
1755. isbns[book_type] = []
1756.  
1757. print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "resourceType label"}))
  1758.  
  1759. # isbns[book_type].append(isbn)
  1760.  
  1761. # subj_url[key] = urls
  1762. 36/8:
  1763. from bs4 import SoupStrainer
  1764. subj_isbn = {}
  1765. for key, value in subjects.items():
  1766. isbns = {}
  1767.  
  1768. for paragraph in value:
  1769. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1770. if len(book_type) > 0:
  1771. book_type = book_type[0].contents[0]
  1772. else:
  1773. book_type = 'Required'
1774. if book_type not in isbns:
  1775. isbns[book_type] = []
  1776.  
  1777. print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "resourceType label"}))
  1778.  
  1779. # isbns[book_type].append(isbn)
  1780.  
  1781. # subj_url[key] = urls
  1782. 36/9:
  1783. from bs4 import SoupStrainer
  1784. subj_isbn = {}
  1785. for key, value in subjects.items():
  1786. isbns = {}
  1787.  
  1788. for paragraph in value:
  1789. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1790. if len(book_type) > 0:
  1791. book_type = book_type[0].contents[0]
  1792. else:
  1793. book_type = 'Required'
1794. if book_type not in isbns:
  1795. isbns[book_type] = []
  1796.  
  1797. print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"}))
  1798.  
  1799. # isbns[book_type].append(isbn)
  1800.  
  1801. # subj_url[key] = urls
  1802. 36/10:
  1803. from bs4 import SoupStrainer
  1804. subj_isbn = {}
  1805. for key, value in subjects.items():
  1806. isbns = {}
  1807.  
  1808. for paragraph in value:
  1809. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1810. if len(book_type) > 0:
  1811. book_type = book_type[0].contents[0]
  1812. else:
  1813. book_type = 'Required'
1814. if book_type not in isbns:
  1815. isbns[book_type] = []
  1816.  
1817. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})[0].contents[0].split(',')
  1818.  
  1819. isbns[book_type].append(isbn)
  1820.  
  1821. subj_isbn[key] = isbns
  1822. 36/11:
  1823. from bs4 import SoupStrainer
  1824. subj_isbn = {}
  1825. for key, value in subjects.items():
  1826. isbns = {}
  1827.  
  1828. for paragraph in value:
  1829. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1830. if len(book_type) > 0:
  1831. book_type = book_type[0].contents[0]
  1832. else:
  1833. book_type = 'Required'
1834. if book_type not in isbns:
  1835. isbns[book_type] = []
  1836.  
1837. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})[0].contents[0].split(',')
  1838.  
  1839. isbns[book_type].append(isbn)
  1840.  
  1841. subj_isbn[key] = isbns
  1842. 36/12:
  1843. from bs4 import SoupStrainer
  1844. subj_isbn = {}
  1845. for key, value in subjects.items():
  1846. isbns = {}
  1847.  
  1848. for paragraph in value:
  1849. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1850. if len(book_type) > 0:
  1851. book_type = book_type[0].contents[0]
  1852. else:
  1853. book_type = 'Required'
1854. if book_type not in isbns:
  1855. isbns[book_type] = []
  1856.  
  1857. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})[0].contents[0].split(',')
  1858.  
  1859. isbns[book_type].append(isbn)
  1860.  
  1861. subj_isbn[key] = isbns
  1862. 36/13:
  1863. from bs4 import SoupStrainer
  1864. subj_isbn = {}
  1865. for key, value in subjects.items():
  1866. isbns = {}
  1867.  
  1868. for paragraph in value:
  1869. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1870. if len(book_type) > 0:
  1871. book_type = book_type[0].contents[0]
  1872. else:
  1873. book_type = 'Required'
1874. if book_type not in isbns:
  1875. isbns[book_type] = []
  1876.  
  1877. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  1878. if len(isbn) > 0:
  1879. isbn = isbn[0].contents[0].split(',')
  1880.  
  1881. isbns[book_type].append(isbn)
  1882.  
  1883. subj_isbn[key] = isbns
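# One quirk in the cell above: when a paragraph has no "isbns invisible"
# span, isbn is still the empty ResultSet returned by findAll, and that
# empty list is what gets appended. A small sketch that always returns a
# plain (possibly empty) list of ISBN strings:
from bs4 import BeautifulSoup

def extract_isbns(paragraph):
    spans = BeautifulSoup(paragraph, "html.parser").find_all(
        "span", {"class": "isbns invisible"})
    return spans[0].contents[0].split(',') if spans else []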
  1884. 36/14: subj_isbn["INFS1200"]
  1885. 36/15: subj_isbn
  1886. 36/16:
  1887. from bs4 import SoupStrainer
  1888. subj_isbn = {}
  1889. for key, value in subjects.items():
  1890. isbns = {}
  1891.  
  1892. for paragraph in value:
  1893. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1894. if len(book_type) > 0:
  1895. book_type = book_type[0].contents[0]
  1896. else:
  1897. book_type = 'Required'
  1898. if book_type not in isbns:
  1899. isbns[book_type] = []
  1900.  
  1901. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  1902. if len(isbn) > 0:
  1903. isbn = isbn[0].contents[0].split(',')
  1904.  
  1905. isbns[book_type].append(isbn)
  1906.  
  1907. subj_isbn[key] = isbns
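# The membership test before creating the list is what went wrong in the
# earlier cells (testing urls while filling isbns). dict.setdefault does
# both steps in one call and removes that class of bug:
isbns.setdefault(book_type, []).append(isbn)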
  1908. 36/17: subj_isbn
  1909. 36/18: subj_isbn["LAWS1114"]
  1910. 36/19: subj_isbn["INFS1300"]
  1911. 36/20:
  1912. from bs4 import SoupStrainer
  1913. db = {}
  1914. for key, value in subjects.items():
  1915. books = {}
  1916.  
  1917. for paragraph in value:
  1918. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1919. if len(book_type) > 0:
  1920. book_type = book_type[0].contents[0]
  1921. else:
  1922. book_type = 'Required'
1923. if book_type not in books:
  1924. books[book_type] = []
  1925.  
  1926. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  1927. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  1928. if len(isbn) > 0:
  1929. isbn = isbn[0].contents[0].split(',')
  1930. book["ISBNs"] = isbn
  1931.  
  1932. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  1933. book["title"] = name
  1934.  
  1935. author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
  1936. book["author"] = author
  1937.  
  1938. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  1939. book["publishedDate"] = publishedDate
  1940.  
  1941. books[book_type].append(book)
  1942.  
  1943. db[key] = books
  1944. 36/21:
  1945. from bs4 import SoupStrainer
  1946. db = {}
  1947. for key, value in subjects.items():
  1948. books = {}
  1949.  
  1950. for paragraph in value:
  1951. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1952. if len(book_type) > 0:
  1953. book_type = book_type[0].contents[0]
  1954. else:
  1955. book_type = 'Required'
1956. if book_type not in books:
  1957. books[book_type] = []
  1958.  
  1959. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  1960. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  1961. if len(isbn) > 0:
  1962. isbn = isbn[0].contents[0].split(',')
  1963. book["ISBNs"] = isbn
  1964.  
  1965. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  1966. book["title"] = title
  1967.  
  1968. author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
  1969. book["author"] = author
  1970.  
  1971. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  1972. book["publishedDate"] = publishedDate
  1973.  
  1974. books[book_type].append(book)
  1975.  
  1976. db[key] = books
  1977. 36/22:
  1978. from bs4 import SoupStrainer
  1979. db = {}
  1980. for key, value in subjects.items():
  1981. books = {}
  1982.  
  1983. for paragraph in value:
  1984. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  1985. if len(book_type) > 0:
  1986. book_type = book_type[0].contents[0]
  1987. else:
  1988. book_type = 'Required'
1989. if book_type not in books:
  1990. books[book_type] = []
  1991.  
  1992. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  1993. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  1994. if len(isbn) > 0:
  1995. isbn = isbn[0].contents[0].split(',')
  1996. book["ISBNs"] = isbn
  1997.  
  1998. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  1999. book["title"] = title
  2000.  
  2001. author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
  2002. book["author"] = author
  2003.  
  2004. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  2005. book["publishedDate"] = publishedDate
  2006.  
  2007. print(book)
  2008. books[book_type].append(book)
  2009.  
  2010. db[key] = books
  2011. 36/23:
  2012. from bs4 import SoupStrainer
  2013. db = {}
  2014. for key, value in subjects.items():
  2015. books = {}
  2016.  
  2017. for paragraph in value:
  2018. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2019. if len(book_type) > 0:
  2020. book_type = book_type[0].contents[0]
  2021. else:
  2022. book_type = 'Required'
2023. if book_type not in books:
  2024. books[book_type] = []
  2025.  
  2026. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2027. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2028. if len(isbn) > 0:
  2029. isbn = isbn[0].contents[0].split(',')
  2030. book["ISBNs"] = isbn
  2031.  
  2032. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2033. book["title"] = title
  2034.  
  2035. author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
  2036. book["author"] = author
  2037.  
  2038. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  2039. book["publishedDate"] = publishedDate
  2040.  
  2041. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2042. book["link"] = link
  2043.  
  2044. print(book)
  2045. books[book_type].append(book)
  2046.  
  2047. db[key] = books
  2048. 36/24:
  2049. from bs4 import SoupStrainer
  2050. db = {}
  2051. count = 0
  2052. for key, value in subjects.items():
  2053. books = {}
  2054.  
  2055. for paragraph in value:
  2056. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2057. if len(book_type) > 0:
  2058. book_type = book_type[0].contents[0]
  2059. else:
  2060. book_type = 'Required'
2061. if book_type not in books:
  2062. books[book_type] = []
  2063.  
  2064. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2065. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2066. if len(isbn) > 0:
  2067. isbn = isbn[0].contents[0].split(',')
  2068. book["ISBNs"] = isbn
  2069.  
  2070. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2071. book["title"] = title
  2072.  
  2073. author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
  2074. book["author"] = author
  2075.  
  2076. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  2077. book["publishedDate"] = publishedDate
  2078.  
  2079. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2080. book["link"] = link
  2081.  
  2082.  
  2083. books[book_type].append(book)
2084. count += 1
  2085. db[key] = books
  2086. 36/25:
  2087. from bs4 import SoupStrainer
  2088. db = {}
  2089. count = 0
  2090. for key, value in subjects.items():
  2091. books = {}
  2092.  
  2093. for paragraph in value:
  2094. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2095. if len(book_type) > 0:
  2096. book_type = book_type[0].contents[0]
  2097. else:
  2098. book_type = 'Required'
2099. if book_type not in books:
  2100. books[book_type] = []
  2101.  
  2102. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2103. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2104. if len(isbn) > 0:
  2105. isbn = isbn[0].contents[0].split(',')
  2106. book["ISBNs"] = isbn
  2107.  
  2108. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2109. book["title"] = title
  2110.  
  2111. author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
  2112. book["author"] = author
  2113.  
  2114. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  2115. book["publishedDate"] = publishedDate
  2116.  
  2117. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2118. book["link"] = link
  2119.  
  2120.  
  2121. books[book_type].append(book)
  2122. count+=1
  2123. print(count)
  2124. db[key] = books
  2125. 36/26:
  2126. from bs4 import SoupStrainer
  2127. db = {}
  2128. count = 0
  2129. for key, value in subjects.items():
  2130. books = {}
  2131.  
  2132. for paragraph in value:
  2133. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2134. if len(book_type) > 0:
  2135. book_type = book_type[0].contents[0]
  2136. else:
  2137. book_type = 'Required'
2138. if book_type not in books:
  2139. books[book_type] = []
  2140.  
  2141. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2142. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2143. if len(isbn) > 0:
  2144. isbn = isbn[0].contents[0].split(',')
  2145. book["ISBNs"] = isbn
  2146.  
  2147. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2148. book["title"] = title
  2149.  
  2150. author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
  2151. print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"}))
  2152. book["author"] = author
  2153.  
  2154. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  2155. book["publishedDate"] = publishedDate
  2156.  
  2157. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2158. book["link"] = link
  2159.  
  2160.  
  2161. books[book_type].append(book)
  2162. count+=1
  2163. print(count)
  2164. db[key] = books
  2165. 36/27:
  2166. from bs4 import SoupStrainer
  2167. db = {}
  2168. count = 0
  2169. for key, value in subjects.items():
  2170. books = {}
  2171.  
  2172. for paragraph in value:
  2173. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2174. if len(book_type) > 0:
  2175. book_type = book_type[0].contents[0]
  2176. else:
  2177. book_type = 'Required'
2178. if book_type not in books:
  2179. books[book_type] = []
  2180.  
  2181. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2182. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2183. if len(isbn) > 0:
  2184. isbn = isbn[0].contents[0].split(',')
  2185. book["ISBNs"] = isbn
  2186.  
  2187. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2188. book["title"] = title
  2189.  
  2190. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2191. book["author"] = author
  2192. print(author)
  2193.  
  2194. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  2195. book["publishedDate"] = publishedDate
  2196.  
  2197. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2198. book["link"] = link
  2199.  
  2200.  
  2201. books[book_type].append(book)
  2202. count+=1
  2203. print(count)
  2204. db[key] = books
  2205. 36/28:
  2206. from bs4 import SoupStrainer
  2207. db = {}
  2208. count = 0
  2209. for key, value in subjects.items():
  2210. books = {}
  2211.  
  2212. for paragraph in value:
  2213. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2214. if len(book_type) > 0:
  2215. book_type = book_type[0].contents[0]
  2216. else:
  2217. book_type = 'Required'
2218. if book_type not in books:
  2219. books[book_type] = []
  2220.  
  2221. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2222. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2223. if len(isbn) > 0:
  2224. isbn = isbn[0].contents[0].split(',')
  2225. book["ISBNs"] = isbn
  2226.  
  2227. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2228. book["title"] = title
  2229.  
  2230. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2231. book["author"] = author
  2232.  
  2233. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  2234. book["publishedDate"] = publishedDate
  2235.  
  2236. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2237. book["link"] = link
  2238.  
  2239.  
  2240. books[book_type].append(book)
  2241. count+=1
  2242. print(count)
  2243. db[key] = books
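# A note on the field extraction: contents[0] raises IndexError on an
# empty element, while bs4's get_text() returns the joined text and is
# the more forgiving accessor when only the text is wanted, e.g. for the
# author spans above:
soup = BeautifulSoup(paragraph, "html.parser")
author = [a.get_text(strip=True) for a in soup.find_all("span", {"class": "author"})]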
  2244. 36/29:
  2245. from bs4 import SoupStrainer
  2246. db = {}
  2247. count = 0
  2248. for key, value in subjects.items():
  2249. books = {}
  2250.  
  2251. for paragraph in value:
  2252. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2253. if len(book_type) > 0:
  2254. book_type = book_type[0].contents[0]
  2255. else:
  2256. book_type = 'Required'
2257. if book_type not in books:
  2258. books[book_type] = []
  2259.  
  2260. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2261. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2262. if len(isbn) > 0:
  2263. isbn = isbn[0].contents[0].split(',')
  2264. book["ISBNs"] = isbn
  2265.  
  2266. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2267. book["title"] = title
  2268.  
  2269. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2270. book["author"] = author
  2271.  
  2272. print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"}))
  2273. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  2274. book["publishedDate"] = publishedDate
  2275.  
  2276. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2277. book["link"] = link
  2278.  
  2279.  
  2280. books[book_type].append(book)
  2281. count+=1
  2282. print(count)
  2283. db[key] = books
  2284. 36/30:
  2285. from bs4 import SoupStrainer
  2286. db = {}
  2287. count = 0
  2288. for key, value in subjects.items():
  2289. books = {}
  2290.  
  2291. for paragraph in value:
  2292. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2293. if len(book_type) > 0:
  2294. book_type = book_type[0].contents[0]
  2295. else:
  2296. book_type = 'Required'
2297. if book_type not in books:
  2298. books[book_type] = []
  2299.  
  2300. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2301. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2302. if len(isbn) > 0:
  2303. isbn = isbn[0].contents[0].split(',')
  2304. book["ISBNs"] = isbn
  2305.  
  2306. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2307. book["title"] = title
  2308.  
  2309. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2310. book["author"] = author
  2311.  
  2312. print(title)
  2313. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
  2314. book["publishedDate"] = publishedDate
  2315.  
  2316. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2317. book["link"] = link
  2318.  
  2319.  
  2320. books[book_type].append(book)
  2321. count+=1
  2322. print(count)
  2323. db[key] = books
  2324. 36/31:
  2325. from bs4 import SoupStrainer
  2326. db = {}
  2327. count = 0
  2328. for key, value in subjects.items():
  2329. books = {}
  2330.  
  2331. for paragraph in value:
  2332. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2333. if len(book_type) > 0:
  2334. book_type = book_type[0].contents[0]
  2335. else:
  2336. book_type = 'Required'
2337. if book_type not in books:
  2338. books[book_type] = []
  2339.  
  2340. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2341. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2342. if len(isbn) > 0:
  2343. isbn = isbn[0].contents[0].split(',')
  2344. book["ISBNs"] = isbn
  2345.  
  2346. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2347. book["title"] = title
  2348.  
  2349. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2350. book["author"] = author
  2351.  
  2352. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
2353. if len(publishedDate) > 0:
  2354. book["publishedDate"] = publishedDate[0].contents[0]
  2355. else:
  2356. book["publishedDate"] = "unknown"
  2357.  
  2358. print book["publishedDate"]
  2359. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2360. book["link"] = link
  2361.  
  2362.  
  2363. books[book_type].append(book)
  2364. count+=1
  2365. print(count)
  2366. db[key] = books
  2367. 36/32:
  2368. from bs4 import SoupStrainer
  2369. db = {}
  2370. count = 0
  2371. for key, value in subjects.items():
  2372. books = {}
  2373.  
  2374. for paragraph in value:
  2375. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2376. if len(book_type) > 0:
  2377. book_type = book_type[0].contents[0]
  2378. else:
  2379. book_type = 'Required'
2380. if book_type not in books:
  2381. books[book_type] = []
  2382.  
  2383. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2384. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2385. if len(isbn) > 0:
  2386. isbn = isbn[0].contents[0].split(',')
  2387. book["ISBNs"] = isbn
  2388.  
  2389. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2390. book["title"] = title
  2391.  
  2392. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2393. book["author"] = author
  2394.  
  2395. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2396. if len(publishedDate) > 0:
  2397. book["publishedDate"] = publishedDate[0].contents[0]
  2398. else:
  2399. book["publishedDate"] = "unknown"
  2400.  
  2401. print book["publishedDate"]
  2402. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2403. book["link"] = link
  2404.  
  2405.  
  2406. books[book_type].append(book)
  2407. count+=1
  2408. print(count)
  2409. db[key] = books
  2410. 36/33:
  2411. from bs4 import SoupStrainer
  2412. db = {}
  2413. count = 0
  2414. for key, value in subjects.items():
  2415. books = {}
  2416.  
  2417. for paragraph in value:
  2418. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2419. if len(book_type) > 0:
  2420. book_type = book_type[0].contents[0]
  2421. else:
  2422. book_type = 'Required'
2423. if book_type not in books:
  2424. books[book_type] = []
  2425.  
  2426. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2427. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2428. if len(isbn) > 0:
  2429. isbn = isbn[0].contents[0].split(',')
  2430. book["ISBNs"] = isbn
  2431.  
  2432. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2433. book["title"] = title
  2434.  
  2435. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2436. book["author"] = author
  2437.  
  2438. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2439. if len(publishedDate) > 0:
  2440. book["publishedDate"] = publishedDate[0].contents[0]
  2441. else:
  2442. book["publishedDate"] = "unknown"
  2443.  
  2444. print(book["publishedDate"])
  2445. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2446. book["link"] = link
  2447.  
  2448.  
  2449. books[book_type].append(book)
  2450. count+=1
  2451. print(count)
  2452. db[key] = books
  2453. 36/34:
  2454. from bs4 import SoupStrainer
  2455. db = {}
  2456. count = 0
  2457. for key, value in subjects.items():
  2458. books = {}
  2459.  
  2460. for paragraph in value:
  2461. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2462. if len(book_type) > 0:
  2463. book_type = book_type[0].contents[0]
  2464. else:
  2465. book_type = 'Required'
2466. if book_type not in books:
  2467. books[book_type] = []
  2468.  
  2469. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2470. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2471. if len(isbn) > 0:
  2472. isbn = isbn[0].contents[0].split(',')
  2473. book["ISBNs"] = isbn
  2474.  
  2475. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2476. book["title"] = title
  2477.  
  2478. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2479. book["author"] = author
  2480.  
  2481. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2482. if len(publishedDate) > 0:
  2483. book["publishedDate"] = publishedDate[0].contents[0]
  2484. else:
  2485. book["publishedDate"] = "unknown"
  2486.  
  2487.  
  2488. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2489. book["link"] = link
  2490.  
  2491.  
  2492. books[book_type].append(book)
  2493. count+=1
  2494. print(count)
  2495. db[key] = books
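# The cell above settles the record format. A consolidated sketch that
# parses each paragraph once instead of once per field (same classes and
# defaults as above, still assuming exactly one title and one itemLink
# per paragraph):
from bs4 import BeautifulSoup

def parse_book(paragraph):
    soup = BeautifulSoup(paragraph, "html.parser")
    book = {"title": "", "author": "", "publishedDate": "",
            "link": "", "ISBNs": []}
    isbn = soup.find_all("span", {"class": "isbns invisible"})
    if isbn:
        book["ISBNs"] = isbn[0].contents[0].split(',')
    book["title"] = soup.find_all("span", {"class": "title"})[0].contents[0]
    book["author"] = [a.contents[0]
                      for a in soup.find_all("span", {"class": "author"})]
    date = soup.find_all("span", {"class": "publishedDate"})
    book["publishedDate"] = date[0].contents[0] if date else "unknown"
    book["link"] = soup.find_all("a", {"class": "itemLink"})[0]['href']
    return book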
  2496. 36/35: db["INFS2200"]
  2497. 36/36:
  2498. with open('db.json','w') as dbf:
  2499. json.dump(db, dbf)
  2500. 36/37:
  2501. with open('db.json','w') as dbf:
  2502. json.dump(db, dbf, indent=4)
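# json.dump writes straight to the open handle; dumps returns a string,
# and everything after its first argument is keyword-only, so it cannot
# take a file handle. Reading the file back is the mirror image:
import json

with open('db.json', 'r') as dbf:
    db_check = json.load(dbf)  # db_check is just an illustration name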
  2503. 36/38:
  2504. from bs4 import SoupStrainer
  2505. db = {}
  2506. count = 0
  2507. for key, value in subjects.items():
  2508. books = {}
  2509.  
  2510. for paragraph in value:
  2511. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2512. if len(book_type) > 0:
  2513. book_type = book_type[0].contents[0]
  2514. else:
  2515. book_type = 'Required'
  2516. if book_type not in books:
  2517. books[book_type] = []
  2518.  
  2519. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2520. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2521. if len(isbn) > 0:
  2522. isbn = isbn[0].contents[0].split(',')
  2523. book["ISBNs"] = isbn
  2524.  
  2525. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2526. book["title"] = title
  2527.  
  2528. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2529. book["author"] = author
  2530.  
  2531. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2532. if len(publishedDate) > 0:
  2533. book["publishedDate"] = publishedDate[0].contents[0]
  2534. else:
  2535. book["publishedDate"] = "unknown"
  2536.  
  2537.  
  2538. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2539. book["link"] = link
  2540.  
  2541.  
  2542. books[book_type].append(book)
  2543. count+=1
  2544. print(count)
  2545. db[key] = books
2546. with open('db.json','w') as dbf:
  2547. json.dump(db, dbf, indent=4)
  2548. 36/39:
  2549. from bs4 import SoupStrainer
  2550. db = {}
  2551. count = 0
  2552. for key, value in subjects.items():
  2553. books = {}
  2554.  
  2555. for paragraph in value:
  2556. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2557. if len(book_type) > 0:
  2558. book_type = book_type[0].contents[0]
  2559. else:
  2560. book_type = 'Required'
  2561. if book_type not in books:
  2562. books[book_type] = []
  2563.  
  2564. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2565. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2566. if len(isbn) > 0:
  2567. isbn = isbn[0].contents[0].split(',')
  2568. book["ISBNs"] = isbn
  2569.  
  2570. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2571. book["title"] = title
  2572.  
  2573. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2574. book["author"] = author
  2575.  
  2576. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2577. if len(publishedDate) > 0:
  2578. book["publishedDate"] = publishedDate[0].contents[0]
  2579. else:
  2580. book["publishedDate"] = "unknown"
  2581.  
  2582.  
  2583. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2584. book["link"] = link
  2585.  
  2586. print(book)
  2587. books[book_type].append(book)
  2588. count+=1
  2589. print(count)
  2590. db[key] = books
2591. with open('db.json','w') as dbf:
  2592. json.dump(db, dbf, indent=4)
  2593. 36/40:
  2594. from bs4 import SoupStrainer
  2595. db = {}
  2596. count = 0
  2597. for key, value in subjects.items():
  2598. books = {}
  2599.  
  2600. for paragraph in value:
  2601. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2602. if len(book_type) > 0:
  2603. book_type = book_type[0].contents[0]
  2604. else:
  2605. book_type = 'Required'
  2606. if book_type not in books:
  2607. books[book_type] = []
  2608.  
  2609. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2610. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2611. if len(isbn) > 0:
  2612. isbn = isbn[0].contents[0].split(',')
  2613. book["ISBNs"] = isbn
  2614.  
  2615. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2616. book["title"] = title
  2617.  
  2618. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2619. book["author"] = author
  2620.  
  2621. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2622. if len(publishedDate) > 0:
  2623. book["publishedDate"] = publishedDate[0].contents[0]
  2624. else:
  2625. book["publishedDate"] = "unknown"
  2626.  
  2627.  
  2628. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2629. book["link"] = link
  2630.  
  2631. books[book_type].append(book)
  2632. count+=1
  2633. print(books)
  2634. db[key] = books
2635. with open('db.json','w') as dbf:
  2636. json.dump(db, dbf, indent=4)
  2637. 36/41:
  2638. from bs4 import SoupStrainer
  2639. db = {}
  2640. count = 0
  2641. for key, value in subjects.items():
  2642. books = {}
  2643.  
  2644. for paragraph in value:
  2645. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2646. if len(book_type) > 0:
  2647. book_type = book_type[0].contents[0]
  2648. else:
  2649. book_type = 'Required'
  2650. if book_type not in books:
  2651. books[book_type] = []
  2652.  
  2653. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2654. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2655. if len(isbn) > 0:
  2656. isbn = isbn[0].contents[0].split(',')
  2657. book["ISBNs"] = isbn
  2658.  
  2659. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2660. book["title"] = title
  2661. if title == "The law of torts in Australia":
  2662. print("found it")
  2663.  
  2664. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2665. book["author"] = author
  2666.  
  2667. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2668. if len(publishedDate) > 0:
  2669. book["publishedDate"] = publishedDate[0].contents[0]
  2670. else:
  2671. book["publishedDate"] = "unknown"
  2672.  
  2673.  
  2674. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2675. book["link"] = link
  2676.  
  2677. books[book_type].append(book)
  2678. count+=1
  2679. #print(books)
  2680. db[key] = books
2681. with open('db.json','w') as dbf:
  2682. json.dump(db, dbf, indent=4)
  2683. 36/42:
  2684. from bs4 import SoupStrainer
  2685. db = {}
  2686. count = 0
  2687. for key, value in subjects.items():
  2688. books = {}
  2689.  
  2690. for paragraph in value:
  2691. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2692. if len(book_type) > 0:
  2693. book_type = book_type[0].contents[0]
  2694. else:
  2695. book_type = 'Required'
  2696. if book_type not in books:
  2697. books[book_type] = []
  2698.  
  2699. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2700. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2701. if len(isbn) > 0:
  2702. isbn = isbn[0].contents[0].split(',')
  2703. book["ISBNs"] = isbn
  2704.  
  2705. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2706. book["title"] = title
  2707. if title == "The law of torts in Australia":
  2708. print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
  2709. print(book_type)
  2710.  
  2711. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2712. book["author"] = author
  2713.  
  2714. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2715. if len(publishedDate) > 0:
  2716. book["publishedDate"] = publishedDate[0].contents[0]
  2717. else:
  2718. book["publishedDate"] = "unknown"
  2719.  
  2720.  
  2721. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2722. book["link"] = link
  2723.  
  2724. books[book_type].append(book)
  2725. count+=1
  2726. #print(books)
  2727. db[key] = books
2728. with open('db.json','w') as dbf:
  2729. json.dump(db, dbf, indent=4)
  2730. 36/43:
  2731. from bs4 import SoupStrainer
  2732. db = {}
  2733. count = 0
  2734. for key, value in subjects.items():
  2735. books = {}
  2736.  
  2737. for paragraph in value:
  2738. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2739. if len(book_type) > 0:
  2740. book_type = book_type[0].contents[0]
  2741. else:
  2742. book_type = 'Required'
  2743. if book_type not in books:
  2744. books[book_type] = []
  2745.  
  2746. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2747. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2748. if len(isbn) > 0:
  2749. isbn = isbn[0].contents[0].split(',')
  2750. book["ISBNs"] = isbn
  2751.  
  2752. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2753. book["title"] = title
  2754. if title == "The law of torts in Australia":
  2755. print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
  2756. print(book_type)
  2757.  
  2758. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2759. book["author"] = author
  2760.  
  2761. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2762. if len(publishedDate) > 0:
  2763. book["publishedDate"] = publishedDate[0].contents[0]
  2764. else:
  2765. book["publishedDate"] = "unknown"
  2766.  
  2767.  
  2768. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2769. book["link"] = link
  2770. print(book)
  2771.  
  2772.  
  2773. books[book_type].append(book)
  2774.  
  2775. print(books[book_type])
  2776. count+=1
  2777. #print(books)
  2778. db[key] = books
2779. with open('db.json','w') as dbf:
  2780. json.dump(db, dbf, indent=4)
  2781. 36/44:
  2782. from bs4 import SoupStrainer
  2783. db = {}
  2784. count = 0
  2785. for key, value in subjects.items():
  2786. books = {}
  2787.  
  2788. for paragraph in value:
  2789. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2790. if len(book_type) > 0:
  2791. book_type = book_type[0].contents[0]
  2792. else:
  2793. book_type = 'Required'
  2794. if book_type not in books:
  2795. books[book_type] = []
  2796.  
  2797. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2798. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2799. if len(isbn) > 0:
  2800. isbn = isbn[0].contents[0].split(',')
  2801. book["ISBNs"] = isbn
  2802.  
  2803. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2804. book["title"] = title
  2805. if title == "The law of torts in Australia":
  2806. print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
  2807. print(book_type)
  2808.  
  2809. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2810. book["author"] = author
  2811.  
  2812. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2813. if len(publishedDate) > 0:
  2814. book["publishedDate"] = publishedDate[0].contents[0]
  2815. else:
  2816. book["publishedDate"] = "unknown"
  2817.  
  2818.  
  2819. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2820. book["link"] = link
  2821. print(book)
  2822.  
  2823.  
  2824. books[book_type].append(book)
  2825.  
  2826. print(books[book_type])
  2827. count+=1
  2828. #print(books)
  2829. db[key] = books
2830. with open('db.json','w') as dbf:
  2831. json.dump(db, dbf, indent=4)
  2832. 36/45:
  2833. from bs4 import SoupStrainer
  2834. db = {}
  2835. count = 0
  2836. for key, value in subjects.items():
  2837. books = {}
  2838.  
  2839. for paragraph in value:
  2840. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2841. if len(book_type) > 0:
  2842. book_type = book_type[0].contents[0]
  2843. else:
  2844. book_type = 'Required'
  2845. if book_type not in books:
  2846. books[book_type] = []
  2847.  
  2848. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2849. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2850. if len(isbn) > 0:
  2851. isbn = isbn[0].contents[0].split(',')
  2852. book["ISBNs"] = isbn
  2853.  
  2854. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2855. book["title"] = title
  2856.  
  2857. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2858. book["author"] = author
  2859.  
  2860. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2861. if len(publishedDate) > 0:
  2862. book["publishedDate"] = publishedDate[0].contents[0]
  2863. else:
  2864. book["publishedDate"] = "unknown"
  2865.  
  2866.  
  2867. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2868. book["link"] = link
  2869. print(book_type)
  2870. print()
  2871. print(book)
  2872. print()
  2873.  
  2874. books[book_type].append(book)
  2875.  
  2876. print(books[book_type])
  2877. count+=1
  2878. #print(books)
  2879. db[key] = books
2880. with open('db.json','w') as dbf:
  2881. json.dump(db, dbf, indent=4)
  2882. 36/46:
  2883. from bs4 import SoupStrainer
  2884. db = {}
  2885. count = 0
  2886. for key, value in subjects.items():
  2887. books = {}
  2888.  
  2889. for paragraph in value:
  2890. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2891. if len(book_type) > 0:
  2892. book_type = book_type[0].contents[0]
  2893. else:
  2894. book_type = 'Required'
  2895. if book_type not in books:
  2896. books[book_type] = []
  2897.  
  2898. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2899. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2900. if len(isbn) > 0:
  2901. isbn = isbn[0].contents[0].split(',')
  2902. book["ISBNs"] = isbn
  2903.  
  2904. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2905. book["title"] = title
  2906.  
  2907. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2908. book["author"] = author
  2909.  
  2910. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2911. if len(publishedDate) > 0:
  2912. book["publishedDate"] = publishedDate[0].contents[0]
  2913. else:
  2914. book["publishedDate"] = "unknown"
  2915.  
  2916.  
  2917. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2918. book["link"] = link
  2919. print(book_type)
  2920. print()
  2921. print(book)
  2922. print()
  2923.  
  2924. books[book_type].append(book)
  2925.  
  2926. print(books)
  2927. count+=1
  2928. #print(books)
  2929. db[key] = books
2930. with open('db.json','w') as dbf:
  2931. json.dump(db, dbf, indent=4)
  2932. 36/47:
  2933. from bs4 import SoupStrainer
  2934. db = {}
  2935. count = 0
  2936. for key, value in subjects.items():
  2937. books = {}
  2938.  
  2939. for paragraph in value:
  2940. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2941. if len(book_type) > 0:
  2942. book_type = book_type[0].contents[0]
  2943. else:
  2944. book_type = 'Required'
  2945. if book_type not in books:
  2946. books[book_type] = []
  2947.  
  2948. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  2949. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  2950. if len(isbn) > 0:
  2951. isbn = isbn[0].contents[0].split(',')
  2952. book["ISBNs"] = isbn
  2953.  
  2954. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  2955. book["title"] = title
  2956.  
  2957. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  2958. book["author"] = author
  2959.  
  2960. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  2961. if len(publishedDate) > 0:
  2962. book["publishedDate"] = publishedDate[0].contents[0]
  2963. else:
  2964. book["publishedDate"] = "unknown"
  2965.  
  2966.  
  2967. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  2968. book["link"] = link
  2969. print(key)
  2970. print()
  2971. print(book_type)
  2972. print()
  2973. print(book)
  2974. print()
  2975.  
  2976. books[book_type].append(book)
  2977.  
  2978. print(books)
  2979. print()
  2980. count+=1
  2981. #print(books)
  2982. db[key] = books
2983. with open('db.json','w') as dbf:
  2984. json.dump(db, dbf, indent=4)
  2985. 36/48:
  2986. from bs4 import SoupStrainer
  2987. db = {}
  2988. count = 0
  2989. for key, value in subjects.items():
  2990. books = {}
  2991.  
  2992. for paragraph in value:
  2993. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  2994. if len(book_type) > 0:
  2995. book_type = book_type[0].contents[0]
  2996. else:
  2997. book_type = 'Required'
  2998. if book_type not in books:
  2999. books[book_type] = []
  3000.  
  3001. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  3002. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  3003. if len(isbn) > 0:
  3004. isbn = isbn[0].contents[0].split(',')
  3005. book["ISBNs"] = isbn
  3006.  
  3007. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  3008. book["title"] = title
  3009.  
  3010. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  3011. book["author"] = author
  3012.  
  3013. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  3014. if len(publishedDate) > 0:
  3015. book["publishedDate"] = publishedDate[0].contents[0]
  3016. else:
  3017. book["publishedDate"] = "unknown"
  3018.  
  3019.  
  3020. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  3021. book["link"] = link
  3022. #print(key)
  3023. #print()
  3024. #print(book_type)
  3025. #print()
  3026. #print(book)
  3027. #print()
  3028.  
  3029. books[book_type].append(book)
  3030.  
  3031. #print(books)
  3032. #print()
  3033. count+=1
  3034. db[key] = books
  3035. if "PHIL1002" in db:
  3036. print(db["PHIL1002"])
3037. with open('db.json','w') as dbf:
  3038. json.dump(db, dbf, indent=4)
  3039. 36/49:
  3040. from bs4 import SoupStrainer
  3041. db = {}
  3042. count = 0
  3043. for key, value in subjects.items():
  3044. books = {}
  3045.  
  3046. for paragraph in value:
  3047. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  3048. if len(book_type) > 0:
  3049. book_type = book_type[0].contents[0]
  3050. else:
  3051. book_type = 'Required'
  3052. if book_type not in books:
  3053. books[book_type] = []
  3054.  
  3055. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  3056. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  3057. if len(isbn) > 0:
  3058. isbn = isbn[0].contents[0].split(',')
  3059. book["ISBNs"] = isbn
  3060.  
  3061. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  3062. book["title"] = title
  3063.  
  3064. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  3065. book["author"] = author
  3066.  
  3067. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  3068. if len(publishedDate) > 0:
  3069. book["publishedDate"] = publishedDate[0].contents[0]
  3070. else:
  3071. book["publishedDate"] = "unknown"
  3072.  
  3073.  
  3074. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  3075. book["link"] = link
  3076. #print(key)
  3077. #print()
  3078. #print(book_type)
  3079. #print()
  3080. #print(book)
  3081. #print()
  3082.  
  3083. books[book_type].append(book)
  3084.  
  3085. #print(books)
  3086. #print()
  3087. count+=1
  3088. db[key] = books
  3089. print(count)
3090. with open('db.json','w') as dbf:
  3091. json.dump(db, dbf, indent=4)
  3092. 36/50:
  3093. from bs4 import SoupStrainer
  3094. db = {}
  3095. count = 0
  3096. for key, value in subjects.items():
  3097. books = {}
  3098.  
  3099. for paragraph in value:
  3100. book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
  3101. if len(book_type) > 0:
  3102. book_type = book_type[0].contents[0]
  3103. else:
  3104. book_type = 'Required'
  3105. if book_type not in books:
  3106. books[book_type] = []
  3107.  
  3108. book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
  3109. isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
  3110. if len(isbn) > 0:
  3111. isbn = isbn[0].contents[0].split(',')
  3112. book["ISBNs"] = isbn
  3113.  
  3114. title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
  3115. book["title"] = title
  3116.  
  3117. author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
  3118. book["author"] = author
  3119.  
  3120. publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
  3121. if len(publishedDate) > 0:
  3122. book["publishedDate"] = publishedDate[0].contents[0]
  3123. else:
  3124. book["publishedDate"] = "unknown"
  3125.  
  3126.  
  3127. link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
  3128. book["link"] = link
  3129. #print(key)
  3130. #print()
  3131. #print(book_type)
  3132. #print()
  3133. #print(book)
  3134. #print()
  3135.  
  3136. books[book_type].append(book)
  3137.  
  3138. #print(books)
  3139. #print()
  3140. count+=1
  3141. db[key] = books
  3142. print(count)
  3143. with open('db.json','w') as dbf:
  3144. json.dump(db, dbf, indent=4)
  3145. 36/51: db["INFS1200"]
  3146. 36/52: db["PHIL1002"]
  3147. 36/53:
  3148. with open("subjects_books.json", 'w') as sbj:
3149. json.dump(db, sbj, indent=4)
  3150. 36/54: db
  3151. 36/55:
  3152. with open("subjects_books.json", 'w') as sbj:
3153. json.dump(db, sbj, indent=4)
  3154. 36/56:
  3155. with open("subjects_books.json", 'w') as sbj:
3156. json.dump(db, sbj)
  3157. 36/57:
  3158. with open("subjects_books.json", 'w') as sbj:
  3159. sbj.write(json.dumps(db))
3160. 37/1: f = open('subjects_books.json','r')
  3161. 37/2: import json
3162. 37/3: f = json.loads(open('subjects_books.json','r').read())
  3163. 37/4: f = json.loads(open('subjects_books.json','r').read())
  3164. 37/5: f
  3165. 37/6:
  3166. for subject, book_type in f:
  3167. print(subject)
  3168. 37/7:
  3169. for subject, book_type in f.items():
  3170. print(subject)
  3171. 37/8:
  3172. for subject, book_type in f.items():
  3173. print(book_type)
  3174. 37/9:
  3175. for subject, book_types in f.items():
  3176. for book_type, books in book_types:
  3177. print(books["link"])
  3178. 37/10:
  3179. for subject, book_types in f.items():
  3180. for book_type, books in book_types.items():
  3181. print(books["link"])
  3182. 37/11:
  3183. for subject, book_types in f.items():
  3184. for book_type, books in book_types.items():
  3185. for book in books:
  3186. print(book["link"])
  3187. 37/12:
  3188. for subject, book_types in f.items():
  3189. for book_type, books in book_types.items():
  3190. for book in books:
  3191. print(book["link"])
  3192. print(subject + " " + book_type)
  3193. 37/13:
  3194. for subject, book_types in f.items():
  3195. for book_type, books in book_types.items():
  3196. for book in books:
  3197. print(book["link"])
  3198. print("\tdir=" + subject + " " + book_type)
  3199. 37/14:
  3200. for subject, book_types in f.items():
  3201. for book_type, books in book_types.items():
  3202. for book in books:
  3203. print(book["link"])
  3204. print("\tdir=" + subject + "/" + book_type)
  3205. 37/15: dls = ""
  3206. 37/16:
  3207. for subject, book_types in f.items():
  3208. for book_type, books in book_types.items():
  3209. for book in books:
  3210. dls+= book["link"]
  3211. dls+= "\tdir=" + subject + "/" + book_type
  3212. 37/17: dls
  3213. 37/18:
  3214. for subject, book_types in f.items():
  3215. for book_type, books in book_types.items():
  3216. for book in books:
  3217. dls+= book["link"]
  3218. dls+='\n'
  3219. dls+= "\tdir=" + subject + "/" + book_type
  3220. dls+='\n'
  3221. 37/19: dls
  3222. 37/20: dls = ''
  3223. 37/21:
  3224. for subject, book_types in f.items():
  3225. for book_type, books in book_types.items():
  3226. for book in books:
  3227. dls+= book["link"]
  3228. dls+='\n'
  3229. dls+= "\tdir=" + subject + "/" + book_type
  3230. dls+='\n'
  3231. 37/22: dls
  3232. 37/23:
  3233. with open('dls','w') as dl:
  3234. dl.write(dls)
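# The dls file written above (a URL line followed by an indented dir=
# option line) looks like aria2c's input-file format, so the whole set
# can be fetched with aria2c -i dls. The same construction with a list
# and join instead of repeated string concatenation (f is the dict
# loaded from subjects_books.json):
lines = []
for subject, book_types in f.items():
    for book_type, books in book_types.items():
        for book in books:
            lines.append(book["link"])
            lines.append("\tdir=" + subject + "/" + book_type)
with open('dls', 'w') as dl:
    dl.write("\n".join(lines) + "\n")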
  3235. 38/1: db["PHIL1002"]
  3236. 39/1: from bs4 import BeautifulSoup
  3237. 39/2: import json
  3238. 39/3: import os
  3239. 39/4:
3240. for folder in os.listdir():
  3241. print(folder)
39/5:
for folder in os.listdir():
    print(folder)
39/6:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        print(book_type)
39/7:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            print(page)
39/8:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page_file, 'r').read() as f:  # bugs: page_file is undefined, and the str returned by .read() is not a context manager
                BeautifulSoup(f)
39/9:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r').read() as f:  # still broken: the file object itself must be the context manager
                BeautifulSoup(f)
39/10:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                BeautifulSoup(f.read())
39/11:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                print(BeautifulSoup(f.read()))
39/12:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
39/13:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
                print(title)
39/14:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
39/15:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
39/16:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
39/17:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]

                authors = ''
39/18:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]

                authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
39/19:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]

                authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
                print(authors)
39/20:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]

                authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]

                publishedDate = soup.findAll('span', {'id':"fieldValue-date"})
                print(publishedDate)
39/21:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]

                authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]

                publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]
                print(publishedDate)
39/22:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]

                authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]

                publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]

                revision = soup.findAll('span', {'id':"fieldValue-edition"})
                if len(revision) > 0:
                    revision = revision[0].contents[0]
                else:
                    revision = "0"

                print(revision)
39/23:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]

                authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]

                publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]

                edition = soup.findAll('span', {'id':"fieldValue-edition"})
                if len(edition) > 0:
                    edition = edition[0].edition[0]  # bug: should be edition[0].contents[0], corrected in 39/24
                else:
                    edition = "0"

                print(edition)
39/24:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]

                authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]

                publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]

                edition = soup.findAll('span', {'id':"fieldValue-edition"})
                if len(edition) > 0:
                    edition = edition[0].contents[0]
                else:
                    edition = "0"

                print(edition)
39/25:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
                print(folder)
                print(book_type)

                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
                print(title)

                authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
                print(authors)

                publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]
                print(publishedDate)

                edition = soup.findAll('span', {'id':"fieldValue-edition"})
                if len(edition) > 0:
                    edition = edition[0].contents[0]
                else:
                    edition = "0"

                print(edition)
39/26:
for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(folder + "/" + book_type):
            with open(folder + "/" + book_type + "/" + page, 'r') as f:
                soup = BeautifulSoup(f.read())
                print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
                print(folder)
                print(book_type)

                title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
                print(title)

                authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
                print(authors)

                publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]
                print(publishedDate)

                edition = soup.findAll('span', {'id':"fieldValue-edition"})
                if len(edition) > 0:
                    edition = edition[0].contents[0]
                else:
                    edition = "0"

                print(edition)
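Cells 39/5-39/26 converge on one scrape per saved page. A consolidated sketch of that final loop, assuming the same subject/book_type/page.html folder layout and the same pageTitle and fieldValue-* ids; field_text is a hypothetical helper that factors out the optional-field fallback used for the edition:

import os
from bs4 import BeautifulSoup

def field_text(soup, element_id, default="0"):
    # Text of the first matching span, or the default when the field is absent.
    matches = soup.findAll('span', {'id': element_id})
    return matches[0].contents[0] if matches else default

for folder in os.listdir():
    for book_type in os.listdir(folder):
        for page in os.listdir(os.path.join(folder, book_type)):
            with open(os.path.join(folder, book_type, page), 'r') as f:
                soup = BeautifulSoup(f.read(), "html.parser")

            title = soup.findAll('h1', {'id': "pageTitle"})[0].contents[0]
            authors = [a.contents[0] for a in soup.findAll('span', {'id': "fieldValue-authors"})]
            publishedDate = field_text(soup, "fieldValue-date")
            edition = field_text(soup, "fieldValue-edition")
            print(folder, book_type, title, authors, publishedDate, edition)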
40/1: from bs4 import BeautifulSoup
40/2:
with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html') as f:
    soup = BeautifulSoup(f.read().decode('utf-8','ignore'))  # bug: in Python 3, read() on a text-mode file returns str, which has no .decode()
40/3:
with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html') as f:
    soup = BeautifulSoup(f.read(decode('utf-8','ignore')))  # bug: read() takes a size argument, and decode is not a name in scope
40/4:
with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html') as f:
    soup = BeautifulSoup(f.read(decode='utf-8'))  # bug: read() accepts no decode keyword (the original also had an unbalanced parenthesis)
40/5:
with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html') as f:
    soup = BeautifulSoup(f.read())
40/6:
with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html').read() as f:  # bug: the str returned by read() is not a context manager
    soup = BeautifulSoup(f)
40/7:
with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html','r').read() as f:  # same bug
    soup = BeautifulSoup(f)
40/8:
with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html','r', encoding='utf-8').read() as f:  # right encoding argument, wrong context manager
    soup = BeautifulSoup(f)
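None of the 40/x attempts lands on the working form: pass the encoding (and, if lossy decoding is wanted, an errors policy) to open(), and keep the file object itself as the context manager. A sketch:

from bs4 import BeautifulSoup

path = 'uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html'
with open(path, 'r', encoding='utf-8', errors='ignore') as f:
    soup = BeautifulSoup(f.read(), "html.parser")  # decoding already handled by open()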
41/1: import sandman2
41/2: sandman2.db
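An aside on cell 41/x: sandman2 is usually driven from its command line rather than imported. As far as I recall, sandman2ctl takes a SQLAlchemy-style database URL, e.g. sandman2ctl sqlite:///database.db, and serves an auto-generated REST API over the existing tables; treat the exact invocation as an assumption to verify against the installed version, and note that whether sandman2.db is a meaningful attribute depends on that version too.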
42/1: import sqlite3
42/2: conn = sqlite3.connect('database.db')
42/3: c = conn.cursor()
42/4:
from random import randint
c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)", randint(4,1000), "e@mail.com", "$100", "2nd")  # bug: the parameters must be passed as a single sequence, as 42/5 does
42/5:
from random import randint
c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)", (randint(4,1000), "e@mail.com", "$100", "2nd"))
42/6: conn.commit()
42/7: conn.close()
42/8:
from random import randint
for i in range(4000):  # fails: the connection was closed in 42/7, so the cursor is unusable
    c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)", (randint(4,1000), "e@mail.com", "$100", "2nd"))
42/9: conn = sqlite3.connect('database.db')
42/10: c = conn.cursor()
42/11:
from random import randint
for i in range(4000):
    c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)", (randint(4,1000), "e@mail.com", "$100", "2nd"))
42/12:
from random import randint
for i in range(4000):
    c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)", (randint(4,100), "e@mail.com", "$100", "2nd"))
42/13:
from random import randint
for i in range(4000):
    c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)", (randint(50), "e@mail.com", "$100", "2nd"))  # bug: randint() needs both bounds, e.g. randint(1, 50)
42/14: conn.commit()
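The 4000 single-row inserts would normally be batched: sqlite3's executemany takes the same parameterised statement plus an iterable of parameter tuples. A sketch against the same (assumed) Book_submissions schema:

import sqlite3
from random import randint

conn = sqlite3.connect('database.db')
c = conn.cursor()

rows = [(randint(1, 100), "e@mail.com", "$100", "2nd") for _ in range(4000)]
c.executemany(
    "INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (?, ?, ?, ?)",
    rows,  # one tuple per row; the statement runs once per tuple
)

conn.commit()
conn.close()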