- 28/1:
- from BeautifulSoup import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print link['href']
- 28/2:
- from BeautifulSoup import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 28/3:
- from BeautifulSoup import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 28/4:
- from BeautifulSoup4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 28/5:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 28/6:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parse_only=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 28/7:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 28/8:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parse_only=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 28/9:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, "html.parser"):
- if link.has_attr('href'):
- print(link['href'])
- 28/10:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
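Entries 28/1-28/10 are converging on bs4's link-extraction idiom: BeautifulSoup 3's parseOnlyThese keyword became parse_only in bs4 (28/6 and 28/8 use the new spelling), and an explicit parser avoids the "no parser specified" warning. A minimal sketch of the working form, using find_all('a', href=True) instead of iterating a strained soup; infs1200.html is the file the session reads.

from bs4 import BeautifulSoup

# Read the saved reading-list page and print every href on it.
with open('infs1200.html', 'r') as fh:
    soup = BeautifulSoup(fh.read(), 'html.parser')
for link in soup.find_all('a', href=True):
    print(link['href'])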
- 29/1:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 29/2:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- links.append(link['href'])
- 29/3:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- links.append(link['href'])
- 29/4:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- links.append(link['href'])
- 29/5:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- links.append(link['href'])
- links = [link for link in links if link[:30] == "https://uq.rl.talis.com/lists/"]
- 29/6: links
- 29/7:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- links.append(link['href'])
- link = [link for link in links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
- 29/8: link
- 29/9:
- from bs4 import BeautifulSoup, SoupStrainer
- f = open('infs1200.html','r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
- 29/10: link
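29/5-29/10 keep only the Talis reading-list links by comparing a 30-character slice against the prefix; str.startswith makes the same test without hard-coding the length, which is what later entries trip over (29/29, 29/33 and 29/45 miscount the slice, and 29/46 works around it with len()). A tiny sketch; the helper name is ours.

def reading_list_links(links, prefix="https://uq.rl.talis.com/lists/"):
    # Equivalent to link[:30] == prefix, but immune to miscounted prefix lengths.
    return [link for link in links if link.startswith(prefix)]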
- 29/11:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- for i in os.listdir(os.getcwd()):
- f = open(i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
- print(link)
- 29/12:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- for i in os.listdir(os.getcwd()):
- print(i)
- f = open(i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
- print(link)
- 29/13:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- for i in os.listdir(os.getcwd()):
- print(i)
- f = open(i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- print(page_links)
- link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"][0]
- print(link)
- 29/14:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- for i in os.listdir(os.getcwd()):
- print(i)
- f = open(i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- print(page_links)
- link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"]
- if len(link) > 0:
- print(link[0])
- 29/15:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- for i in os.listdir(os.getcwd()):
- print(i)
- f = open(i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- #print(page_links)
- link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"]
- if len(link) > 0:
- print(link[0])
- 29/16:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir(os.getcwd()):
- f = open(i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- #print(page_links)
- link = [link for link in page_links if link[:30] == "https://uq.rl.talis.com/lists/"]
- if len(link) > 0:
- links.append(link[0])
- 29/17:
- with open('../new', mode='wt', encoding='utf-8') as myfile:
- myfile.write('\n'.join(lines))
- 29/18:
- with open('../new', mode='wt', encoding='utf-8') as myfile:
- myfile.write('\n'.join(links))
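29/11-29/18 put that together: walk the directory of saved pages, take the first reading-list link from each, and write the results out. A consolidated sketch, assuming every name os.listdir returns is a readable HTML file (the guard for pages with no matching link is what the churn in 29/12-29/16 was about).

import os
from bs4 import BeautifulSoup

PREFIX = "https://uq.rl.talis.com/lists/"
links = []
for name in os.listdir(os.getcwd()):
    with open(name, 'r') as fh:
        soup = BeautifulSoup(fh.read(), 'html.parser')
    matches = [a['href'] for a in soup.find_all('a', href=True)
               if a['href'].startswith(PREFIX)]
    if matches:                      # some saved pages carry no reading-list link
        links.append(matches[0])

# '../new' is the output path used in 29/18.
with open('../new', mode='wt', encoding='utf-8') as out:
    out.write('\n'.join(links))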
- 29/19: os.listdir()
- 29/20: os.di()
- 29/21: os.dir()
- 29/22: os.pwd()
- 29/23: os.path
- 29/24: os.getcwd()
- 29/25: os.chdir('..')
- 29/26: os.getcwd()
- 29/27: f = open('rip2/0A3C88A9-61D3-82EC-8861-3A4FBD03BABC.html','r').read()
- 29/28: f
- 29/29:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open(i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- #print(page_links)
- link = [link for link in page_links if link[:35] == "https://uq.rl.talis.com/courses/"]
- if len(link) > 0:
- links.append(link[0])
- 29/30:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- #print(page_links)
- link = [link for link in page_links if link[:35] == "https://uq.rl.talis.com/courses/"]
- if len(link) > 0:
- links.append(link[0])
- 29/31:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- #print(page_links)
- link = [link for link in page_links if link[:35] == "https://uq.rl.talis.com/courses/"]
- if len(link) > 0:
- print(link[0])
- 29/32:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- #print(page_links)
- link = [link for link in page_links if link[:35] == "https://uq.rl.talis.com/courses/"]
- if len(link) > 0:
- print(link[0])
- else:
- print(link)
- 29/33:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- #print(page_links)
- link = [link for link in page_links if link[:33] == "https://uq.rl.talis.com/courses/"]
- if len(link) > 0:
- print(link[0])
- else:
- print(link)
- 29/34:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- print(page_links)
- link = [link for link in page_links if link[:33] == "https://uq.rl.talis.com/courses/"]
- if len(link) > 0:
- print(link[0])
- else:
- print(link)
- 29/35:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string
- print(title)
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- print(page_links)
- link = [link for link in page_links if link[:33] == "https://uq.rl.talis.com/courses/"]
- if len(link) > 0:
- print(link[0])
- else:
- print(link)
- 29/36:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- print(page_links)
- link = [link for link in page_links if link[:33] == "https://uq.rl.talis.com/courses/"]
- if len(link) > 0:
- print(link[0])
- else:
- print(link)
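29/35-29/36 pull the course code out of the page <title>. A small sketch of that step with an explicit parser (a bare BeautifulSoup(f) warns about the missing parser in current bs4); the function name is ours.

from bs4 import BeautifulSoup

def course_code(html):
    # Talis page titles appear to start with the course code, e.g. "INFS1200 ...".
    soup = BeautifulSoup(html, 'html.parser')
    return soup.title.string.split(' ')[0] if soup.title and soup.title.string else None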
- 29/37:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("p", { "class" : "itemBibData" })
- 29/38:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("p", { "class" : "itemBibData" })
- print(paragraphs[0])
- 29/39:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects =
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- 29/40:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects =
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- 29/41:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects =
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- 29/42:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- 29/43:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- print(page_links)
- 29/44:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- print(page_links)
- 29/45:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- link = [link for link in page_links if link[:31] == "https://uq.rl.talis.com/items/"]
- print(link)
- 29/46:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- link = [link for link in page_links if link[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/"]
- print(link)
- 29/47:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- link = [link for link in page_links if link[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/"]
- print(link)
- 29/48:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- page_links = []
- for link in BeautifulSoup(f, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- page_links.append(link['href'])
- page_links = [link for link in page_links if link[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/"]
- for link in page_links:
- links.append(link)
- print(i)
- 29/49: len(links)
- 29/50:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = findAll("div", { "class" : "item" })
- print(paragraphs)
- 29/51:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("div", { "class" : "item" })
- print(paragraphs)
- 29/52:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- print(paragraphs)
- 29/53:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- print('~~~~~~~~~~~~~~~~~~')
- print(paragraphs)
- print('~~~~~~~~~~~~~~~~~~')
- 29/54:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- print('~~~~~~~~~~~~~~~~~~')
- print(paragraphs[0])
- print('~~~~~~~~~~~~~~~~~~')
- 29/55:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- 29/56:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- if len([if type == "Book" for type in BeautifulSoup(paragraph).findAll("span", {"class" : "resourceType label"})]) > 0:
- print("yes")
- 29/57:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- if len([type for type in BeautifulSoup(paragraph).findAll("span", {"class" : "resourceType label"}) if type == "Book"]) > 0:
- print("yes")
- 29/58:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- if len([type for type in BeautifulSoup(paragraph).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"]) > 0:
- print("yes")
- 29/59:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- if len([type for type in BeautifulSoup(strparagraph)).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"]) > 0:
- print("yes")
- 29/60:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"]) > 0:
- print("yes")
- 29/61:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"]))
- 29/62:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}) if type.content == "Book"])
- 29/63:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})])
- 29/64:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}).contents])
- 29/65:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}).contents])
- 29/66:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents])
- 29/67:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- print([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents[0]])
- 29/68:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- print([paragraph for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"])
- 29/69:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- links = []
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- for paragraph in paragraphs:
- if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
- print("yes")
- else:
- print("no")
- 29/70:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- books = []
- for paragraph in paragraphs:
- if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
- books.append(paragraph)
- print(books)
- subjects[title] = books
- 29/71:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- books = []
- for paragraph in paragraphs:
- if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
- books.append(paragraph)
- print(i)
- subjects[title] = books
- 29/72:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- books = []
- for paragraph in paragraphs:
- if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"}) if len(type) > 0 and type[0].contents == "Book"]) > 0:
- books.append(paragraph)
- print(i)
- subjects[title] = books
- 29/73:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- books = []
- for paragraph in paragraphs:
- try:
- if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
- books.append(paragraph)
- except:
- pass
- print(books)
- subjects[title] = books
- 29/74:
- from bs4 import BeautifulSoup, SoupStrainer
- import os
- subjects = {}
- for i in os.listdir('rip2'):
- f = open('rip2/' + i,'r').read()
- page_links = []
- title = BeautifulSoup(f).title.string.split(' ')[0]
- print(title)
- paragraphs = BeautifulSoup(f).findAll("li", { "class" : "item" })
- books = []
- for paragraph in paragraphs:
- try:
- if len([type for type in BeautifulSoup(str(paragraph)).findAll("span", {"class" : "resourceType label"})[0].contents if type == "Book"]) > 0:
- books.append(paragraph)
- except:
- pass
- print(i)
- subjects[title] = books
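29/50-29/74 settle on: keep each <li class="item"> whose first resourceType label reads "Book", keyed by course code, with a try/except papering over items that have no label at all. A consolidated sketch of that final form, using the same rip2/ layout and class names as the session.

import os
from bs4 import BeautifulSoup

subjects = {}
for name in os.listdir('rip2'):
    with open(os.path.join('rip2', name), 'r') as fh:
        soup = BeautifulSoup(fh.read(), 'html.parser')
    code = soup.title.string.split(' ')[0]          # course code, e.g. INFS1200
    books = []
    for item in soup.find_all('li', {'class': 'item'}):
        labels = item.find_all('span', {'class': 'resourceType label'})
        # Keep the item only if it is labelled as a Book.
        if labels and labels[0].get_text(strip=True) == 'Book':
            books.append(item)
    subjects[code] = books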
- 29/75: subjects
- 29/76: subjects["infs1200"]
- 29/77: len(subjects)
- 29/78:
- for key, value in dict.iteritems():
- temp = [key,value]
- print(temp)
- 29/79:
- for key, value in subjects.iteritems():
- temp = [key,value]
- print(temp)
- 29/80:
- for key, value in subjects.items():
- temp = [key,value]
- print(temp)
- 29/81:
- for key, value in subjects.items():
- temp = [key,value]
- print(key)
- 29/82: subjects["INFS1200"]
- 29/83: len(subjects)
- 29/84: import json
- 29/85:
- with open('json.json','w') as js:
- js.write(json.dumps(subjects))
- 29/86:
- with open('json.json','w') as js:
- json.dumps(subjects,js)
- 29/87: type(subjects)
- 29/88:
- with open('json.json','w') as js:
- json.dump(subjects,js)
- 29/89:
- with open('json.json','w') as js:
- json.dump(dict(subjects),js)
- 29/90: len(subjects["LAWS1100"])
- 29/91:
- def save_obj(obj, name ):
- with open('obj/'+ name + '.pkl', 'wb') as f:
- pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
- def load_obj(name ):
- with open('obj/' + name + '.pkl', 'rb') as f:
- return pickle.load(f)
- 29/92: save_obj(subjects, "subjects_dump")
- 29/93: save_obj(subjects, "subjects_dump")
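The save_obj/load_obj helpers in 29/91 rely on pickle without importing it and assume an obj/ directory already exists; a self-contained version of the same helpers.

import os
import pickle

def save_obj(obj, name):
    os.makedirs('obj', exist_ok=True)     # the original assumed obj/ was already there
    with open('obj/' + name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open('obj/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)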
- 30/1: subjects
- 29/94: subjects["INFS1200"]
- 29/95: subjects[0]
- 29/96:
- with open('json.json','w') as js:
- json.dump(dict(subjects),js)
- 29/97: type(subjects["INFS1200"])
- 29/98: type(subjects["INFS1200"][0])
- 29/99: str(subjects["INFS1200"][0])
- 29/100:
- for key, value in subjects.items():
- temp = [key,value]
- print(key)
- 29/101:
- for key, value in subjects.items():
- print(value)
- 29/102:
- subj_url = {}
- for key, value in subjects.items():
- urls = []
- print(value)
- 29/103:
- subj_url = {}
- for key, value in subjects.items():
- urls = []
- for paragraph in value:
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 29/104: new_subjects = {}
- 29/105:
- for key, value in subjects.items():
- new_subjects[key] = [str(paragraph) for paragraph in value]
- 29/106:
- with open('json.json','w') as js:
- json.dump(dict(new_subjects),js)
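bs4 Tag objects are not JSON-serializable, which is apparently why 29/85-29/89 keep failing and 29/104-29/106 stringify every item first. A sketch of that round trip; the function names are ours.

import json

def dump_subjects(subjects, path='json.json'):
    # Store str(tag) for each <li class="item"> so the dict survives json.dump.
    serialisable = {key: [str(paragraph) for paragraph in value]
                    for key, value in subjects.items()}
    with open(path, 'w') as js:
        json.dump(serialisable, js)

def load_subjects(path='json.json'):
    with open(path, 'r') as js:
        return json.load(js)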
- 31/1: import json
- 31/2:
- with open('json.json','r') as js:
- json.load(js)
- 31/3: import os
- 31/4: os.getcwd()
- 32/1:
- with open('json.json','r') as js:
- json.load(js)
- 32/2: import json
- 32/3:
- with open('json.json','r') as js:
- json.load(js)
- 32/4:
- subjects = {}
- with open('json.json','r') as js:
- subjects = json.load(js)
- 32/5: subjects
- 32/6: subjects["INFS2200"]
- 32/7: from bs4 import BeautifulSoup
- 32/8:
- for key, value in subjects.items():
- new_subjects[key] = [str(paragraph) for paragraph in value]
- 32/9:
- new_subjects = {}
- for key, value in subjects.items():
- new_subjects[key] = [str(paragraph) for paragraph in value]
- 32/10:
- subj_url = {}
- for key, value in subjects.items():
- urls = []
- for paragraph in value:
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 32/11:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = []
- for paragraph in value:
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- print(link['href'])
- 32/12:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = []
- for paragraph in value:
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href'])
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls.append(url)
- print(key)
- print(urls)
- 32/13:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = []
- for paragraph in value:
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls.append(url)
- print(key)
- print(urls)
- 32/14:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = []
- for paragraph in value:
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls.append(url)
- print(key)
- print(urls)
- 32/15: subjects["INFS2200"]
- 32/16: "Required" in subjects["INFS2200"][0]
- 32/17: "Required" in subjects["INFS2200"][1]
- 32/18: BeautifulSoup(subjects["INFS2200"][0])
- 32/19: BeautifulSoup(subjects["INFS2200"][0], "html.parser")
- 32/20: BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")
- 32/21: BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong").contents
- 32/22: BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents
- 32/23: type(BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents)
- 32/24: BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents[0]
- 32/25:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents[0]
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- print(key)
- print(urls)
- 32/26:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents[0]
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- print(key)
- print(urls)
- 32/27:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(subjects["INFS2200"][0], "html.parser").findAll("strong")[0].contents[0]
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/28: subj_url
- 32/29: subj_url["INFS1200"]
- 32/30:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/31:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
- print(book_type)
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/32:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
- print(book_type)
- print(key)
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/33:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- print(paragraph)
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
- print(book_type)
- print(key)
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/34:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- print(paragraph)
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
- print(book_type)
- print(key)
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/35:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- print(key)
- print(paragraph)
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents[0]
- print(book_type)
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/36:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- print(key)
- print(paragraph)
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")[0].contents
- if len(book_type) > 0:
- book_type = book_type[0]
- else:
- book_type = 'Required'
- print(book_type)
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/37:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- print(key)
- print(paragraph)
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- print(book_type)
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/38:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- print(book_type)
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/39:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/40: subj_url["INFS1200"]
- 32/41: subj_url["LAWS1100"]
- 32/42:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- print("oh")
- book_type = book_type[0].contents[0]
- else:
- print("boy")
- book_type = 'Required'
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/43:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- print(book_type)
- subj_url[key] = urls
- 32/44:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- print(urls)
- subj_url[key] = urls
- 32/45:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 32/46: subj_url["INFS1200"]
- 32/47: subj_url["LAND3007"]
- 32/48: subj_url["LAWS1112"]
- 32/49: count = 0
- 32/50:
- for key, value in subj_url:
- for key2, value2 in value:
- count+=len(value2)
- 32/51:
- for key, value in subj_url.items():
- for key2, value2 in value:
- count+=len(value2)
- 32/52:
- for key, value in subj_url.items():
- for key2, value2 in value.items():
- count+=len(value2)
- 32/53: count
- 32/54: import os
- 32/55: os.getcwd()
- 32/56: os.chdir('rip3')
- 32/57:
- for key, value in subj_url:
- os.mkdir(key)
- 32/58:
- for key, value in subj_url.items():
- os.mkdir(key)
- 32/59:
- for key, value in subj_url.items():
- print(key)
- os.mkdir(key)
- 32/60:
- for key, value in subj_url.items():
- print(key)
- 32/61: subj_url['']
- 32/62:
- for key, value in subj_url.items():
- if key != '':
- os.mkdir(key)
- 32/63:
- for key, value in subj_url.items():
- if key != '':
- for key2, value2 in value.items():
- os.mkdir('key/' + key2)
- 32/64:
- for key, value in subj_url.items():
- if key != '':
- for key2, value2 in value.items():
- os.mkdir(key + '/' + key2)
- 32/65:
- for key, value in subj_url.items():
- if key != '':
- for key2, value2 in value.items():
- print("wget " + value2 + '-O ' key + '/' + key2)
- 32/66:
- for key, value in subj_url.items():
- if key != '':
- for key2, value2 in value.items():
- print("wget " + value2 + '-O '+ key + '/' + key2)
- 32/67:
- for key, value in subj_url.items():
- if key != '':
- for key2, value2 in value.items():
- for url in value2:
- print("wget " + item + '-O '+ key + '/' + key2)
- 32/68:
- for key, value in subj_url.items():
- if key != '':
- for key2, value2 in value.items():
- for url in value2:
- print("wget " + url + '-O '+ key + '/' + key2)
- 32/69:
- for key, value in subj_url.items():
- if key != '':
- for key2, value2 in value.items():
- for url in value2:
- print("wget " + url + ' -O '+ key + '/' + key2)
- 32/70:
- for key, value in subj_url.items():
- if key != '':
- for key2, value2 in value.items():
- for url in value2:
- print("wget " + url + ' -P '+ key + '/' + key2)
- 32/71:
- for key, value in subj_url.items():
- if key != '':
- for key2, value2 in value.items():
- for url in value2:
- os.system("wget " + url + ' -P '+ key + '/' + key2)
- 35/1:
- subjects = {}
- with open('json.json','r') as js:
- subjects = json.load(js)
- 35/2: os.chdir('git/uqexchange/
- 35/3: os.chdir('git/uqexchange/')
- 35/4: import os
- 35/5: os.chdir('git/uqexchange/')
- 35/6:
- subjects = {}
- with open('json.json','r') as js:
- subjects = json.load(js)
- 35/7: import json
- 35/8:
- subjects = {}
- with open('json.json','r') as js:
- subjects = json.load(js)
- 35/9:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 35/10: from bs4 import BeautifulSoup
- 35/11:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 36/1:
- subjects = {}
- with open('json.json','r') as js:
- subjects = json.load(js)
- 36/2: import json
- 36/3:
- subjects = {}
- with open('json.json','r') as js:
- subjects = json.load(js)
- 36/4:
- from bs4 import SoupStrainer
- subj_url = {}
- for key, value in subjects.items():
- urls = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- urls[book_type] = []
- for link in BeautifulSoup(paragraph, parseOnlyThese=SoupStrainer('a')):
- if link.has_attr('href'):
- url = link['href']
- if url[:len("https://uq.rl.talis.com/items/")] == "https://uq.rl.talis.com/items/":
- urls[book_type].append(url)
- subj_url[key] = urls
- 36/5: from bs4 import BeautifulSoup
- 36/6:
- from bs4 import SoupStrainer
- subj_isbn = {}
- for key, value in subjects.items():
- isbns = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- isbns[book_type] = []
- print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "resourceType label")}))
- # isbns[book_type].append(isbn)
- # subj_url[key] = urls
- 36/7:
- from bs4 import SoupStrainer
- subj_isbn = {}
- for key, value in subjects.items():
- isbns = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- isbns[book_type] = []
- print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "resourceType label"})))
- # isbns[book_type].append(isbn)
- # subj_url[key] = urls
- 36/8:
- from bs4 import SoupStrainer
- subj_isbn = {}
- for key, value in subjects.items():
- isbns = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- isbns[book_type] = []
- print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "resourceType label"}))
- # isbns[book_type].append(isbn)
- # subj_url[key] = urls
- 36/9:
- from bs4 import SoupStrainer
- subj_isbn = {}
- for key, value in subjects.items():
- isbns = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- isbns[book_type] = []
- print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"}))
- # isbns[book_type].append(isbn)
- # subj_url[key] = urls
- 36/10:
- from bs4 import SoupStrainer
- subj_isbn = {}
- for key, value in subjects.items():
- isbns = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- isbns[book_type] = []
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"}).contents[0].split(',')
- isbns[book_type].append(isbn)
- subj_isbn[key] = isbns
- 36/11:
- from bs4 import SoupStrainer
- subj_isbn = {}
- for key, value in subjects.items():
- isbns = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- isbns[book_type] = []
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"}).contents[0].split(',')
- isbns[book_type].append(isbn)
- subj_isbn[key] = isbns
- 36/12:
- from bs4 import SoupStrainer
- subj_isbn = {}
- for key, value in subjects.items():
- isbns = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- isbns[book_type] = []
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})[0].contents[0].split(',')
- isbns[book_type].append(isbn)
- subj_isbn[key] = isbns
- 36/13:
- from bs4 import SoupStrainer
- subj_isbn = {}
- for key, value in subjects.items():
- isbns = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in urls:
- isbns[book_type] = []
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- isbns[book_type].append(isbn)
- subj_isbn[key] = isbns
- 36/14: subj_isbn["INFS1200"]
- 36/15: subj_isbn
- 36/16:
- from bs4 import SoupStrainer
- subj_isbn = {}
- for key, value in subjects.items():
- isbns = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- isbns[book_type] = []
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- isbns[book_type].append(isbn)
- subj_isbn[key] = isbns
- 36/17: subj_isbn
- 36/18: subj_isbn["LAWS1114"]
- 36/19: subj_isbn["INFS1300"]
- 36/20:
- from bs4 import SoupStrainer
- db = {}
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = name
- author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- books[book_type].append(book)
- db[key] = books
- 36/21:
- from bs4 import SoupStrainer
- db = {}
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- books[book_type].append(book)
- db[key] = books
- 36/22:
- from bs4 import SoupStrainer
- db = {}
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- print(book)
- books[book_type].append(book)
- db[key] = books
- 36/23:
- from bs4 import SoupStrainer
- db = {}
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- print(book)
- books[book_type].append(book)
- db[key] = books
- 36/24:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
-         count+=1
-         print(count)
- db[key] = books
- 36/25:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- 36/26:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})[0].contents[0]
- print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"}))
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- 36/27:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- print(author)
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- 36/28:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- 36/29:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- print(BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"}))
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- 36/30:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- print(title)
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})[0].contents[0]
- book["publishedDate"] = publishedDate
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- 36/31:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
-         if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
-             print(book["publishedDate"])
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- 36/32:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
-             print(book["publishedDate"])
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- 36/33:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- print(book["publishedDate"])
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- 36/34:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in isbns:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
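- SoupStrainer is imported in each of these cells but never actually used; if the goal was to avoid building the whole fragment tree, bs4 accepts it through the parse_only argument. A minimal sketch (same paragraph variable and class names as above, the tag list is an assumption):
-     from bs4 import BeautifulSoup, SoupStrainer
-     # Only materialise the tags the loop actually reads, instead of the full fragment.
-     wanted = SoupStrainer(["strong", "span", "a"])
-     soup = BeautifulSoup(paragraph, "html.parser", parse_only=wanted)
-     title = soup.find("span", {"class": "title"}).contents[0]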
- 36/35: db["INFS2200"]
- 36/36:
- with open('db.json','w') as dbf:
- json.dump(db, dbf)
- 36/37:
- with open('db.json','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/38:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/39:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- print(book)
- books[book_type].append(book)
- count+=1
- print(count)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/40:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- print(books)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/41:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- if title == "The law of torts in Australia":
- print("found it")
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- #print(books)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/42:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- if title == "The law of torts in Australia":
- print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- print(book_type)
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- books[book_type].append(book)
- count+=1
- #print(books)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/43:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- if title == "The law of torts in Australia":
- print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- print(book_type)
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- print(book)
- books[book_type].append(book)
- print(books[book_type])
- count+=1
- #print(books)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/44:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- if title == "The law of torts in Australia":
- print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- print(book_type)
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- print(book)
- books[book_type].append(book)
- print(books[book_type])
- count+=1
- #print(books)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/45:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- print(book_type)
- print()
- print(book)
- print()
- books[book_type].append(book)
- print(books[book_type])
- count+=1
- #print(books)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/46:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- print(book_type)
- print()
- print(book)
- print()
- books[book_type].append(book)
- print(books)
- count+=1
- #print(books)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/47:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- print(key)
- print()
- print(book_type)
- print()
- print(book)
- print()
- books[book_type].append(book)
- print(books)
- print()
- count+=1
- #print(books)
- db[key] = books
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/48:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- #print(key)
- #print()
- #print(book_type)
- #print()
- #print(book)
- #print()
- books[book_type].append(book)
- #print(books)
- #print()
- count+=1
- db[key] = books
- if "PHIL1002" in db:
- print(db["PHIL1002"])
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/49:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- #print(key)
- #print()
- #print(book_type)
- #print()
- #print(book)
- #print()
- books[book_type].append(book)
- #print(books)
- #print()
- count+=1
- db[key] = books
- print(count)
- with open('db.jon','w') as dbf:
- json.dump(db, dbf, indent=4)
- 36/50:
- from bs4 import SoupStrainer
- db = {}
- count = 0
- for key, value in subjects.items():
- books = {}
- for paragraph in value:
- book_type = BeautifulSoup(paragraph, "html.parser").findAll("strong")
- if len(book_type) > 0:
- book_type = book_type[0].contents[0]
- else:
- book_type = 'Required'
- if book_type not in books:
- books[book_type] = []
- book = {"title":"","author":"","publishedDate":"","link":"","ISBNs":[]}
- isbn = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "isbns invisible"})
- if len(isbn) > 0:
- isbn = isbn[0].contents[0].split(',')
- book["ISBNs"] = isbn
- title = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "title"})[0].contents[0]
- book["title"] = title
- author = [subauthor.contents[0] for subauthor in BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "author"})]
- book["author"] = author
- publishedDate = BeautifulSoup(paragraph, "html.parser").findAll("span", {"class": "publishedDate"})
- if len(publishedDate) > 0:
- book["publishedDate"] = publishedDate[0].contents[0]
- else:
- book["publishedDate"] = "unknown"
- link = BeautifulSoup(paragraph, "html.parser").findAll("a", {"class": "itemLink"})[0]['href']
- book["link"] = link
- #print(key)
- #print()
- #print(book_type)
- #print()
- #print(book)
- #print()
- books[book_type].append(book)
- #print(books)
- #print()
- count+=1
- db[key] = books
- print(count)
- with open('db.json','w') as dbf:
- json.dump(db, dbf, indent=4)
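- Each field above re-parses the same paragraph with a fresh BeautifulSoup call; a tidier sketch that parses each fragment once and reuses the soup (same classes and fallbacks as the loop above, wrapped in a hypothetical parse_book helper):
-     from bs4 import BeautifulSoup
-
-     def parse_book(paragraph):
-         # One parse per fragment; every field comes from the same soup.
-         soup = BeautifulSoup(paragraph, "html.parser")
-         book = {"title": "", "author": [], "publishedDate": "unknown", "link": "", "ISBNs": []}
-         isbn_span = soup.find("span", {"class": "isbns invisible"})
-         if isbn_span is not None:
-             book["ISBNs"] = isbn_span.contents[0].split(',')
-         book["title"] = soup.find("span", {"class": "title"}).contents[0]
-         book["author"] = [a.contents[0] for a in soup.find_all("span", {"class": "author"})]
-         date = soup.find("span", {"class": "publishedDate"})
-         if date is not None:
-             book["publishedDate"] = date.contents[0]
-         book["link"] = soup.find("a", {"class": "itemLink"})["href"]
-         return book
-     # usage inside the loop: books[book_type].append(parse_book(paragraph))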
- 36/51: db["INFS1200"]
- 36/52: db["PHIL1002"]
- 36/53:
- with open("subjects_books.json", 'w') as sbj:
- json.dumps(db, sbj, indent=4)
- 36/54: db
- 36/55:
- with open("subjects_books.json", 'w') as sbj:
- json.dumps(db, sbj, indent=4)
- 36/56:
- with open("subjects_books.json", 'w') as sbj:
- json.dumps(db, sbj)
- 36/57:
- with open("subjects_books.json", 'w') as sbj:
- sbj.write(json.dumps(db))
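- The last few cells swap between json.dumps and json.dump: dumps only builds a string (handing it the file object as a second argument is rejected), while dump writes straight into an open file. Either form works:
-     import json
-     with open("subjects_books.json", "w") as sbj:
-         json.dump(db, sbj, indent=4)            # serialise directly into the file
-     # or, equivalently:
-     with open("subjects_books.json", "w") as sbj:
-         sbj.write(json.dumps(db, indent=4))     # build the string first, then write it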
- 37/1: f = open('subjects_books.json','r')
- 37/2: import json
- 37/3: f = json.loads(open('subjects_books.json','r'))
- 37/4: f = json.loads(open('subjects_books.json','r').read())
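- json.load reads from the file object directly, and a with-block closes the file afterwards; the same load as a short sketch:
-     import json
-     with open('subjects_books.json', 'r') as fh:
-         f = json.load(fh)    # equivalent to json.loads(fh.read())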
- 37/5: f
- 37/6:
- for subject, book_type in f:
- print(subject)
- 37/7:
- for subject, book_type in f.items():
- print(subject)
- 37/8:
- for subject, book_type in f.items():
- print(book_type)
- 37/9:
- for subject, book_types in f.items():
- for book_type, books in book_types:
- print(books["link"])
- 37/10:
- for subject, book_types in f.items():
- for book_type, books in book_types.items():
- print(books["link"])
- 37/11:
- for subject, book_types in f.items():
- for book_type, books in book_types.items():
- for book in books:
- print(book["link"])
- 37/12:
- for subject, book_types in f.items():
- for book_type, books in book_types.items():
- for book in books:
- print(book["link"])
- print(subject + " " + book_type)
- 37/13:
- for subject, book_types in f.items():
- for book_type, books in book_types.items():
- for book in books:
- print(book["link"])
- print("\tdir=" + subject + " " + book_type)
- 37/14:
- for subject, book_types in f.items():
- for book_type, books in book_types.items():
- for book in books:
- print(book["link"])
- print("\tdir=" + subject + "/" + book_type)
- 37/15: dls = ""
- 37/16:
- for subject, book_types in f.items():
- for book_type, books in book_types.items():
- for book in books:
- dls+= book["link"]
- dls+= "\tdir=" + subject + "/" + book_type
- 37/17: dls
- 37/18:
- for subject, book_types in f.items():
- for book_type, books in book_types.items():
- for book in books:
- dls+= book["link"]
- dls+='\n'
- dls+= "\tdir=" + subject + "/" + book_type
- dls+='\n'
- 37/19: dls
- 37/20: dls = ''
- 37/21:
- for subject, book_types in f.items():
- for book_type, books in book_types.items():
- for book in books:
- dls+= book["link"]
- dls+='\n'
- dls+= "\tdir=" + subject + "/" + book_type
- dls+='\n'
- 37/22: dls
- 37/23:
- with open('dls','w') as dl:
- dl.write(dls)
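- The same download list (each link followed by an indented dir= line) can be gathered into a list and joined once, which avoids growing a string in a loop:
-     lines = []
-     for subject, book_types in f.items():
-         for book_type, books in book_types.items():
-             for book in books:
-                 lines.append(book["link"])
-                 lines.append("\tdir=" + subject + "/" + book_type)
-     with open('dls', 'w') as dl:
-         dl.write("\n".join(lines) + "\n")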
- 38/1: db["PHIL1002"]
- 39/1: from bs4 import BeautifulSoup
- 39/2: import json
- 39/3: import os
- 39/4:
-     for folder in os.listdir():
- print(folder)
- 39/5:
- for folder in os.listdir():
- print(folder)
- 39/6:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- print(book_type)
- 39/7:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- print(page)
- 39/8:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page_file, 'r').read() as f:
- BeautifulSoup(f)
- 39/9:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r').read() as f:
- BeautifulSoup(f)
- 39/10:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- BeautifulSoup(f.read())
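- Calling BeautifulSoup without naming a parser makes bs4 pick one itself and emit a warning, and the choice can differ between machines; naming it keeps runs consistent (same loop variables as above):
-     with open(folder + "/" + book_type + "/" + page, 'r') as f:
-         soup = BeautifulSoup(f.read(), "html.parser")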
- 39/11:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- print(BeautifulSoup(f.read()))
- 39/12:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- 39/13:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- print(title)
- 39/14:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- 39/15:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- 39/16:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- 39/17:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- authors = ''
- 39/18:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
- 39/19:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
- print(authors)
- 39/20:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
- publishedDate = soup.findAll('span', {'id':"fieldValue-date"})
- print(publishedDate)
- 39/21:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
- publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]
- print(publishedDate)
- 39/22:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
- publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]
- revision = soup.findAll('span', {'id':"fieldValue-edition"})
- if len(revision) > 0:
- revision = revision[0].contents[0]
- else:
- revision = "0"
- print(revision)
- 39/23:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
- publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]
- edition = soup.findAll('span', {'id':"fieldValue-edition"})
- if len(edition) > 0:
-                             edition = edition[0].contents[0]
- else:
- edition = "0"
- print(edition)
- 39/24:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
- publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]
- edition = soup.findAll('span', {'id':"fieldValue-edition"})
- if len(edition) > 0:
- edition = edition[0].contents[0]
- else:
- edition = "0"
- print(edition)
- 39/25:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- print(folder)
- print(book_type)
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- print(title)
- authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
- print(authors)
- publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]
- print(publishedDate)
- edition = soup.findAll('span', {'id':"fieldValue-edition"})
- if len(edition) > 0:
- edition = edition[0].contents[0]
- else:
- edition = "0"
- print(edition)
- 39/26:
- for folder in os.listdir():
- for book_type in os.listdir(folder):
- for page in os.listdir(folder + "/" + book_type):
- with open(folder + "/" + book_type + "/" + page, 'r') as f:
- soup = BeautifulSoup(f.read())
- print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
- print(folder)
- print(book_type)
- title = soup.findAll('h1', {'id':"pageTitle"})[0].contents[0]
- print(title)
- authors = [author.contents[0] for author in soup.findAll('span', {'id':"fieldValue-authors"})]
- print(authors)
- publishedDate = soup.findAll('span', {'id':"fieldValue-date"})[0].contents[0]
- print(publishedDate)
- edition = soup.findAll('span', {'id':"fieldValue-edition"})
- if len(edition) > 0:
- edition = edition[0].contents[0]
- else:
- edition = "0"
- print(edition)
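- A condensed sketch of the walk above, gathering each page's fields into one record per book (same ids as above; os.path.join and an explicit "html.parser" are assumptions):
-     import os
-     from bs4 import BeautifulSoup
-
-     records = []
-     for folder in os.listdir():
-         for book_type in os.listdir(folder):
-             for page in os.listdir(os.path.join(folder, book_type)):
-                 with open(os.path.join(folder, book_type, page), 'r') as f:
-                     soup = BeautifulSoup(f.read(), "html.parser")
-                 edition = soup.find('span', {'id': "fieldValue-edition"})
-                 records.append({
-                     "subject": folder,
-                     "type": book_type,
-                     "title": soup.find('h1', {'id': "pageTitle"}).contents[0],
-                     "authors": [a.contents[0] for a in soup.find_all('span', {'id': "fieldValue-authors"})],
-                     "publishedDate": soup.find('span', {'id': "fieldValue-date"}).contents[0],
-                     "edition": edition.contents[0] if edition is not None else "0",
-                 })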
- 40/1: from bs4 import BeautifulSoup
- 40/2:
- with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html') as f:
- soup = BeautifulSoup(f.read().decode('utf-8','ignore'))
- 40/3:
- with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html') as f:
- soup = BeautifulSoup(f.read(decode('utf-8','ignore')))
- 40/4:
- with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html') as f:
- soup = BeautifulSoup(f.read(decode='utf-8')))
- 40/5:
- with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html') as f:
- soup = BeautifulSoup(f.read())
- 40/6:
- with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html').read() as f:
- soup = BeautifulSoup(f)
- 40/7:
- with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html','r').read() as f:
- soup = BeautifulSoup(f)
- 40/8:
-     with open('uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html', 'r', encoding='utf-8') as f:
-         soup = BeautifulSoup(f.read())
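- The decode('utf-8','ignore') attempts above map onto open's encoding and errors parameters; undecodable bytes can be dropped at read time:
-     from bs4 import BeautifulSoup
-     path = 'uq/ANCH3030/Further/F26E2724-F82F-019D-99AD-692F43F4643A.html'
-     with open(path, 'r', encoding='utf-8', errors='ignore') as f:
-         soup = BeautifulSoup(f.read(), "html.parser")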
- 41/1: import sandman2
- 41/2: sandman2.db
- 42/1: import sqlite3
- 42/2: conn = sqlite3.connect('database.db')
- 42/3: c = conn.cursor()
- 42/4:
- from random import randint
- c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)" , randint(4,1000), "e@mail.com", "$100", "2nd")
- 42/5:
- from random import randint
- c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)" , (randint(4,1000), "e@mail.com", "$100", "2nd"))
- 42/6: conn.commit()
- 42/7: conn.close()
- 42/8:
- from random import randint
- for i in range(4000):
- c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)" , (randint(4,1000), "e@mail.com", "$100", "2nd"))
- 42/9: conn = sqlite3.connect('database.db')
- 42/10: c = conn.cursor()
- 42/11:
- from random import randint
- for i in range(4000):
- c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)" , (randint(4,1000), "e@mail.com", "$100", "2nd"))
- 42/12:
- from random import randint
- for i in range(4000):
- c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)" , (randint(4,100), "e@mail.com", "$100", "2nd"))
- 42/13:
- from random import randint
- for i in range(4000):
-         c.execute("INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (? , ? , ? , ?)" , (randint(4,50), "e@mail.com", "$100", "2nd"))
- 42/14: conn.commit()
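- For bulk test rows like these, sqlite3 can take the whole batch through executemany and commit once at the end; a sketch against the same table and columns:
-     import sqlite3
-     from random import randint
-
-     conn = sqlite3.connect('database.db')
-     c = conn.cursor()
-     rows = [(randint(4, 1000), "e@mail.com", "$100", "2nd") for _ in range(4000)]
-     c.executemany(
-         "INSERT INTO Book_submissions(bookID, message, price, edition) VALUES (?, ?, ?, ?)",
-         rows)
-     conn.commit()
-     conn.close()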