Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Fetch one page, highlight every vocabulary word found in its text, and save
# the annotated HTML locally.
#
# Names this fragment expects from the surrounding script:
#   rl                  - sequence whose first item is the URL to fetch
#   headers             - dict of request headers
#   proxy_support, cj   - urllib proxy handler and cookie jar
#   wdict1 .. wdict4    - iterables of vocabulary words
#   c                   - highlight colour chosen earlier in the script
#   numb                - reference label pointing into another generated document
import html  # local import: only used to escape text for the rebuilt fragments

# rl[0] refers to a specific url being pulled from a list in another file.
url = rl[0]
soup = None  # stays None on *every* failure path, not only on timeouts

try:
    req = urllib.request.Request(url, None, headers)
    opener = urllib.request.build_opener(
        proxy_support, urllib.request.HTTPCookieProcessor(cj)
    )
    # The context manager guarantees the response is closed; the original
    # wrote `resp.close` without parentheses, which never called it.
    with opener.open(req) as resp:
        soup = BeautifulSoup(resp.read(), 'html.parser')
except urllib.error.HTTPError:
    # HTTPError subclasses URLError, so it must be tested first or this
    # handler can never run.
    print("HTTP error when opening "+url)
except urllib.error.URLError:
    print("URL error when opening "+url)
except http.client.HTTPException as err:
    print(err, "HTTP exception error when opening "+url)
except socket.timeout:
    print("connection timedout accessing "+url)
else:
    # Escape the colour for use inside an attribute value and the label for
    # use as element text; both are loop-invariant.
    colour_attr = html.escape(str(c), quote=True)
    label_text = html.escape(str(numb), quote=False)

    for wordlist in [wdict1, wdict2, wdict3, wdict4]:
        for word in wordlist:
            if not word:
                # An empty pattern matches every text node and an empty
                # str.replace() needle inserts between every character.
                continue

            # Search literally (re.escape) so that the search and the
            # replacement below agree on what counts as a match.
            foundvocab = soup.find_all(string=re.compile(re.escape(word)))

            # Text nodes hold unescaped text, so escape before re-parsing.
            escaped_word = html.escape(word, quote=False)
            # `background-color` is not an HTML attribute; it has to be
            # supplied through `style` to have any effect.
            marked = (
                f'<mark style="background-color: {colour_attr}">'
                f'{escaped_word}<sup>{label_text}</sup></mark>'
            )

            for term in foundvocab:
                fixed = html.escape(str(term), quote=False).replace(
                    escaped_word, marked
                )
                # Passing a plain string to replace_with() would have the
                # markup escaped on output; parse it so real tags are inserted.
                # NOTE(review): text inside <script>/<style> is matched too.
                term.replace_with(BeautifulSoup(fixed, 'html.parser'))

    # NOTE(review): the original passed the bare expression `path/local.html`
    # as `file=`; print() needs an open file object. The literal below is a
    # placeholder taken from that expression - confirm the real destination.
    output_path = "path/local.html"
    with open(output_path, "w", encoding="utf-8") as out_file:
        print(soup, file=out_file)

    # The original snippet ended with an unused exact-match lookup,
    # `soup.find_all(text=i)`; its result was never read, so it is dropped.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement