Advertisement
Guest User

Untitled

a guest
Feb 21st, 2017
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.37 KB | None | 0 0
  1. try:
  2. #rl[0] refers to a specific url being pulled from a list in another file.
  3. req = urllib.request.Request(rl[0],None,headers)
  4. opener = urllib.request.build_opener(proxy_support, urllib.request.HTTPCookieProcessor(cj))
  5. resp = opener.open(req)
  6. soup = BeautifulSoup(resp.read(),'html.parser')
  7. resp.close
  8. except urllib.error.URLError:
  9. print("URL error when opening "+rl[0])
  10. except urllib.error.HTTPError:
  11. print("HTTP error when opening "+rl[0])
  12. except http.client.HTTPException as err:
  13. print(err, "HTTP exception error when opening "+rl[0])
  14. except socket.timeout:
  15. print("connection timedout accessing "+rl[0])
  16. soup = None
  17. else:
  18. for l in [wdict1,wdict2,wdict3,wdict4]:
  19. for i in l:
  20. foundvocab = soup.find_all(text=re.compile(i))
  21. for term in foundvocab:
  22. #c indicates the highlight color determined earlier in the script based on which dictionary the word came from.
  23. #numb is a term i defined earlier to use as a reference to another document this script creates.
  24. fixed = term.replace(i,'<mark background-color="'+c+'">'+i+'<sup>'+numb+'</sup></mark>')
  25. term.replace_with(fixed)
  26. print(soup, file=path/local.html)
  27.  
  28. foundvocab = soup.find_all(text=i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement