Advertisement
Guest User

Compares DLP's Library to the recs at KIA

a guest
May 10th, 2010
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.67 KB | None | 0 0
  1. import urllib2 # needed to open URLs
  2. import re # for regular expressions
  3. from BeautifulSoup import BeautifulSoup # used to parse HTML
  4.  
  5. page = urllib2.urlopen("http://forums.darklordpotter.net/library_list.php") # Library List URL is opened and the page is stored
  6. soup = BeautifulSoup(page) # the page is given to BeautifulSoup, which allows to do a bunch of cool shit
  7.  
  8. dlp = [] # will store the list of titles from the DLP Library
  9. final = [] # will store the matches between the Library and the KIA recs
  10.  
  11. # loops through all the tags from the library list page such that the href matches the DLP url plush "show" (for "showthread",
  12. # which would indicated that the link is to a thread; the only threads on this page are those of Library fics)
  13. for tag in soup.findAll(href=re.compile("^http://forums\.darklordpotter\.net/show")):
  14.     contents = tag.contents[0]
  15.     dlp.append(contents.strip()[0:contents.find(" by")].encode("utf-8")) # takes off the part of the string starting with " by" and adds title
  16.  
  17. # each Library entry is fed into a search query on KIA site
  18. for i in range(len(dlp)):
  19.     page = urllib2.urlopen("http://mujaji.net/kia/?s=" + dlp[i].replace(" ","+") + "&key=Story+Title")
  20.     soup = BeautifulSoup(page)
  21.    
  22.     # compares the titles of the results page to the Library title in question, and adds it to the final list it there is a match
  23.     for x in soup.findAll(title=re.compile("^Permanent")):
  24.         if (x.contents[0][(x.contents[0].find(";")+1):(x.contents[0].find("&#8221"))]).lower() == dlp[i].lower():
  25.             final.append(dlp[i])
  26.  
  27.    
  28. print final # all of the matches are printed, although there is still some human work to do to rule out false positives
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement