# -*- coding: utf-8 -*-  # needed so the curly-quote literal below is legal Python 2 source

import urllib2  # needed to open URLs
import re  # for regular expressions
from BeautifulSoup import BeautifulSoup  # used to parse HTML

# Library List URL is opened and the page is stored
page = urllib2.urlopen("http://forums.darklordpotter.net/library_list.php")
soup = BeautifulSoup(page)  # the page is handed to BeautifulSoup, which lets us do a bunch of cool shit

dlp = []    # will store the list of titles from the DLP Library
final = []  # will store the matches between the Library and the KIA recs

# Loops through all the tags from the library list page whose href matches the DLP URL plus "show" (for "showthread",
# which indicates that the link is to a thread; the only threads on this page are those of Library fics).
for tag in soup.findAll(href=re.compile(r"^http://forums\.darklordpotter\.net/show")):
    contents = tag.contents[0].strip()
    # takes off the part of the string starting with " by" and keeps just the title
    dlp.append(contents[:contents.find(" by")].encode("utf-8"))

# Each Library entry is fed into a search query on the KIA site.
for i in range(len(dlp)):
    page = urllib2.urlopen("http://mujaji.net/kia/?s=" + dlp[i].replace(" ", "+") + "&key=Story+Title")
    soup = BeautifulSoup(page)
    # Compares the titles on the results page to the Library title in question, and adds it to the final list if there is a match.
    for x in soup.findAll(title=re.compile("^Permanent")):
        text = x.contents[0]
        # the result's title sits between the first ";" and the closing curly quote
        if text[text.find(";") + 1:text.find("”")].lower() == dlp[i].lower():
            final.append(dlp[i])

print final  # all of the matches are printed, although there is still some human work to do to rule out false positives
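
For anyone trying this today, here is a rough Python 3 sketch of the same two-step approach (scrape the Library titles, then search KIA for each one). It assumes the third-party requests and beautifulsoup4 packages and that both sites still serve the same markup, none of which is guaranteed; the match test is also looser than the original's exact slice-and-compare, so expect more false positives.

import re
import urllib.parse

import requests
from bs4 import BeautifulSoup

DLP_LIBRARY = "http://forums.darklordpotter.net/library_list.php"
KIA_SEARCH = "http://mujaji.net/kia/?s={query}&key=Story+Title"


def dlp_titles():
    """Scrape fic titles (minus the ' by Author' suffix) from the DLP Library list."""
    soup = BeautifulSoup(requests.get(DLP_LIBRARY).text, "html.parser")
    titles = []
    for tag in soup.find_all(href=re.compile(r"^http://forums\.darklordpotter\.net/show")):
        text = tag.get_text().strip()
        titles.append(text[:text.find(" by")] if " by" in text else text)
    return titles


def on_kia(title):
    """Search KIA by story title and report whether any result link text contains it."""
    url = KIA_SEARCH.format(query=urllib.parse.quote_plus(title))
    soup = BeautifulSoup(requests.get(url).text, "html.parser")
    for link in soup.find_all(title=re.compile("^Permanent")):
        # looser check than the original exact comparison: substring match on the link text
        if title.lower() in link.get_text().lower():
            return True
    return False


if __name__ == "__main__":
    matches = [t for t in dlp_titles() if on_kia(t)]
    print(matches)  # still needs a manual pass to weed out false positives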