Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib2
- import string
- import re
- import xmlrpclib
- from SimpleXMLRPCServer import SimpleXMLRPCServer
def getPage(url):
    """Download *url* and return the raw response body.

    Args:
        url: Absolute URL to fetch (anything ``urllib2.urlopen`` accepts).

    Returns:
        The response body exactly as read from the connection.

    Raises:
        urllib2.URLError: If the request fails (``HTTPError`` is a subclass).
        ValueError: If ``url`` has no recognisable scheme.
    """
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        return response.read()
    finally:
        # Always release the underlying socket, even if read() raises;
        # the server is long-running, so leaked connections accumulate.
        response.close()
def getLinks(content):
    """Return the ``href`` targets of every ``<a>`` tag in an HTML document.

    The document is only scanned when it declares itself as HTML through a
    ``CONTENT="text/html;`` attribute (matched case-insensitively).

    Args:
        content: Page source as a string.

    Returns:
        A list of href values in document order, or the sentinel string
        ``"No Links"`` when the document is not marked as HTML or contains
        no quoted, non-empty anchor hrefs.
    """
    # Check if document is defined as HTML.  Return the sentinel rather than
    # falling off the end with None, which callers cannot handle.
    if not re.search('CONTENT="text/html;', content, flags=re.IGNORECASE):
        return "No Links"

    # Match the href attribute directly instead of slicing a greedy match:
    #  * ``(?=\s)`` keeps tags such as <abbr> from being treated as anchors;
    #  * ``\shref`` requires a real attribute boundary (not e.g. data-href);
    #  * the value stops at its own closing quote, so several links on one
    #    line and trailing attributes no longer bleed into the result.
    href_pattern = re.compile(
        r'<a(?=\s)[^>]*?\shref\s*=\s*'
        r'''(?:"([^"]*)"|'([^']*)')''',
        re.IGNORECASE,
    )
    links = [
        double_quoted or single_quoted
        for double_quoted, single_quoted in href_pattern.findall(content)
        if double_quoted or single_quoted
    ]
    print(links)  # Debug

    if not links:
        return "No Links"
    return links
def filterPages(links, filterstring):
    """Keep only the links whose target page matches *filterstring*.

    Each link is downloaded with ``getPage`` and its body is searched with
    ``re.search``, so ``filterstring`` is interpreted as a regular
    expression, not as a literal substring.

    Args:
        links: Iterable of URLs to test.
        filterstring: Regular expression to look for in each page body.

    Returns:
        A list of the links (in input order) whose page body matched.
        Links that cannot be fetched are left out.
    """
    filteredlinks = []
    for link in links:
        try:
            content = getPage(link)
        except (IOError, ValueError):
            # Scraped hrefs are frequently relative, "mailto:", or dead.
            # urllib2 reports those as ValueError (unknown url type) or
            # URLError/HTTPError (IOError subclasses).  One bad link must
            # not abort the whole filter, so it is treated as "no match".
            continue
        if re.search(filterstring, content):
            filteredlinks.append(link)
    return filteredlinks
def ServerB(url, searchterm):
    """XML-RPC entry point: list the links on *url*, optionally filtered.

    Args:
        url: Address of the page whose links should be collected.
        searchterm: When neither ``None`` nor ``""``, only links whose
            target page matches this term (see ``filterPages``) are kept.

    Returns:
        ``[["No Links"]]`` when no links could be extracted from the page;
        otherwise a list of link strings (filtered if a search term was
        supplied, which may leave the list empty).
    """
    content = getPage(url)
    print(content)  # Debug

    linklist = getLinks(content)
    # Treat a None/empty result like the "No Links" sentinel so a falsy
    # value never reaches filterPages or the XML-RPC marshaller (which
    # rejects None unless allow_none is enabled).
    if not linklist or linklist == "No Links":
        return [["No Links"]]

    if searchterm is not None and searchterm != "":
        print("Search string is valid")
        linklist = filterPages(linklist, searchterm)
    else:
        print("Search string is not valid")
    return linklist
# Expose ServerB over XML-RPC on localhost:8000, then block forever
# handling incoming requests.
server = SimpleXMLRPCServer(("localhost", 8000))
server.register_function(ServerB, "ServerB")
print("Listening on port 8000...")
server.serve_forever()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement