Advertisement
Guest User

Untitled

a guest
Jun 26th, 2017
46
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.47 KB | None | 0 0
  1. import urllib2
  2. import string
  3. import re
  4. import xmlrpclib
  5. from SimpleXMLRPCServer import SimpleXMLRPCServer
  6.  
  7.  
  8. def getPage(url):
  9.     req = urllib2.Request(url)
  10.     responce = urllib2.urlopen(req)
  11.     return responce.read()
  12.    
  13. def getLinks(content):
  14.     if (re.search('CONTENT="text/html;', content, flags=re.IGNORECASE)): #Check if document is defined as HTML
  15.         p = re.compile('<a *[^>]*>.*</a>')
  16.         raw_links = p.findall(content)
  17.        
  18.         print raw_links # Debug
  19.        
  20.         if raw_links == []:
  21.             return "No Links"
  22.        
  23.         links = []
  24.         s = re.compile('=".*">')
  25.        
  26.         for n in range(0, len(raw_links)):
  27.             temp = s.search(raw_links[n]).group()
  28.             temp = temp[2:-2]
  29.             print temp
  30.             links.append(temp)
  31.        
  32.         return links
  33.        
  34. def filterPages(links, filterstring):
  35.     filteredlinks = []
  36.    
  37.     for n in range(0, len(links)):
  38.         content = getPage(links[n])
  39.        
  40.         if re.search(filterstring, content):
  41.             filteredlinks.append(links[n])
  42.  
  43.     return filteredlinks
  44.        
  45. def ServerB(url, searchterm):
  46.     content = getPage(url)
  47.     print content # Debug
  48.     linklist = getLinks(content)
  49.    
  50.     if linklist == "No Links":
  51.         return [["No Links"]]
  52.    
  53.     if searchterm != None and searchterm != "":
  54.         print "Search string is valid"
  55.         linklist = filterPages(linklist, searchterm)
  56.    
  57.     else:
  58.         print "Search string is not valid"
  59.        
  60.     searchterm = ""
  61.    
  62.     return linklist
  63.    
  64. server = SimpleXMLRPCServer(("localhost", 8000))
  65. print "Listening on port 8000..."
  66. server.register_function(ServerB, "ServerB")
  67. server.serve_forever()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement