*** parser.py 2012-01-08 12:00:38.000000000 -0500
--- parser.py 2012-01-08 12:35:35.000000000 -0500
***************
*** 43,52 ****
self.temp = reg_urls.findall(self.results)
allurls=self.unique()
for x in allurls:
! if x.count('webcache') or x.count('google.com') or x.count('search?'):
! pass
! else:
urls.append(x)
return urls
def people_linkedin(self):
--- 43,52 ----
self.temp = reg_urls.findall(self.results)
allurls=self.unique()
for x in allurls:
! if x.endswith('doc') or x.endswith('pdf') or x.endswith('xls') or x.endswith('ppt') or x.endswith('odp') or x.endswith('ods') or x.endswith('docx') or x.endswith('xlsx') or x.endswith('pptx'):
urls.append(x)
+ else:
+ pass
return urls
def people_linkedin(self):