Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- *** parser.py 2012-01-08 12:00:38.000000000 -0500
- --- parser.py 2012-01-08 12:35:35.000000000 -0500
- ***************
- *** 43,52 ****
- self.temp = reg_urls.findall(self.results)
- allurls=self.unique()
- for x in allurls:
- ! if x.count('webcache') or x.count('google.com') or x.count('search?'):
- ! pass
- ! else:
- urls.append(x)
- return urls
- def people_linkedin(self):
- --- 43,52 ----
- self.temp = reg_urls.findall(self.results)
- allurls=self.unique()
- for x in allurls:
- ! if x.endswith('doc') or x.endswith('pdf') or x.endswith('xls') or x.endswith('ppt') or x.endswith('odp') or x.endswith('ods') or x.endswith('docx') or x.endswith('xlsx') or x.endswith('pptx'):
- urls.append(x)
- + else:
- + pass
- return urls
- def people_linkedin(self):
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement