Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import json
import re
import threading
from urllib.parse import quote, urlparse

import dns.resolver
import requests
from bs4 import BeautifulSoup

from shaymachine2 import Scraper
class Linkedin_contacs:
    """Interactive company lookup against LinkedIn's Voyager search endpoints.

    Workflow (each step calls the next one):

    1. ``getCompName`` searches companies matching ``self.name`` and prints
       the candidate names.
    2. ``choose_comp`` asks which candidate to use and extracts its numeric
       id from the entry's ``entityUrn``.
    3. ``get_contacts`` pages through the people search filtered by that
       company id and writes ``first last - occupation`` lines to
       ``contactsLogs.txt`` (the file is truncated on every run).

    Attributes:
        headers2: request headers sent with every request made by this class.
        headers: alternative header set; defined but not used by any method
            of this class.
        name: company name used as the search keyword.
        url: company-search URL built from ``name``.
        comp_name: candidate company names from the last search.
        response: raw ``requests`` response of the last company search.
        tryjson: decoded JSON body of the last company search.
        compnum: numeric id of the chosen company, or ``None``.
    """

    # The company search answer is read from ``included[10:20]``; the first
    # ten entries are skipped.  NOTE(review): this offset is inherited from
    # the original script and presumably reflects the response layout at the
    # time -- confirm against a live response.
    _FIRST_COMPANY_INDEX = 10
    _MAX_COMPANIES = 10
    _PAGE_SIZE = 10    # people per search page
    _PAGE_COUNT = 10   # number of pages fetched by get_contacts
    _TIMEOUT = 30      # seconds; requests never time out by default
    _LOG_PATH = 'contactsLogs.txt'

    def __init__(self, name=None):
        """Build the request headers and the company-search URL.

        Args:
            name: company name to search for.  When omitted the user is
                prompted on stdin, which is the original behaviour.
        """
        # NOTE(security): the Cookie / csrf-token values below are hard-coded
        # session credentials.  They should be loaded from the environment or
        # a secrets store and must never be committed or shared.
        self.headers2 = {
            'Host': 'www.linkedin.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
            'Accept': 'application/vnd.linkedin.normalized+json+2.1',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            'Referer': 'https://www.linkedin.com/search/results/people/?facetCurrentCompany=%5B%22895692%22%5D&origin=FACETED_SEARCH',
            'x-li-lang': 'en_US',
            'x-li-track': '{"clientVersion":"1.2.9400","osName":"web","timezoneOffset":3,"deviceFormFactor":"DESKTOP","mpName":"voyager-web"}',
            'x-li-page-instance': 'urn:li:page:d_flagship3_search_srp_people;net2UJTURp6JmlTHJy4FKg==',
            'csrf-token': 'ajax:3131330459841831968',
            'x-restli-protocol-version': '2.0.0',
            'Connection': 'keep-alive',
            'Cookie': 'JSESSIONID="ajax:3131330459841831968"; bcookie="v=2&869cbbe1-3640-4af7-88d2-d1e75fe8e06b"; bscookie="v=1&201904071544015a21620d-7089-4805-8705-f73f780a014fAQEgAjbjoD0_1w7ThfWA9y5x4162VswX"; _ga=GA1.2.1746016029.1554651844; _lipt=CwEAAAFqDUkrsaJDOZTtsFDE3ckGbuAISeqyp6wiO5gA4DnmyNFHqdzo5fhnoM1KiCfgOHaGvF3m8S_HCdRMQKFFTy8_f7__yhIOY5NQ5IJq-CSrzxl59bJQoVYAru_YXSMQkko0TIgZaYyiaPmp4Wk2DuSMulRp6WfStw2pxmSwtl5ShT674JdJ_FBb0nILPvO0cz-KKTHVrZWbIJ9wCKlAfNyV5yIVLJrIoOVPY4ZxcV__CgfTJbqx_6mnikD_hniLPF7S0pqQH4IIhQ; UserMatchHistory=AQLOEH7LLqLr-wAAAWoNSTHZVympZKp74DumClA6pouq_s07vB4gpm-WuLgV7Av-45feVmYBroUhxdwtPxsS2zqWkMWPrSFYyXx05VF6c_ANKpE0g7HjgJBSbYeKqnA9JFZ4xTVBs5YFt9pKeUN8DFdkILNFTs8j59OavDEtbIFuMHbNB4qtbqE047I_e5sCFQuW-rgbTB_6umlkeYx0_FDew5_SLfuKdbN0; _guid=d3bbdde2-eb43-4bbe-a709-030e1362ba1a; li_oatml=AQFAKn4QOjuXrQAAAWn4iF6fiOobuXVCMgQ9ndojogDMZcTellkgTl2cq4BwJQRVAnlJLklHxOuZmLQyDD31yW2QvNjIgVT1; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-1303530583%7CMCIDTS%7C17998%7CMCMID%7C43258446753252445861663769055965269256%7CMCAAMLH-1555605577%7C6%7CMCAAMB-1555605577%7C6G1ynYcLPuiQxYZrsz_pkqfLG9yMXBpb2zX5dvJdYQJzPXImdj0y%7CMCOPTOUT-1555007977s%7CNONE%7CMCCIDH%7C-843710154%7CvVersion%7C3.3.0; aam_uuid=43795614385153165771718626939627519683; lidc="b=OB56:g=1560:u=22:i=1555001090:t=1555087132:s=AQHFqu-BbT-rULh6BH62EY40-kZi-zB0"; _gat=1; visit=v=1&M; liap=true; li_at=AQEDASP4vVQBQDA4AAABag1EKKUAAAFqMVCspU4AxNZpWE6hlPMxz3UZ71RRG2h5n5pG8hiH70ZmWRGGjuzrnpI0PIi61eLQ5kvLnWI8bN6CRWkOCuvc9v717MgJfFJcCsR_3OwO2VyMFZl8XRseeUnO; sl=v=1&qpBwD; lang=v=2&lang=en-us; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1',
            'Cache-Control': 'max-age=0',
            'TE': 'Trailers',
        }
        self.headers = {
            'Host': 'www.linkedin.com',
            'User-Agent': 'Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; WebView/2.0; rv:11.0; IEMobile/11.0; NOKIA; Lumia 525) like Gecko',
            'Accept': 'application/vnd.linkedin.normalized+json+2.1',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            'Referer': 'https://www.linkedin.com/',
            'x-li-lang': 'en_US',
            'x-li-track': '{"clientVersion":"1.2.9199","osName":"web","timezoneOffset":3,"deviceFormFactor":"PHONE","mpName":"voyager-web"}',
            'x-li-page-instance': 'urn:li:page:p_flagship3_search_srp_companies;1q8KvNNDTuCf2RhL12N9HA==',
            'csrf-token': 'ajax:3110815806591265107',
            'x-restli-protocol-version': '2.0.0',
            'Connection': 'keep-alive',
            'Cookie': 'JSESSIONID="ajax:3110815806591265107"; bcookie="v=2&869cbbe1-3640-4af7-88d2-d1e75fe8e06b"; bscookie="v=1&201904071544015a21620d-7089-4805-8705-f73f780a014fAQEgAjbjoD0_1w7ThfWA9y5x4162VswX"; lidc="b=VB34:g=2116:u=5:i=1554655322:t=1554737664:s=AQGIKDAeEASURAv1cbUfd4BWPmSkiHBO"; _ga=GA1.2.1746016029.1554651844; sl=v=1&qAoOQ; liap=true; li_at=AQEDASsp3JYCpCdJAAABafitS1AAAAFqHLnPUE0AQIeRD5WeR77yejHOvGnyGp-tuZNbalti_wDi9k9GHsjDLGLxxse4Pc2ygnTs-VwBFuZBZvp1ZY-NqMq6m8uFR24Uzlmzl_Xw3XwgUkoWwphbyEeR; _lipt=CwEAAAFp-ImK3PTzGHqieW9zK-0KjZjhsFYLToIXIWdAvBaOXDwzPwapgJqizZTadaADnOYkhJSEcO01pd69sx4kZkX2Jlzz2SGj-nMw5f-PoT02jPrexDLTuwu_zQM0j444rBBC8NagLqkv3qsNZTc8KdQrAo4zMOFwJg1qtSHppFl4Bw9hUyhyCkd6DLdcRyjSB5JXlhG9NML0MZW6oaYS7Yo; UserMatchHistory=AQLzsTPwaD4EQgAAAWn4rWHiwtMRwrOws25Em8kecUu6AstWCIO-0oK-CUVkxPXNqskppMD8aVjsKt2PqonG6R8dGeHYARK8QvZSXk1NIoYVTGaqdh0DaWsYaw9EPzi-ZqKL9bcsPu2uNfnzZOzlP_uQAR_A8Al7qjhH53aP-qXjdFQdY4W-ZJ_DcRuKyi-0kjQMxowyvbekL4dADWVBPiTprlSZD9JEZgmC; _guid=d3bbdde2-eb43-4bbe-a709-030e1362ba1a; li_oatml=AQFAKn4QOjuXrQAAAWn4iF6fiOobuXVCMgQ9ndojogDMZcTellkgTl2cq4BwJQRVAnlJLklHxOuZmLQyDD31yW2QvNjIgVT1; lang=v=2&lang=en-us; _gat=1; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-1303530583%7CMCIDTS%7C17994%7CMCMID%7C43258446753252445861663769055965269256%7CMCAAMLH-1555260128%7C6%7CMCAAMB-1555260128%7C6G1ynYcLPuiQxYZrsz_pkqfLG9yMXBpb2zX5dvJdYQJzPXImdj0y%7CMCOPTOUT-1554662528s%7CNONE%7CMCCIDH%7C1101017511%7CvVersion%7C3.3.0; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1; aam_uuid=43795614385153165771718626939627519683',
            'TE': 'Trailers',
        }

        if name is None:
            name = input('insert a company name: ')
        self.name = name

        # Percent-encode the keyword so names containing '&', '#', '+' or
        # spaces cannot corrupt the query string.
        keywords = quote(self.name, safe='')
        self.url = (
            'https://www.linkedin.com/voyager/api/search/cluster?count=10'
            '&guides=List(v->COMPANIES)'
            f'&keywords={keywords}'
            '&origin=SWITCH_SEARCH_VERTICAL&q=guided'
        )
        self.comp_name = []
        self.response = None
        self.tryjson = None
        self.compnum = None

    @staticmethod
    def _company_id_from_urn(urn):
        """Return the integer after the last ':' of an ``entityUrn`` string."""
        return int(urn.split(':')[-1])

    @staticmethod
    def _format_contact(entry):
        """Return the log line for a person entry, or ``None`` for other entries.

        Only entries carrying a ``firstName`` key are treated as people.
        Missing ``lastName`` / ``occupation`` are written as empty strings
        instead of raising ``KeyError``.
        """
        if 'firstName' not in entry:
            return None
        first = entry['firstName']
        last = entry.get('lastName', '')
        occupation = entry.get('occupation', '')
        return f"{first} {last} - {occupation}\n"

    def getCompName(self):
        """Search companies for ``self.name``, print them, then prompt for one.

        Stores the raw response in ``self.response``, the decoded body in
        ``self.tryjson`` and the candidate names in ``self.comp_name``.  If
        the search yields no candidates a message is printed and the method
        returns without prompting.

        Raises:
            requests.HTTPError: the search request returned an error status.
        """
        self.response = requests.get(
            self.url, headers=self.headers2, timeout=self._TIMEOUT
        )
        self.response.raise_for_status()
        self.tryjson = self.response.json()

        begin = self._FIRST_COMPANY_INDEX
        candidates = self.tryjson.get('included', [])[begin:begin + self._MAX_COMPANIES]
        # Rebuild the list on every call so repeated searches do not pile up;
        # slicing also tolerates responses with fewer than ten candidates.
        self.comp_name = [item.get('name', '') for item in candidates]

        if not self.comp_name:
            print(f'no companies found for: {self.name}')
            return
        for company in self.comp_name:
            print(company)
        self.choose_comp()

    def choose_comp(self):
        """Ask which listed company to use, store its id and fetch its contacts.

        The prompt repeats until a number between 1 and the number of listed
        companies is entered.

        Raises:
            RuntimeError: no company list is available (``getCompName`` has
                not produced any candidates).
        """
        available = len(self.comp_name)
        if available == 0:
            raise RuntimeError('no companies to choose from; call getCompName() first')

        while True:
            answer = input('for the first company insert 1 and so on..: ')
            try:
                picked = int(answer)
            except ValueError:
                picked = 0
            if 1 <= picked <= available:
                break
            print(f'please enter a number between 1 and {available}')

        # Position 1 corresponds to included[_FIRST_COMPANY_INDEX].
        entry = self.tryjson['included'][picked + self._FIRST_COMPANY_INDEX - 1]
        self.compnum = self._company_id_from_urn(entry['entityUrn'])
        self.get_contacts()

    def get_contacts(self):
        """Write the people found for ``self.compnum`` to ``contactsLogs.txt``.

        Fetches ``_PAGE_COUNT`` result pages of ``_PAGE_SIZE`` entries each;
        pages that do not answer with HTTP 200 are skipped.  The log file is
        truncated first and is always closed, even if a request fails.
        """
        # Explicit UTF-8 so non-ASCII names do not depend on the platform's
        # default encoding.
        with open(self._LOG_PATH, 'w', encoding='utf-8') as log:
            log.write(f'company name: {self.name}\ncontacts names and title:\n')
            for page in range(self._PAGE_COUNT):
                page_url = (
                    'https://www.linkedin.com/voyager/api/search/blended'
                    f'?count={self._PAGE_SIZE}'
                    f'&filters=List(currentCompany->{self.compnum},resultType->PEOPLE)'
                    '&origin=FACETED_SEARCH&q=all'
                    '&queryContext=List(spellCorrectionEnabled->true,'
                    'relatedSearchesEnabled->true,kcardTypes->PROFILE|COMPANY)'
                    f'&start={page * self._PAGE_SIZE}'
                )
                reply = requests.get(
                    page_url, headers=self.headers2, timeout=self._TIMEOUT
                )
                if reply.status_code != 200:
                    continue
                for entry in reply.json().get('included', []):
                    line = self._format_contact(entry)
                    if line is not None:
                        log.write(line)
- # class Mails_scrape:
- # def __init__(self):
- # self.urls = mails_links.urls
- # self.mails = []
- # self.reg = f"[A-Za-z0-9\.\+_-]+@{domain}"
- #
- # def scaner(self):
- # for i in self.urls:
- # tmpurl = requests.get(i)
- # html = tmpurl.text
- # soup = BeautifulSoup(html, 'html.parser')
- # self.mails.append(re.findall(self.reg, soup.text))
- #
- # print(self.mails)
class Resolver:
    """Resolve the hosts of scraped search-result URLs to their A records.

    Attributes:
        domains_list: URLs returned by the last ``Scraper`` run.
        domains: empty string; not used by any method of this class.
        ip_list: every A-record answer collected by ``resoler``.
    """

    _LOG_PATH = 'contactsLogs.txt'

    def __init__(self):
        self.domains_list = []
        self.domains = ''
        self.ip_list = []

    @staticmethod
    def _hostname(url):
        """Return the lower-cased host part of *url*, or ``None`` if it has none.

        Works for any scheme (and for scheme-less ``host/path`` strings) and
        drops a ``:port`` suffix, which is not part of a DNS name.
        """
        target = url if '://' in url else f'//{url}'
        try:
            return urlparse(target).hostname
        except ValueError:
            # urlparse rejects malformed bracketed (IPv6-style) hosts.
            return None

    def resoler(self, search=None):
        """Scrape URLs for a search string and log ``host - ip`` lines.

        Results are appended to ``contactsLogs.txt`` and the raw answers are
        collected in ``self.ip_list``.  URLs ending in ``pdf`` are skipped,
        each distinct host is queried only once, and a host that fails to
        resolve is reported on stdout and skipped instead of aborting the run.

        Args:
            search: search string handed to ``Scraper.scrape``.  When omitted
                the module-level ``searchstr`` is used, which is what the
                original implementation always did.
        """
        if search is None:
            # Backward compatibility: fall back to the global set by the
            # script section of this file.
            search = searchstr

        geturls = Scraper()
        geturls.scrape(search, 2)
        self.domains_list = geturls.urls

        # dnspython 2.x deprecates query() in favour of resolve(); use
        # whichever the installed version offers.
        lookup = getattr(dns.resolver, 'resolve', None) or dns.resolver.query

        seen_hosts = set()
        with open(self._LOG_PATH, 'a', encoding='utf-8') as log:
            for url in self.domains_list:
                if url.lower().endswith('pdf'):
                    continue
                host = self._hostname(url)
                if not host or host in seen_hosts:
                    continue
                seen_hosts.add(host)
                try:
                    answers = lookup(host, 'A')
                except dns.exception.DNSException as err:
                    print(f'could not resolve {host}: {err}')
                    continue
                for record in answers:
                    log.write(f"{host} - {record}\n")
                    self.ip_list.append(record)
# Script entry point.  Guarded so that importing this module does not start
# prompting for input or issuing network requests.
if __name__ == '__main__':
    # Step 1: look up a company and log its contacts to contactsLogs.txt.
    start = Linkedin_contacs()
    start.getCompName()

    # Step 2: scrape URLs under a domain and append "host - ip" lines.
    domain = input('enter domain: ').strip()
    # `searchstr` deliberately stays a module-level name: Resolver.resoler()
    # reads it as a global.
    # The search operator is written as `site:<domain>` (no space) and the
    # excluded host as `www.<domain>`; the previous string had a stray space
    # and was missing the dot.  NOTE(review): assumes the Scraper backend
    # understands the usual `site:` / `-site:` operators -- confirm.
    searchstr = f'site:{domain} -site:www.{domain}'
    res = Resolver()
    res.resoler()
- # mails_links = Scraper()
- # mails_links.scrape(domain,5)
- # Mails_scrape()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement