Advertisement
Guest User

Untitled

a guest
Apr 18th, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.31 KB | None | 0 0
import json
import os
import re
import threading
from urllib.parse import quote, urlsplit

import dns.resolver
import requests
from bs4 import BeautifulSoup

from shaymachine2 import Scraper
  8.  
  9.  
  10. class Linkedin_contacs:
  11. def __init__(self):
  12. self.headers2 = {
  13. 'Host': 'www.linkedin.com',
  14. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
  15. 'Accept': 'application/vnd.linkedin.normalized+json+2.1',
  16. 'Accept-Language': 'en-US,en;q=0.5',
  17. 'Accept-Encoding': 'gzip, deflate, br',
  18. 'Referer': 'https://www.linkedin.com/search/results/people/?facetCurrentCompany=%5B%22895692%22%5D&origin=FACETED_SEARCH',
  19. 'x-li-lang': 'en_US',
  20. 'x-li-track': '{"clientVersion":"1.2.9400","osName":"web","timezoneOffset":3,"deviceFormFactor":"DESKTOP","mpName":"voyager-web"}',
  21. 'x-li-page-instance': 'urn:li:page:d_flagship3_search_srp_people;net2UJTURp6JmlTHJy4FKg==',
  22. 'csrf-token': 'ajax:3131330459841831968',
  23. 'x-restli-protocol-version': '2.0.0',
  24. 'Connection': 'keep-alive',
  25. 'Cookie': 'JSESSIONID="ajax:3131330459841831968"; bcookie="v=2&869cbbe1-3640-4af7-88d2-d1e75fe8e06b"; bscookie="v=1&201904071544015a21620d-7089-4805-8705-f73f780a014fAQEgAjbjoD0_1w7ThfWA9y5x4162VswX"; _ga=GA1.2.1746016029.1554651844; _lipt=CwEAAAFqDUkrsaJDOZTtsFDE3ckGbuAISeqyp6wiO5gA4DnmyNFHqdzo5fhnoM1KiCfgOHaGvF3m8S_HCdRMQKFFTy8_f7__yhIOY5NQ5IJq-CSrzxl59bJQoVYAru_YXSMQkko0TIgZaYyiaPmp4Wk2DuSMulRp6WfStw2pxmSwtl5ShT674JdJ_FBb0nILPvO0cz-KKTHVrZWbIJ9wCKlAfNyV5yIVLJrIoOVPY4ZxcV__CgfTJbqx_6mnikD_hniLPF7S0pqQH4IIhQ; UserMatchHistory=AQLOEH7LLqLr-wAAAWoNSTHZVympZKp74DumClA6pouq_s07vB4gpm-WuLgV7Av-45feVmYBroUhxdwtPxsS2zqWkMWPrSFYyXx05VF6c_ANKpE0g7HjgJBSbYeKqnA9JFZ4xTVBs5YFt9pKeUN8DFdkILNFTs8j59OavDEtbIFuMHbNB4qtbqE047I_e5sCFQuW-rgbTB_6umlkeYx0_FDew5_SLfuKdbN0; _guid=d3bbdde2-eb43-4bbe-a709-030e1362ba1a; li_oatml=AQFAKn4QOjuXrQAAAWn4iF6fiOobuXVCMgQ9ndojogDMZcTellkgTl2cq4BwJQRVAnlJLklHxOuZmLQyDD31yW2QvNjIgVT1; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-1303530583%7CMCIDTS%7C17998%7CMCMID%7C43258446753252445861663769055965269256%7CMCAAMLH-1555605577%7C6%7CMCAAMB-1555605577%7C6G1ynYcLPuiQxYZrsz_pkqfLG9yMXBpb2zX5dvJdYQJzPXImdj0y%7CMCOPTOUT-1555007977s%7CNONE%7CMCCIDH%7C-843710154%7CvVersion%7C3.3.0; aam_uuid=43795614385153165771718626939627519683; lidc="b=OB56:g=1560:u=22:i=1555001090:t=1555087132:s=AQHFqu-BbT-rULh6BH62EY40-kZi-zB0"; _gat=1; visit=v=1&M; liap=true; li_at=AQEDASP4vVQBQDA4AAABag1EKKUAAAFqMVCspU4AxNZpWE6hlPMxz3UZ71RRG2h5n5pG8hiH70ZmWRGGjuzrnpI0PIi61eLQ5kvLnWI8bN6CRWkOCuvc9v717MgJfFJcCsR_3OwO2VyMFZl8XRseeUnO; sl=v=1&qpBwD; lang=v=2&lang=en-us; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1',
  26. 'Cache-Control': 'max-age=0',
  27. 'TE': 'Trailers'
  28. }
  29. self.headers = {
  30. 'Host': 'www.linkedin.com',
  31. 'User-Agent': 'Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; WebView/2.0; rv:11.0; IEMobile/11.0; NOKIA; Lumia 525) like Gecko',
  32. 'Accept': 'application/vnd.linkedin.normalized+json+2.1',
  33. 'Accept-Language': 'en-US,en;q=0.5',
  34. 'Accept-Encoding': 'gzip, deflate, br',
  35. 'Referer': 'https://www.linkedin.com/',
  36. 'x-li-lang': 'en_US',
  37. 'x-li-track': '{"clientVersion":"1.2.9199","osName":"web","timezoneOffset":3,"deviceFormFactor":"PHONE","mpName":"voyager-web"}',
  38. 'x-li-page-instance': 'urn:li:page:p_flagship3_search_srp_companies;1q8KvNNDTuCf2RhL12N9HA==',
  39. 'csrf-token': 'ajax:3110815806591265107',
  40. 'x-restli-protocol-version': '2.0.0',
  41. 'Connection': 'keep-alive',
  42. 'Cookie': 'JSESSIONID="ajax:3110815806591265107"; bcookie="v=2&869cbbe1-3640-4af7-88d2-d1e75fe8e06b"; bscookie="v=1&201904071544015a21620d-7089-4805-8705-f73f780a014fAQEgAjbjoD0_1w7ThfWA9y5x4162VswX"; lidc="b=VB34:g=2116:u=5:i=1554655322:t=1554737664:s=AQGIKDAeEASURAv1cbUfd4BWPmSkiHBO"; _ga=GA1.2.1746016029.1554651844; sl=v=1&qAoOQ; liap=true; li_at=AQEDASsp3JYCpCdJAAABafitS1AAAAFqHLnPUE0AQIeRD5WeR77yejHOvGnyGp-tuZNbalti_wDi9k9GHsjDLGLxxse4Pc2ygnTs-VwBFuZBZvp1ZY-NqMq6m8uFR24Uzlmzl_Xw3XwgUkoWwphbyEeR; _lipt=CwEAAAFp-ImK3PTzGHqieW9zK-0KjZjhsFYLToIXIWdAvBaOXDwzPwapgJqizZTadaADnOYkhJSEcO01pd69sx4kZkX2Jlzz2SGj-nMw5f-PoT02jPrexDLTuwu_zQM0j444rBBC8NagLqkv3qsNZTc8KdQrAo4zMOFwJg1qtSHppFl4Bw9hUyhyCkd6DLdcRyjSB5JXlhG9NML0MZW6oaYS7Yo; UserMatchHistory=AQLzsTPwaD4EQgAAAWn4rWHiwtMRwrOws25Em8kecUu6AstWCIO-0oK-CUVkxPXNqskppMD8aVjsKt2PqonG6R8dGeHYARK8QvZSXk1NIoYVTGaqdh0DaWsYaw9EPzi-ZqKL9bcsPu2uNfnzZOzlP_uQAR_A8Al7qjhH53aP-qXjdFQdY4W-ZJ_DcRuKyi-0kjQMxowyvbekL4dADWVBPiTprlSZD9JEZgmC; _guid=d3bbdde2-eb43-4bbe-a709-030e1362ba1a; li_oatml=AQFAKn4QOjuXrQAAAWn4iF6fiOobuXVCMgQ9ndojogDMZcTellkgTl2cq4BwJQRVAnlJLklHxOuZmLQyDD31yW2QvNjIgVT1; lang=v=2&lang=en-us; _gat=1; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-1303530583%7CMCIDTS%7C17994%7CMCMID%7C43258446753252445861663769055965269256%7CMCAAMLH-1555260128%7C6%7CMCAAMB-1555260128%7C6G1ynYcLPuiQxYZrsz_pkqfLG9yMXBpb2zX5dvJdYQJzPXImdj0y%7CMCOPTOUT-1554662528s%7CNONE%7CMCCIDH%7C1101017511%7CvVersion%7C3.3.0; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1; aam_uuid=43795614385153165771718626939627519683',
  43. 'TE': 'Trailers'
  44. }
  45. self.name = input('insert a company name: ')
  46. self.url = f'https://www.linkedin.com/voyager/api/search/cluster?count=10&guides=List(v->COMPANIES)&keywords={self.name}&origin=SWITCH_SEARCH_VERTICAL&q=guided'
  47. self.comp_name = []
  48. self.response = None
  49. self.tryjson = None
  50. self.compnum = None
  51.  
  52. def getCompName(self):
  53. self.response = requests.get(self.url, headers=self.headers2)
  54. self.tryjson = json.loads(self.response.text)
  55. for i in range(10, 20):
  56. self.comp_name.append(self.tryjson['included'][i]['name'])
  57. for i in self.comp_name:
  58. print(i)
  59. Linkedin_contacs.choose_comp(self)
  60.  
  61. def choose_comp(self):
  62. numchosene = int(input('for the first company insert 1 and so on..: ')) + 9
  63. precompnum = self.tryjson['included'][numchosene]['entityUrn']
  64. self.compnum = int(precompnum.split(':')[-1])
  65. Linkedin_contacs.get_contacts(self)
  66.  
  67. def get_contacts(self):
  68. file = open('contactsLogs.txt', 'w')
  69. file.write(f'company name: {self.name}\ncontacts names and title:\n')
  70. for j in range(10):
  71. path2 = f"https://www.linkedin.com/voyager/api/search/blended?count=10&filters=List(currentCompany->{self.compnum},resultType->PEOPLE)&origin=FACETED_SEARCH&q=all&queryContext=List(spellCorrectionEnabled->true,relatedSearchesEnabled->true,kcardTypes->PROFILE|COMPANY)&start={j}0"
  72. response2 = requests.get(path2, headers=self.headers2)
  73. if response2.status_code == 200:
  74. tryjson2 = json.loads(response2.text)
  75. for i in tryjson2['included']:
  76. if 'firstName' in i.keys():
  77. file.write(f"{i['firstName']} {i['lastName']} - {i['occupation']}\n")
  78. file.close()
  79.  
  80.  
  81. # class Mails_scrape:
  82. # def __init__(self):
  83. # self.urls = mails_links.urls
  84. # self.mails = []
  85. # self.reg = f"[A-Za-z0-9\.\+_-]+@{domain}"
  86. #
  87. # def scaner(self):
  88. # for i in self.urls:
  89. # tmpurl = requests.get(i)
  90. # html = tmpurl.text
  91. # soup = BeautifulSoup(html, 'html.parser')
  92. # self.mails.append(re.findall(self.reg, soup.text))
  93. #
  94. # print(self.mails)
  95.  
  96.  
  97. class Resolver:
  98. def __init__(self):
  99. self.domains_list = []
  100. self.domains = ''
  101. self.ip_list = []
  102.  
  103. def resoler(self):
  104. geturls = Scraper()
  105. geturls.scrape(searchstr, 2)
  106. self.domains_list = geturls.urls
  107. file = open('contactsLogs.txt', 'a')
  108. for i in self.domains_list:
  109. if i[-3:] != 'pdf':
  110. i = i.split('https://')[1].split("/")[0]
  111. answers = dns.resolver.query(i, 'A')
  112. for j in answers:
  113. file.write(f"{i} - {j}\n")
  114. self.ip_list.append(j)
  115. file.close()
  116.  
  117.  
  118. start = Linkedin_contacs()
  119. start.getCompName()
  120.  
  121. domain = input('enter domain: ')
  122. searchstr = f'site: {domain} -site:www{domain}'
  123. res = Resolver()
  124. res.resoler()
  125. # mails_links = Scraper()
  126. # mails_links.scrape(domain,5)
  127. # Mails_scrape()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement