SHARE
TWEET

Untitled

a guest Jul 24th, 2019 286 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import requests
  2. import mysql.connector
  3. from bs4 import BeautifulSoup
  4. from flask import Flask, jsonify, request
  5. from flask_cors import CORS, cross_origin
  6. from mysql.connector import Error
  7. import json
  8. import urllib.parse
  9. from http.cookies import SimpleCookie
  10. from flask import Response
  11.  
  12. app = Flask(__name__)
  13. app.config['JSONIFY_PRETTYPRINT_REGULAR'] = True
  14.  
  15. CORS(app,support_credentials=True)
  16. @cross_origin(supports_credentials=True)
  17.  
  18. connection = mysql.connector.connect(host='localhost',database='smart2',user='root',password='WmuaenPe123')
  19. if(connection.is_connected()):
  20.         cursor = connection.cursor()
  21.         cursor.execute("select database();")
  22.         print("connected to db")
  23.  
  24. def grab_session_data():
  25.     headers = {
  26.         'authority': 'www.linkedin.com',
  27.         'pragma': 'no-cache',
  28.         'cache-control': 'no-cache',
  29.         'upgrade-insecure-requests': '1',
  30.         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
  31.         'dnt': '1',
  32.         'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
  33.         'accept-encoding': 'gzip, deflate, br',
  34.         'accept-language': 'en-US,en;q=0.9',
  35.     }
  36.  
  37.     params = (
  38.         ('session_redirect', '/sales'),
  39.         ('fromSignIn', 'true'),
  40.         ('trk', 'navigator'),
  41.     )
  42.  
  43.     response = requests.get('https://www.linkedin.com/uas/login', headers=headers, params=params)
  44.     print("This is login status..", response.status_code)
  45.     soup = BeautifulSoup(response.text)
  46.     payload = {}
  47.     login_form = soup.find('form', attrs={'class':'login__form'})
  48.     for items in login_form:
  49.         if items.get('name') != None:
  50.             payload[items.get('name')] = items.get('value')
  51.     payload['session_key'] = 'vikki@growthmachines.io'
  52.     payload['session_password'] = 'digital2'
  53.     #payload['session_key'] = 'jaceonmpgh@gmail.com'
  54.     #payload['session_password'] = '123123123vb'
  55.     del payload['fp_data']
  56.     cookies = response.cookies
  57.     cookie = 'lang=v=2&lang=en-us; bcookie={}; bscookie={}; lissc1=1; lissc2=1; lidc={};  JSESSIONID={}'.format(cookies.get('bcookie'), cookies.get('bscookie'), cookies.get('lidc'), cookies.get('JSESSIONID'))
  58.     return {'cookies':cookie, 'form':payload, 'response':response}
  59.  
  60.  
  61. def login_sales_navigator():
  62.     s = requests.Session()
  63.     main_data = grab_session_data()
  64.     sess_cookies = main_data['cookies']
  65.     sess_response = main_data['response']
  66.     headers = {
  67.         'authority': 'www.linkedin.com',
  68.         'pragma': 'no-cache',
  69.         'cache-control': 'no-cache',
  70.         'origin': 'https://www.linkedin.com',
  71.         'upgrade-insecure-requests': '1',
  72.         'dnt': '1',
  73.         'content-type': 'application/x-www-form-urlencoded',
  74.         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
  75.         'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
  76.         'referer': 'https://www.linkedin.com/uas/login?session_redirect=%2Fsales&fromSignIn=true&trk=navigator',
  77.         'accept-encoding': 'gzip, deflate, br',
  78.         'accept-language': 'en-US,en;q=0.9',
  79.         'cookie': sess_cookies
  80.     }
  81.     data = main_data['form']
  82.     response = s.post('https://www.linkedin.com/checkpoint/lg/login-submit', headers=headers, data=data, allow_redirects=False)
  83.     accepted_cookies = 'JSESSIONID="{}"; bcookie="{}"; bscookie="{}"; lissc1=1; lissc2=1; li_at="{}"; liap=true; sl=v=1&2fsuB; lang=v=2&lang=en-us; lidc="{}"; chp_token={}'.format(sess_response.cookies.get('JSESSIONID'), sess_response.cookies.get("bcookie"), sess_response.cookies.get('bscookie'), response.cookies.get('li_at'), sess_response.cookies.get('lidc'), response.cookies.get('chp_token'))
  84.     #try:
  85.     print("chp token..", response.cookies.get('chp_token'))
  86.     core_cookies = fetch_core_cookies(accepted_cookies, response.cookies.get('JSESSIONID'))
  87.     headers['cookie'] = core_cookies
  88.  
  89.     store_header(headers)
  90.     return visit_sales_page(headers)
  91.  
  92.  
  93. def fetch_core_cookies(cookie_headers, sess_id):
  94.     try:
  95.         user_urn = grab_contract_user_info(cookie_headers, sess_id)
  96.     except:
  97.         return cookie_headers
  98.     headers = {
  99.         'cookie':cookie_headers,
  100.         'origin': 'https://www.linkedin.com',
  101.         'accept-encoding': 'gzip, deflate, br',
  102.         'x-li-lang': 'en_US',
  103.         'accept-language': 'en-US,en;q=0.9',
  104.         'x-requested-with': 'XMLHttpRequest',
  105.         'pragma': 'no-cache',
  106.         'cache-control': 'no-cache',
  107.         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
  108.         'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
  109.         'accept': '*/*',
  110.         'csrf-token': str(sess_id),
  111.         'x-restli-protocol-version': '2.0.0',
  112.         'authority': 'www.linkedin.com',
  113.         'referer': 'https://www.linkedin.com/sales/contract-chooser?redirect=%2Fsales',
  114.         'dnt': '1',
  115.     }
  116.     params = (
  117.         ('redirect', '/sales'),
  118.     )
  119.     contract_urn = user_urn['elements'][0]['agnosticIdentity']['com.linkedin.sales.authentication.SalesCapIdentity']['contractUrn']
  120.     seat_urn = user_urn['elements'][0]['agnosticIdentity']['com.linkedin.sales.authentication.SalesCapIdentity']['seatUrn']
  121.     data = '{"viewerDeviceType":"DESKTOP","identity":{"name":"Sales Navigator Professional","agnosticIdentity":{"com.linkedin.sales.authentication.SalesCapIdentity":{"contractUrn":"'+contract_urn+'","seatUrn":"'+seat_urn+'"}}}}'
  122.     response = requests.post('https://www.linkedin.com/sales-api/salesApiAgnosticAuthentication', headers=headers, params=params, data=data)
  123.     print("This is response important.", response)
  124.     cookie_headers = cookie_headers + '; li_a="{}"'.format(response.cookies.get('li_a'))
  125.     print("sofar cookie", cookie_headers)
  126.     return cookie_headers
  127.  
  128.  
  129.  
  130. def grab_contract_user_info(header_cookie, csrf_token):
  131.     print("This is csrf", csrf_token)
  132.     print("This is header cookie", header_cookie)
  133.     headers = {
  134.         'cookie':header_cookie,
  135.         'dnt': '1',
  136.         'accept-encoding': 'gzip, deflate, br',
  137.         'x-li-lang': 'en_US',
  138.         'accept-language': 'en-US,en;q=0.9',
  139.         'x-requested-with': 'XMLHttpRequest',
  140.         'pragma': 'no-cache',
  141.         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
  142.         'accept': '*/*',
  143.         'cache-control': 'no-cache',
  144.         'x-restli-protocol-version': '2.0.0',
  145.         'authority': 'www.linkedin.com',
  146.         'referer': 'https://www.linkedin.com/sales/login',
  147.         'Csrf-Token': csrf_token
  148.     }
  149.  
  150.     params = (
  151.         ('q', 'findLicensesByCurrentMember'),
  152.     )
  153.  
  154.     response = requests.get('https://www.linkedin.com/sales-api/salesApiIdentity', headers=headers, params=params)
  155.     print("Error is here..", response.text)
  156.     return response.json()
  157.  
  158.  
  159. def visit_sales_page(headers):
  160.     print("This is headers..", json.dumps(headers, indent=4))
  161.     response = requests.get('https://www.linkedin.com/sales', headers=headers)
  162.     soup = BeautifulSoup(response.text)
  163.     return response.text
  164.  
  165.  
  166. @app.route('/auth')
  167. def trigger_pipeline():
  168.     login = login_sales_navigator()
  169.     return login
  170.  
  171. #AQJ2l
  172. def grab_search_cookies(stale_cookies, headers):
  173.     import requests
  174.     params = (
  175.         ('keywords', ''),
  176.         ('count', '0'),
  177.         ('updateHistory', 'false'),
  178.         ('leadBuilder', 'true'),
  179.         ('trk', 'd_nav2_leadbuilder'),
  180.     )
  181.     response = requests.get('https://www.linkedin.com/sales/search', headers=headers, params=params, allow_redirects=False)
  182.     print("status..", response.status_code)
  183.     sdsc = response.cookies.get('sdsc')
  184.     print("other cookies", response.cookies, headers)
  185.     stale_cookie = headers['cookie'] + "; sdsc={}".format(sdsc)
  186.     return response.text
  187.  
  188.  
  189. @app.route('/search_test')
  190. def search_test():
  191.     import requests
  192.  
  193.     headers = {
  194.         'authority': 'www.linkedin.com',
  195.         'pragma': 'no-cache',
  196.         'cache-control': 'no-cache',
  197.         'upgrade-insecure-requests': '1',
  198.         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
  199.         'dnt': '1',
  200.         'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
  201.         'accept-encoding': 'gzip, deflate, br',
  202.         'accept-language': 'en-US,en;q=0.9',
  203.         'cookie': 'bcookie="v=2&9138d28f-b303-4bdb-8689-a1f4ce4b45eb"; bscookie="v=1&20190614063642f259ecd1-df08-46d2-8885-7e179086011fAQGYsfnU-dyUxV3neYPcZCY68RkCO_mW"; lissc1=1; lissc2=1; li_at=AQEDASelkg8BR3kIAAABa1S0YtsAAAFreMDm21YAEMett0BAiAWFSnRoa_V1dK0zfIsx8DR0AXLUIloU_PXKB5B6RGT56OqtfYw71UHLP9qHBDfArFQ8xNHtCDnl0h8pX05yyyvcKD0wEmlTeG5-YZuJ; liap=true; sl=v=1&ZySx2; JSESSIONID="ajax:3651795386314512406"; lang=v=2&lang=en-us; lidc="b=OB55:g=1665:u=8:i=1560494236:t=1560578171:s=AQEbqjfEikHuh29aDYHLJymNV0AIYjT-"; li_a=AQJ2PTEmc2FsZXNfY2lkPTQ0NjkwOTkwMiUzQSUzQTEzNjUxNjUwMjE9M-bU2G5dw1Rm1R1GNLX44SZK; sdsc=22%3A1%2C1560494237073%7ECAOR%2C08s93eW3j5Z2QJzItP%2B5d%2FieC4tY%3D',
  204.     }
  205.  
  206.     params = (
  207.         ('viewAllFilters', 'true'),
  208.     )
  209.  
  210.     response = requests.get('https://www.linkedin.com/sales/search/people', headers=headers, params=params)
  211.     return str(response)
  212.    
  213. def fetch_seniority():
  214.     headers = auth_header()
  215.     response = requests.get('https://www.linkedin.com/sales-api/salesApiFacetTypeahead?q=query&start=0&type=SENIORITY&count=25', headers=headers)
  216.     list_  = response.json()['elements']
  217.     return list_
  218.  
  219. def fetch_industries():
  220.     headers = auth_header()
  221.     response = requests.get('https://www.linkedin.com/sales-api/salesApiFacetTypeahead?q=query&type=INDUSTRY', headers=headers)
  222.     list_  = response.json()['elements']
  223.     return list_
  224.  
  225. def fetch_schools(query='uc'):
  226.     headers = auth_header()
  227.     response = requests.get('https://www.linkedin.com/sales-api/salesApiFacetTypeahead?q=query&start=0&type=SCHOOL&count=25&query='+query, headers=headers)
  228.     list_  = response.json()['elements']
  229.     return list_
  230.  
  231. def fetch_company_size():
  232.     headers = auth_header()
  233.     response = requests.get('https://www.linkedin.com/sales-api/salesApiFacetTypeahead?q=query&start=0&type=COMPANY_SIZE&count=25', headers=headers)
  234.     list_  = response.json()['elements']
  235.     return list_
  236.  
  237. def fetch_tags():
  238.     headers = auth_header()
  239.     response = requests.get('https://www.linkedin.com/sales-api/salesApiTags?isActiveOnly=true&q=currentSeatHolder&tagType=LEAD_OR_ACCOUNT', headers=headers)
  240.     list_  = response.json()['elements']
  241.     return list_
  242.  
  243. def fetch_geo(query='query'):
  244.     headers = auth_header()
  245.     response = requests.get('https://www.linkedin.com/sales-api/salesApiFacetTypeahead?q=query&query='+query+'&start=0&type=GEO&count=25', headers=headers)
  246.     list_  = response.json()['elements']
  247.     return list_
  248.  
  249. def fetch_functions():
  250.     headers = auth_header()
  251.     response = requests.get('https://www.linkedin.com/sales-api/salesApiFacetTypeahead?q=query&start=0&type=FUNCTION&count=50', headers=headers)
  252.     list_ = response.json()['elements']
  253.     return list_
  254.  
  255. def fetch_company(query='apple'):
  256.     headers = auth_header()
  257.     response = requests.get('https://www.linkedin.com/sales-api/salesApiFacetTypeahead?q=query&start=0&type=COMPANY&count=25&query='+query, headers=headers)
  258.     list_ = response.json()['elements']
  259.     return list_
  260.  
  261. def fetch_groups(query='automotive'):
  262.     headers = auth_header()
  263.     response = requests.get('https://www.linkedin.com/sales-api/salesApiFacetTypeahead?q=query&start=0&type=GROUP&count=25&query='+query, headers=headers)
  264.     list_ = response.json()['elements']
  265.     return list_
  266.  
  267.  
  268. def fetch_members():
  269.     years = [
  270.         {
  271.             'displayValue':'1 Day Ago',
  272.             'id':'1'
  273.         },
  274.         {
  275.             'displayValue':'2-7 Days Ago',
  276.             'id':'2'
  277.         },
  278.         {
  279.             'displayValue':'8-14 Days Ago',
  280.             'id':'3'
  281.         },
  282.         {
  283.             'displayValue':'15-30 Days Ago',
  284.             'id':'4'
  285.         },
  286.         {
  287.             'displayValue':'1-3 months ago',
  288.             'id':'5'
  289.         },
  290.  
  291.     ]
  292.     return years
  293.  
  294. def fetch_years_in_current_positions():
  295.     years = [
  296.         {
  297.             'displayValue':'Less than 1 year',
  298.             'id':'1'
  299.         },
  300.         {
  301.             'displayValue':'1 to 2 year',
  302.             'id':'2'
  303.         },
  304.         {
  305.             'displayValue':'3 to 5 year',
  306.             'id':'3'
  307.         },
  308.         {
  309.             'displayValue':'6 to 10 year',
  310.             'id':'4'
  311.         },
  312.         {
  313.             'displayValue':'More than 10 year',
  314.             'id':'5'
  315.         },
  316.     ]
  317.     return years
  318.  
  319. def fetch_years_in_current_company():
  320.     years = [
  321.         {
  322.             'displayValue':'Less than 1 year',
  323.             'id':'1'
  324.         },
  325.         {
  326.             'displayValue':'1 to 2 year',
  327.             'id':'2'
  328.         },
  329.         {
  330.             'displayValue':'3 to 5 year',
  331.             'id':'3'
  332.         },
  333.         {
  334.             'displayValue':'6 to 10 year',
  335.             'id':'4'
  336.         },
  337.         {
  338.             'displayValue':'More than 10 year',
  339.             'id':'5'
  340.         },
  341.     ]
  342.     return years
  343.  
  344. def fetch_years_of_experience():
  345.     years = [
  346.         {
  347.             'displayValue':'Less than 1 year',
  348.             'id':'1'
  349.         },
  350.         {
  351.             'displayValue':'1 to 2 year',
  352.             'id':'2'
  353.         },
  354.         {
  355.             'displayValue':'3 to 5 year',
  356.             'id':'3'
  357.         },
  358.         {
  359.             'displayValue':'6 to 10 year',
  360.             'id':'4'
  361.         },
  362.         {
  363.             'displayValue':'More than 10 year',
  364.             'id':'5'
  365.         },
  366.     ]
  367.     return years
  368.  
  369. @app.route('/api')
  370. def api_endpoints():
  371.     data = {
  372.         '/filters' : "All the filters available",
  373.         '/school' : "Returns school data",
  374.         '/region':"Returns region data.",
  375.         '/seniorities' : "Returns seniority data",
  376.         '/industries':"Returns industry data",
  377.         '/tags':"Returns tags data",
  378.         '/company_sizes' : "Returns company sizes data",
  379.         '/current_position_years' : "Returns current position in years data.",
  380.         '/current_company_years' : "Returns current company in years data.",
  381.         '/current_years_of_experience':"Returns current years_of_experience.",
  382.         '/function':"Returns function data.",
  383.         '/became_member':"Returns became member data.",
  384.         '/company':"Returns company data.",
  385.         '/groups':"Returns groups data.",
  386.         "/api/scraper" : {
  387.             "params": {
  388.                 "keyword":"keywords ex: Employee Benefits",
  389.                 "region":"/region?query=United States",
  390.                 "industry":"/industries",
  391.                 "school":"/school?query=UC Berkeley",
  392.                 "profile_language":"Profile Language",
  393.                 "seniority":"Seniority",
  394.                 "years_in_experience":"/current_years_of_experience",
  395.                 "years_in_current_company":"/current_company_years",
  396.                 "years_of_experience":"/current_position_years",
  397.                 "function":"/function",
  398.                 "title":"Linkedin Title",
  399.                 "company":"/company?query=Google",
  400.                 "current_company":"Current company query work for",
  401.                 "member_since":"/became_member",
  402.                 "group":"/groups",
  403.                 "tag":"/tags",
  404.                 "company_size":"/company_sizes",
  405.                 "start":"10 (10 is the 2nd page, where as 0 is the first and is the default value."
  406.             }
  407.         }
  408.     }
  409.     return jsonify(results=data)
  410.  
  411.  
  412. @app.route('/filters')
  413. def fetch_all_filters():
  414.     seniorities = fetch_seniority()
  415.     geos = fetch_geo()
  416.     industries = fetch_industries()
  417.     schools = fetch_schools()
  418.     company_size = fetch_company_size()
  419.     tags = fetch_tags()
  420.     years_of_experience = fetch_years_of_experience()
  421.     years_in_current_company = fetch_years_of_experience()
  422.     function = fetch_functions()
  423.     member_since = fetch_members()
  424.     groups = fetch_groups()
  425.     company = fetch_company()
  426.     result_dict = {
  427.         'school':schools,
  428.         'seniority': seniorities,
  429.         'region':geos,
  430.         'industry':industries,
  431.         'company_size':company_size,
  432.         'tags':tags,
  433.         'years_in_experience':years_of_experience,
  434.         'member_since':member_since,
  435.         'groups':groups,
  436.         'years_in_current_company':years_in_current_company,
  437.         'function':function,
  438.         'company':company,     
  439.     }
  440.     return jsonify(result_dict)
  441.  
  442. @app.route('/school')
  443. def get_school_api():
  444.     query = request.args.get('query')
  445.     if query:
  446.         results=fetch_schools(query)
  447.     else:
  448.         results = fetch_schools()
  449.     return jsonify(results=results)
  450.  
  451. @app.route('/region')
  452. def get_region_api():
  453.     query = request.args.get('query')
  454.     if query:
  455.         results=fetch_geo(query)
  456.     else:
  457.         results = fetch_geo()
  458.     return jsonify(results=results)
  459.  
  460. @app.route('/seniorities')
  461. def get_seniorities():
  462.     return jsonify(results=fetch_seniority())
  463.  
  464.  
  465. @app.route('/industries')
  466. def get_industries():
  467.     return jsonify(results=fetch_industries())
  468.  
  469. @app.route('/tags')
  470. def get_tags():
  471.     return jsonify(results=fetch_tags())
  472.  
  473. @app.route('/company_sizes')
  474. def get_company_sizes():
  475.     return jsonify(results=fetch_company_size())
  476.  
  477. @app.route('/current_position_years')
  478. def get_years_in_current_position():
  479.     return jsonify(results=fetch_years_in_current_positions())
  480.  
  481. @app.route('/current_company_years')
  482. def get_years_in_current_company():
  483.     return jsonify(results=fetch_years_in_current_company())   
  484.  
  485. @app.route('/current_years_of_experience')
  486. def get_years_of_experience():
  487.     return jsonify(results=fetch_years_of_experience())
  488.    
  489. @app.route('/functions')
  490. def get_functions():
  491.     return jsonify(results=fetch_functions())
  492.  
  493. @app.route('/company')
  494. def get_company():
  495.     query = request.args.get('query')
  496.     if query:
  497.         results=fetch_company(query)
  498.     else:
  499.         results = fetch_company()
  500.     return jsonify(results=results)
  501.  
  502. @app.route('/became_member')
  503. def get_became_member():
  504.     return jsonify(results=fetch_members())
  505.  
  506. @app.route('/groups')
  507. def get_groups():
  508.     query = request.args.get('query')
  509.     if query:
  510.         results=fetch_groups(query)
  511.     else:
  512.         results = fetch_groups()
  513.     return jsonify(results=results)
  514.  
  515. def prepare_payload_params(payload):
  516.     payload_str = '(includedValues:List('
  517.     for key in payload:
  518.         value = payload[key].split(',')
  519.         for items in value:
  520.             payload_str += '(id:{}),'.format(urllib.parse.quote(items))
  521.     payload_str += '))'
  522.     return payload_str.replace(',))','))')
  523.  
  524.  
  525. def construct_payload(kwargs):
  526.     query = ''
  527.     if kwargs.get('keyword'):
  528.         query += 'keywords:' + urllib.parse.quote(kwargs.get('keyword')) + ','
  529.     if kwargs.get('region'):
  530.         payload = prepare_payload_params(kwargs)
  531.         query += 'geoV2:{},'.format(payload)
  532.     if kwargs.get('industry'):
  533.         payload = prepare_payload_params(kwargs)
  534.         query += 'industryV2:{},'.format(payload)
  535.     if kwargs.get('school'):
  536.         payload = prepare_payload_params(kwargs)
  537.         query += 'schoolV2:{},'.format(payload)
  538.     if kwargs.get('profile_language'):
  539.         query += 'profileLanguage:List({})'.format(urllib.parse.quote(kwargs.get('school'))) + ','
  540.     if kwargs.get('seniority'):
  541.         payload = prepare_payload_params(kwargs)
  542.         query += 'seniorityLevelV2:{},'.format(payload)
  543.     if kwargs.get('years_in_current_position'):
  544.         query += 'tenureAtCurrentPosition:List({})'.format(urllib.parse.quote(kwargs.get('years_in_current_position'))) + ','
  545.     if kwargs.get('years_in_current_company'):
  546.         query += 'tenureAtCurrentCompany:List({})'.format(urllib.parse.quote(kwargs.get('years_in_current_company'))) + ','
  547.     if kwargs.get('years_of_experience'):
  548.         query += 'yearsOfExperience:List({})'.format(urllib.parse.quote(kwargs.get('years_of_experience'))) + ','
  549.     if kwargs.get('function'):
  550.         query += 'functionV2:(includedValues:List((id:{})))'.format(urllib.parse.quote(kwargs.get('function'))) + ','
  551.     if kwargs.get('title'):
  552.         payload = prepare_payload_params(kwargs).replace('(in','in')
  553.         query += 'titleV2:(scope:CURRENT,{},'.format(payload)
  554.     if kwargs.get('company'):
  555.         query += '(scope:CURRENT,includedValues:List((id:{},text:Microsoft)))'.format(urllib.parse.quote(kwargs.get('company'))) + ','
  556.     if kwargs.get('current_company'):
  557.         query += 'currCompany:List((id:{},text:Apple))'.format(urllib.parse.quote(kwargs.get('current_company'))) + ','
  558.     if kwargs.get('member_since'):
  559.         query += 'memberSince:List({})'.format(urllib.parse.quote(kwargs.get('member_since'))) + ','
  560.     if kwargs.get('group'):
  561.         query += 'group:List({})'.format(urllib.parse.quote(kwargs.get('group'))) + ','
  562.     if kwargs.get('tag'):
  563.         query += 'tag:List({})'.format(urllib.parse.quote(kwargs.get('group'))) + ','
  564.     if kwargs.get('company_size'):
  565.         query += 'companySize:List({})'.format(urllib.parse.quote(kwargs.get('company_size'))) + ','
  566.     base_query = '(doFetchHeroCard:false,geoV2:(includedValues:List((id:us%3A0))),'+query+'recentSearchParam:(doLogHistory:true),searchHistoryParam:(doLogHistory:true,id:3344450516),spotlightParam:(selectedType:ALL),trackingParam:(sessionId:wd42NZZ2SZKZkWRANGyatw==),doFetchFilters:true,doFetchHits:true,doFetchSpotlights:true)&decoration=%28degree%2CentityUrn%2CfirstName%2ClastName%2CfullName%2CobjectUrn%2CgeoRegion%2Chighlight%28com.linkedin.sales.profile.profileHighlights.MentionedInTheNewsHighlight!_nt%3Dcom.linkedin.sales.deco.common.profile.highlights.DecoratedMentionedInTheNewsHighlight%28articleName%2Ccount%2Csource%2Curl%29%2Ccom.linkedin.sales.profile.profileHighlights.ProfileHighlights!_nt%3Dcom.linkedin.sales.deco.common.profile.highlights.DecoratedProfileHighlights%28sharedConnection%28sharedConnectionUrns*~fs_salesProfile%28entityUrn%2CfirstName%2ClastName%2CfullName%2CpictureInfo%2CprofilePictureDisplayImage%29%29%2CteamlinkInfo%28totalCount%29%2CsharedEducations*%28overlapInfo%2CentityUrn~fs_salesSchool%28entityUrn%2ClogoId%2Cname%2Curl%2CschoolPictureDisplayImage%29%29%2CsharedExperiences*%28overlapInfo%2CentityUrn~fs_salesCompany%28companyPictureDisplayImage%2CentityUrn%2Cname%2CpictureInfo%29%29%2CsharedGroups*%28entityUrn~fs_salesGroup%28entityUrn%2Cname%2ClargeLogoId%2CsmallLogoId%2CgroupPictureDisplayImage%29%29%29%2Ccom.linkedin.sales.profile.profileHighlights.RecentPositionChangeHighlight!_nt%3Dcom.linkedin.sales.deco.common.profile.highlights.DecoratedRecentPositionChangeHighlight%28companyUrn%2CcompanyName%2Cduration%2Ctitle%29%29%2CsharedConnectionsHighlight%2CteamlinkIntrosHighlight%2CopenLink%2Cpremium%2Cteamlink%2CpendingInvitation%2CprofilePictureDisplayImage%2Cviewed%2Csaved%2CcrmStatus%2CtrackingId%2CrecommendedLeadTrackingId%2ClistCount%2CcurrentPositions*%2Ctags*%2CpastPositions*%2CmatchedArticles*%2CfacePiles*%29'
  567.     return base_query
  568.  
  569.  
  570. @app.route('/api/scraper')
  571. def scraper():
  572.     headers = auth_header()
  573.     pipeline_payload = {}
  574.     payload = construct_payload(request.args)
  575.     start = request.args.get('start',0)
  576.     r = requests.get('https://www.linkedin.com/sales-api/salesApiPeopleSearch?q=peopleSearchQuery&start='+str(start)+'&count=25&query='+payload, headers=headers)
  577.     return jsonify(results=r.json())
  578.  
  579.  
  580. @app.route('/')
  581. def api_data():
  582.     import requests
  583.     headers = auth_header()
  584.     keywords_ = request.args.get('keyword',"employee benefits")
  585.     keywords = urllib.parse.quote(keywords_)
  586.     region_ = request.args.get('region',"jo")
  587.     region = urllib.parse.quote(region_)
  588.     start = request.args.get('start', "0")
  589.     end = request.args.get('count', "25")
  590.     region_code= address_code(headers, region)
  591.     seniority = request.args.get('seniority', 10)
  592.     region = urllib.parse.quote(region_code)
  593.     response = requests.get('https://www.linkedin.com/sales-api/salesApiPeopleSearch?q=peopleSearchQuery&start=0&count=25&query=(doFetchHeroCard:false,geoV2:(includedValues:List((id:'+region+'))),keywords:'+keywords+',recentSearchParam:(doLogHistory:true),doFetchFilters:true,doFetchHits:true,doFetchSpotlights:true)&decoration=%28degree%2CentityUrn%2CfirstName%2ClastName%2CfullName%2CobjectUrn%2CgeoRegion%2Chighlight%28com.linkedin.sales.profile.profileHighlights.MentionedInTheNewsHighlight!_nt%3Dcom.linkedin.sales.deco.common.profile.highlights.DecoratedMentionedInTheNewsHighlight%28articleName%2Ccount%2Csource%2Curl%29%2Ccom.linkedin.sales.profile.profileHighlights.ProfileHighlights!_nt%3Dcom.linkedin.sales.deco.common.profile.highlights.DecoratedProfileHighlights%28sharedConnection%28sharedConnectionUrns*~fs_salesProfile%28entityUrn%2CfirstName%2ClastName%2CfullName%2CpictureInfo%2CprofilePictureDisplayImage%29%29%2CteamlinkInfo%28totalCount%29%2CsharedEducations*%28overlapInfo%2CentityUrn~fs_salesSchool%28entityUrn%2ClogoId%2Cname%2Curl%2CschoolPictureDisplayImage%29%29%2CsharedExperiences*%28overlapInfo%2CentityUrn~fs_salesCompany%28companyPictureDisplayImage%2CentityUrn%2Cname%2CpictureInfo%29%29%2CsharedGroups*%28entityUrn~fs_salesGroup%28entityUrn%2Cname%2ClargeLogoId%2CsmallLogoId%2CgroupPictureDisplayImage%29%29%29%2Ccom.linkedin.sales.profile.profileHighlights.RecentPositionChangeHighlight!_nt%3Dcom.linkedin.sales.deco.common.profile.highlights.DecoratedRecentPositionChangeHighlight%28companyUrn%2CcompanyName%2Cduration%2Ctitle%29%29%2CsharedConnectionsHighlight%2CteamlinkIntrosHighlight%2CopenLink%2Cpremium%2Cteamlink%2CpendingInvitation%2CprofilePictureDisplayImage%2Cviewed%2Csaved%2CcrmStatus%2CtrackingId%2CrecommendedLeadTrackingId%2ClistCount%2CcurrentPositions*%2Ctags*%2CpastPositions*%2CmatchedArticles*%2CfacePiles*%29', headers=headers)
  594.     return jsonify(results=response.json())
  595.  
  596. def auth_api_validate_cookie():
  597.     headers_ = load_headers()
  598.     cookies_to_dict_ = cookies_to_dict(headers_['cookie'])
  599.     cookie_format = 'bcookie="v={}"; bscookie="v={}"; li_at="{}"; JSESSIONID="{}"; lidc="{}"; li_a="{}"'.format(cookies_to_dict_['bcookie'], cookies_to_dict_['bscookie'],cookies_to_dict_['li_at'], cookies_to_dict_['JSESSIONID'], cookies_to_dict_['lidc'], cookies_to_dict_['li_a'])
  600.     return {'cookie_format':cookie_format, 'cookies':cookies_to_dict_}
  601.  
  602. def auth_header():
  603.     auth_cookies = auth_api_validate_cookie()
  604.     headers = {
  605.         'cookie':auth_cookies['cookie_format'],
  606.         'dnt': '1',
  607.         'accept-encoding': 'gzip, deflate, br',
  608.         'accept-language': 'en-US,en;q=0.9',
  609.         'x-requested-with': 'XMLHttpRequest',
  610.         'pragma': 'no-cache',
  611.         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
  612.         'accept': '*/*',
  613.         'cache-control': 'no-cache',
  614.         'x-restli-protocol-version': '2.0.0',
  615.         'authority': 'www.linkedin.com',
  616.         'csrf-token': auth_cookies['cookies'].get('JSESSIONID')
  617.     }
  618.     return headers
  619.  
  620. def address_code(headers, query):
  621.     headers = auth_header()
  622.     params = (
  623.         ('q', 'query'),
  624.         ('start', '0'),
  625.         ('type', 'GEO'),
  626.         ('count', '25'),
  627.         ('query', 'jo'),
  628.     )
  629.  
  630.     response = requests.get('https://www.linkedin.com/sales-api/salesApiFacetTypeahead', headers=headers, params=params)
  631.     try:
  632.         location_id = response.json()['elements'][0]['id']
  633.         return location_id
  634.     except:
  635.         print("Wooops cookie got expired!!!! Updating the cookie now...")
  636.         login_sales_navigator()
  637.         return "rerun_pipeline"
  638.  
  639.  
  640. def cookies_to_dict(cookie):
  641.     cookies_list = cookie.split(';')
  642.     cookies = {}
  643.     for items in cookies_list:
  644.         key = items.split('=')[0].strip().replace('"','')
  645.         value = items.split('=')[-1].replace('"','')
  646.         cookies[key] = value
  647.     return cookies
  648.  
  649.    
  650. def load_headers():
  651.     headers = json.loads(open('headers.json').read())
  652.     return headers
  653.  
  654. @app.route('/access')
  655. def access():
  656.     headers = load_headers()
  657.     r = requests.get('https://www.linkedin.com/sales', headers=headers)
  658.     return str(r.text)
  659.  
  660.  
  661. def store_header(headers):
  662.     with open('headers.json','w') as outfile:
  663.         json.dump(headers, outfile, indent=4)
  664.  
  665.  
  666.  
  667. def store_output_to_csv(data):
  668.     with open("data.json","w") as outfile:
  669.         json.dump(data, outfile, indent=4)
  670.  
  671. if __name__ == "__main__":
  672.     # keyword = input("Type the keyword you want to query for: ")
  673.     # pg_num_to_scraper = input("Type the page_number you want to scrape data for: ")
  674.     # data = scraper(keyword, pg_num_to_scraper)
  675.     #store_output_to_csv(data)
  676.  
  677.     #print (fetch_data())
  678.     #login_sales_navigator()
  679.     #app.run(use_reloader=True)
  680.     app.run(use_reloader=True,host="213.229.110.6")
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top