Advertisement
isakura132

.

Jan 14th, 2024
1,949
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.30 KB | None | 0 0
  1. cookie = {
  2.     "remember_token_leadgenie_v2": "eyJfcmFpbHMiOnsibWVzc2FnZSI6IklqWTBZbVpoWm1VME9ERm1aVEEwTURCbVl6QTFZVGhqT1Y4MlpESm1NV1JqTkdVNVl6bGxOMlpsTWpkaE1XWTBaR1l6TkRVMk5UWmxaQ0k9IiwiZXhwIjoiMjAyNC0wMS0zMFQxNjo0Mjo0Ny42NDVaIiwicHVyIjoiY29va2llLnJlbWVtYmVyX3Rva2VuX2xlYWRnZW5pZV92MiJ9fQ%3D%3D--989c54891a88a51bd56255fe3047e604112e32e6",
  3. }
# async def open_page(browser: browser.Browser, row: List):
  5. async def worker(queue: asyncio.Queue, worker_id):
  6.     new_rows = []
  7.     while True:
  8.         if queue.empty():
  9.             return new_rows
  10.         print(f'Worker {worker_id}')
  11.         row = await queue.get()
  12.         company_id = row[-1]
  13.         if company_id is None:
  14.             break
  15.         url = "https://app.apollo.io/api/v1/news_articles/search"
  16.        
  17.         payload = {
  18.             "organization_ids": [str(company_id)],
  19.             "categories": [
  20.                 "leadership", "acquisition", "expansion", "new_offering", "investment",
  21.                 "cost_cutting", "partnership", "recognition", "contract", "corporate_challenges", "relational"
  22.             ],
  23.             "page": 1,
  24.             "display_mode": "explorer_mode",
  25.             "per_page": 10,
  26.             "open_factor_names": [],
  27.             "num_fetch_result": 1,
  28.             "show_suggestions": False,
  29.             "ui_finder_random_seed": "5o7v9tfe37",
  30.             "cacheKey": 1705253056406
  31.         }
  32.  
  33.         headers = {
  34.             'authority': 'app.apollo.io',
  35.             'accept': '*/*',
  36.             'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
  37.             'content-type': 'application/json',
  38.             'cookie': 'YOUR_COOKIES_HERE',  # Замените на свои куки
  39.             'origin': 'https://app.apollo.io',
  40.             'referer': 'https://app.apollo.io/',
  41.             'sec-ch-ua': '"Opera GX";v="105", "Chromium";v="119", "Not?A_Brand";v="24"',
  42.             'sec-ch-ua-mobile': '?0',
  43.             'sec-ch-ua-platform': '"Windows"',
  44.             'sec-fetch-dest': 'empty',
  45.             'sec-fetch-mode': 'cors',
  46.             'sec-fetch-site': 'same-origin',
  47.             'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 OPR/105.0.0.0',
  48.             'x-csrf-token': 'YOUR_CSRF_TOKEN_HERE'  # Замените на свой CSRF токен
  49.         }
  50.         info_url = f'https://app.apollo.io/api/v1/accounts/{company_id}'
  51.         try:
  52.             async with ClientSession(cookies=cookie) as session:
  53.                 async with session.post(url, headers=headers, data=json.dumps(payload)) as r:
  54.                     company_news = await r.json()
  55.                 async with session.get(info_url, headers=headers) as r:
  56.                     company_information = await r.json()
  57.                 description = company_information.get('seo_description', None)
  58.                 if description is None:
  59.                     description = company_information.get('short_description', None)
  60.            
  61.             news = company_news.get('news_articles', None)
  62.             row.remove(company_id)
  63.             if news is None or len(news)==0:
  64.                 new_rows.append(description)
  65.             else:
  66.                 new = news[0]
  67.                 title = new['title']
  68.                 link = new['url']
  69.                 input_time_str = new['published_at']
  70.                 input_time = datetime.fromisoformat(input_time_str[:-6])  # Убираем последние 6 символов (смещение часового пояса)
  71.                 date = input_time.strftime("%d/%m/%Y")
  72.                 print(f'Worker {worker_id} - {company_id}')
  73.                 new_rows.append([description, f'Заголовок - {title},\nСсылка - {link}', date])
  74.         except:
  75.             continue
  76.         else:
  77.             queue.task_done()
  78.  
  79.  
  80.  
  81. async def company_worker(rows: List[List[str]]):
  82.     queue = asyncio.Queue()
  83.     workers = []
  84.     new_rows = []
  85.     for i in range(5):
  86.         worker_task = asyncio.create_task(worker(queue, i + 1))
  87.         workers.append(worker_task)
  88.     for i in rows:
  89.         await queue.put(i)
  90.     result = await asyncio.gather(*workers)
  91.     print(len(result))
  92.     for worker_rows in result:
  93.         for row in worker_rows:
  94.             new_rows.append(row)
  95.     print(len(new_rows))
  96.     return new_rows
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement