Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os

# Session cookie sent with every Apollo request.
# The token can be supplied through the APOLLO_REMEMBER_TOKEN environment
# variable so that a fresh credential does not require a code change.
# NOTE(review): the literal below is a real-looking session credential kept
# only as a backward-compatible fallback; it should be rotated and removed
# from source control.
cookie = {
    "remember_token_leadgenie_v2": os.environ.get(
        "APOLLO_REMEMBER_TOKEN",
        "eyJfcmFpbHMiOnsibWVzc2FnZSI6IklqWTBZbVpoWm1VME9ERm1aVEEwTURCbVl6QTFZVGhqT1Y4MlpESm1NV1JqTkdVNVl6bGxOMlpsTWpkaE1XWTBaR1l6TkRVMk5UWmxaQ0k9IiwiZXhwIjoiMjAyNC0wMS0zMFQxNjo0Mjo0Ny42NDVaIiwicHVyIjoiY29va2llLnJlbWVtYmVyX3Rva2VuX2xlYWRnZW5pZV92MiJ9fQ%3D%3D--989c54891a88a51bd56255fe3047e604112e32e6",
    ),
}
- # async def open_page(browser: browser.Browser, row: List):
- async def worker(queue: asyncio.Queue, worker_id):
- new_rows = []
- while True:
- if queue.empty():
- return new_rows
- print(f'Worker {worker_id}')
- row = await queue.get()
- company_id = row[-1]
- if company_id is None:
- break
- url = "https://app.apollo.io/api/v1/news_articles/search"
- payload = {
- "organization_ids": [str(company_id)],
- "categories": [
- "leadership", "acquisition", "expansion", "new_offering", "investment",
- "cost_cutting", "partnership", "recognition", "contract", "corporate_challenges", "relational"
- ],
- "page": 1,
- "display_mode": "explorer_mode",
- "per_page": 10,
- "open_factor_names": [],
- "num_fetch_result": 1,
- "show_suggestions": False,
- "ui_finder_random_seed": "5o7v9tfe37",
- "cacheKey": 1705253056406
- }
- headers = {
- 'authority': 'app.apollo.io',
- 'accept': '*/*',
- 'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
- 'content-type': 'application/json',
- 'cookie': 'YOUR_COOKIES_HERE', # Замените на свои куки
- 'origin': 'https://app.apollo.io',
- 'referer': 'https://app.apollo.io/',
- 'sec-ch-ua': '"Opera GX";v="105", "Chromium";v="119", "Not?A_Brand";v="24"',
- 'sec-ch-ua-mobile': '?0',
- 'sec-ch-ua-platform': '"Windows"',
- 'sec-fetch-dest': 'empty',
- 'sec-fetch-mode': 'cors',
- 'sec-fetch-site': 'same-origin',
- 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 OPR/105.0.0.0',
- 'x-csrf-token': 'YOUR_CSRF_TOKEN_HERE' # Замените на свой CSRF токен
- }
- info_url = f'https://app.apollo.io/api/v1/accounts/{company_id}'
- try:
- async with ClientSession(cookies=cookie) as session:
- async with session.post(url, headers=headers, data=json.dumps(payload)) as r:
- company_news = await r.json()
- async with session.get(info_url, headers=headers) as r:
- company_information = await r.json()
- description = company_information.get('seo_description', None)
- if description is None:
- description = company_information.get('short_description', None)
- news = company_news.get('news_articles', None)
- row.remove(company_id)
- if news is None or len(news)==0:
- new_rows.append(description)
- else:
- new = news[0]
- title = new['title']
- link = new['url']
- input_time_str = new['published_at']
- input_time = datetime.fromisoformat(input_time_str[:-6]) # Убираем последние 6 символов (смещение часового пояса)
- date = input_time.strftime("%d/%m/%Y")
- print(f'Worker {worker_id} - {company_id}')
- new_rows.append([description, f'Заголовок - {title},\nСсылка - {link}', date])
- except:
- continue
- else:
- queue.task_done()
async def company_worker(rows: List[List[str]], num_workers: int = 5):
    """Process ``rows`` concurrently and return the combined worker results.

    All rows are placed on a queue, ``num_workers`` ``worker`` tasks are
    started to drain it, and the lists they return are concatenated.

    Args:
        rows: Rows to process; each row is handed to a ``worker`` task.
        num_workers: Number of concurrent worker tasks (defaults to 5).

    Returns:
        A flat list with every entry produced by the workers, grouped by
        worker in task order (not in the order of ``rows``).
    """
    queue = asyncio.Queue()
    # Fill the queue before any worker is started: a worker exits as soon as
    # it sees an empty queue, so it must never run ahead of the producer.
    # put_nowait cannot fail here because the queue is unbounded.
    for row in rows:
        queue.put_nowait(row)

    workers = [
        asyncio.create_task(worker(queue, index))
        for index in range(1, num_workers + 1)
    ]
    result = await asyncio.gather(*workers)
    print(len(result))

    new_rows = []
    for worker_rows in result:
        # A worker that exits without an explicit return value yields None;
        # treat that as "no rows" instead of failing on iteration.
        if worker_rows:
            new_rows.extend(worker_rows)
    print(len(new_rows))
    return new_rows
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement