Advertisement
jarekmor

librus_playwright_wiadomosci

Sep 13th, 2022
1,099
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.57 KB | Software | 0 0
  1. from playwright.sync_api import Playwright, sync_playwright
  2. from bs4 import BeautifulSoup
  3. import pandas as pd
  4.  
  5. def run(playwright: Playwright) -> None:        
  6.     browser = playwright.firefox.launch(headless=True)          #slow_mo=50
  7.     context = browser.new_context()
  8.      
  9.     # Open new page
  10.     page = context.new_page()
  11.    
  12.     # Go to https://portal.librus.pl/rodzina
  13.     page.goto("https://portal.librus.pl/rodzina")
  14.    
  15.     # Click text=LIBRUS Synergia >> nth=0
  16.     page.locator("text=LIBRUS Synergia").first.click()
  17.    
  18.     # Click nav >> text=Zaloguj >> nth=0
  19.     page.locator("nav >> text=Zaloguj").first.click()
  20.     page.wait_for_url("https://portal.librus.pl/rodzina/synergia/loguj")
  21.    
  22.     # Click input[name="Login"]
  23.     page.frame_locator("#caLoginIframe").locator("input[name=\"Login\"]").click()
  24.    
  25.     # Fill input[name="Login"]
  26.     page.frame_locator("#caLoginIframe").locator("input[name=\"Login\"]").fill("XXXXXXXXXX")       ### login
  27.    
  28.     # Click input[name="Pass"]
  29.     page.frame_locator("#caLoginIframe").locator("input[name=\"Pass\"]").click()
  30.    
  31.     # Fill input[name="Pass"]
  32.     page.frame_locator("#caLoginIframe").locator("input[name=\"Pass\"]").fill("XXXXXXXXXXX")       ### password
  33.    
  34.     # # Click button:has-text("Zaloguj")
  35.     page.frame_locator("#caLoginIframe").locator("button:has-text(\"Zaloguj\")").click()
  36.     page.wait_for_url("https://synergia.librus.pl/rodzic/index")
  37.     page.is_visible('div.container-background')
  38.    
  39.     # # Click #icon-oceny span
  40.     page.locator("#icon-wiadomosci span").click()
  41.     page.wait_for_url("https://synergia.librus.pl/wiadomosci")
  42.  
  43.     # Select tbody
  44.     html_1 = page.inner_html('xpath=/html/body/div[3]/div[3]/form/div/div/table/tbody/tr/td[2]/table[2]/tbody')
  45.     soup = BeautifulSoup(html_1, 'html.parser')
  46.    
  47.     list_temp = []    
  48.     for tr in soup.find_all('tr'):
  49.         slownik = []
  50.         for td in tr.find_all('td'):
  51.             if '\n' not in td.text:
  52.                 slownik.append(td.text)
  53.             else:
  54.                 pass
  55.         list_temp.append(slownik)
  56.    
  57.     mails = []
  58.     for i in list_temp:
  59.         slownik = {}
  60.         slownik = {
  61.             "Autor": i[0],
  62.             "Tytuł": i[1],
  63.             "Data": i[2]
  64.         }
  65.         mails.append(slownik)
  66.    
  67.     df = pd.DataFrame(mails)
  68.  
  69.     with open('df.json', 'w', encoding='utf-8') as file:
  70.         df.to_json(file, force_ascii=False)    
  71.    
  72.     # ---------------------
  73.     context.close()
  74.     browser.close()
  75.    
  76. with sync_playwright() as playwright:
  77.     run(playwright)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement