Advertisement
Guest User

Untitled

a guest
Jan 23rd, 2020
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.49 KB | None | 0 0
  1. # File Objects
  2. import time
  3. from selenium import webdriver
  4. from selenium.webdriver.common.keys import Keys
  5. from selenium.webdriver.common.action_chains import ActionChains
  6. import pyperclip
  7. import pandas as pd
  8. from nltk import tokenize
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.support import expected_conditions as EC
  11. from selenium.common.exceptions import TimeoutException
  12. from selenium.webdriver.common.by import By
  13. from selenium.common.exceptions import ElementNotVisibleException
  14.  
  15.  
  16.  
  17. chrome_options = webdriver.ChromeOptions()
  18. df = pd.read_csv("beerv.csv")
  19.  
  20. print(df.final)
  21. print(df.final[1])
  22. print(int(len((df.final[1]))/5000)+1)
  23.  
  24.  
  25. n=0
  26. header = ["id","link","titulo","final","traducido"]
  27.  
  28. lengthcsv =df.shape[0]
  29. with webdriver.Firefox(executable_path="/root/Downloads/geckodriver") as driver:
  30.     driver.get("https://www.deepl.com/es/translator")
  31.     for ii in range(lengthcsv):
  32.           if pd.isna(df.traducido[ii])==True:
  33.             print("voy por la ", str(ii))
  34.             traducir=[]
  35.             print(df.final[ii])
  36.             parts = int(len((df.final[ii]))/4000)+1
  37.             print(len(df.final[ii]))
  38.             print(parts)
  39.             phrases = tokenize.sent_tokenize(df.final[ii])
  40.             print(phrases)
  41.             phrases_num =len(phrases)
  42.             rel_num = len(phrases)//parts
  43.             print("esto es el numbero total"+ str(rel_num))
  44.             traduccion =[]
  45.             actions = ActionChains(driver)
  46.             time.sleep(2)
  47.             print(len(phrases))
  48.             index_2 = 1
  49.             index = 0
  50.  
  51.             for i in range(int(parts)):
  52.                 index_2 = index_2 + rel_num
  53.                 t = driver.find_element_by_xpath("//*[@dl-test='translator-source-input']")
  54.                 print("index 1" , int(index))
  55.                 print("index 2" , int(index_2))
  56.                 print(len(phrases))
  57.                 print(str("".join(phrases[index:index_2])))
  58.                 pyperclip.copy(str("".join(phrases[index:index_2])))
  59.                 t.send_keys(Keys.CONTROL, "v")
  60.                 index = index_2
  61.                 element = WebDriverWait(driver, 10).until(
  62.                 EC.invisibility_of_element_located((By.XPATH, "//*[@class='lmt__progress_popup']")))
  63.                 time.sleep(18)
  64.                 t.send_keys(Keys.TAB)
  65.                 time.sleep(2)
  66.                 ActionChains(driver).key_down(Keys.CONTROL).send_keys('a').key_up(Keys.CONTROL).perform()
  67.                 time.sleep(2)
  68.                 ActionChains(driver).key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()
  69.                 s = pyperclip.paste()
  70.                 s = s.replace("Traducción realizada con la versión gratuita del traductor www.DeepL.com/Translator", "").replace('["',"").replace('["',"")
  71.                 traduccion.append(s)
  72.                 time.sleep(5)
  73.                 t = driver.find_element_by_xpath("//*[@dl-test='translator-source-input']")
  74.                 t.send_keys(Keys.CONTROL, "a")
  75.                 time.sleep(2)
  76.                 t.send_keys(Keys.DELETE)
  77.                 time.sleep(3)
  78.                 time.sleep(2)
  79.                 driver.delete_all_cookies()
  80.             print("Traduccion completa: " ,traduccion)
  81.             df.traducido[ii] = str(traduccion)
  82.             df.to_csv("beerv_traducido.csv", columns=header)
  83.  
  84.     headers = ["title", "final", "category", "for_mimad", "link", "imagss", "tagsgoogle", "traducido"]
  85.     df.to_csv( "beersecure.csv", columns=headers)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement