Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # File Objects
- import time
- from selenium import webdriver
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.common.action_chains import ActionChains
- import pyperclip
- import pandas as pd
- from nltk import tokenize
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.common.exceptions import TimeoutException
- from selenium.webdriver.common.by import By
- from selenium.common.exceptions import ElementNotVisibleException
# Translate the "final" column of beerv.csv through the DeepL web translator,
# driving a Firefox window with Selenium and moving text via the system
# clipboard. Results are stored in the "traducido" column.
#
# NOTE(review): this script was recovered from a paste that had lost all of
# its indentation; the nesting below is the most plausible reconstruction
# (per-row checkpoint inside the loop, final export after the browser closes).

# Columns written to the per-row checkpoint file.
header = ["id", "link", "titulo", "final", "traducido"]
# Columns written to the final export.
# NOTE(review): this list differs from `header` ("title" vs "titulo", extra
# columns); to_csv requires every listed column to exist in the input CSV --
# confirm against the real data.
headers = ["title", "final", "category", "for_mimad", "link", "imagss", "tagsgoogle", "traducido"]

# Upper bound, in characters, for the text pasted into the translator at once.
MAX_CHUNK_CHARS = 4000

SOURCE_INPUT_XPATH = "//*[@dl-test='translator-source-input']"
PROGRESS_POPUP_XPATH = "//*[@class='lmt__progress_popup']"
DEEPL_FOOTER = "Traducción realizada con la versión gratuita del traductor www.DeepL.com/Translator"


def split_into_chunks(sentences, max_chars=MAX_CHUNK_CHARS):
    """Pack sentences, in order, into space-joined chunks of at most max_chars.

    Every sentence ends up in exactly one chunk, so no text is dropped. A
    single sentence longer than ``max_chars`` is emitted as its own
    (oversized) chunk rather than being cut mid-sentence.

    Args:
        sentences: Sequence of sentence strings.
        max_chars: Maximum length of a chunk, counting the joining spaces.

    Returns:
        List of chunk strings; empty when ``sentences`` is empty.
    """
    chunks = []
    current = []
    current_len = 0
    for sentence in sentences:
        # One extra character for the space that will join it to the chunk.
        extra = len(sentence) + (1 if current else 0)
        if current and current_len + extra > max_chars:
            chunks.append(" ".join(current))
            current = [sentence]
            current_len = len(sentence)
        else:
            current.append(sentence)
            current_len += extra
    if current:
        chunks.append(" ".join(current))
    return chunks


def clean_translation(text):
    """Remove the DeepL free-version footer and any '["' sequence from text."""
    return text.replace(DEEPL_FOOTER, "").replace('["', "")


def translate_chunk(driver, chunk):
    """Paste one chunk into the translator and return the copied translation.

    The text travels through the system clipboard in both directions, so the
    clipboard contents are overwritten.

    Args:
        driver: Selenium WebDriver with the DeepL translator page loaded.
        chunk: Source text to translate.

    Returns:
        The clipboard contents after select-all/copy, passed through
        ``clean_translation``.

    Raises:
        selenium.common.exceptions.TimeoutException: if the progress popup is
            still visible after 10 seconds.
    """
    # find_element(By.XPATH, ...) works on Selenium 3 and 4; the
    # find_element_by_* helpers were removed in Selenium 4.
    source_box = driver.find_element(By.XPATH, SOURCE_INPUT_XPATH)
    pyperclip.copy(chunk)
    source_box.send_keys(Keys.CONTROL, "v")

    WebDriverWait(driver, 10).until(
        EC.invisibility_of_element_located((By.XPATH, PROGRESS_POPUP_XPATH)))
    # Fixed wait after the popup is gone, before the result is copied.
    time.sleep(18)

    # Presumably TAB moves focus to the translation output so that the
    # select-all / copy below grabs the translated text -- TODO confirm.
    source_box.send_keys(Keys.TAB)
    time.sleep(2)
    ActionChains(driver).key_down(Keys.CONTROL).send_keys('a').key_up(Keys.CONTROL).perform()
    time.sleep(2)
    ActionChains(driver).key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()
    translated = clean_translation(pyperclip.paste())
    time.sleep(5)

    # Clear the source box so the next chunk starts from an empty input.
    source_box = driver.find_element(By.XPATH, SOURCE_INPUT_XPATH)
    source_box.send_keys(Keys.CONTROL, "a")
    time.sleep(2)
    source_box.send_keys(Keys.DELETE)
    time.sleep(3)
    return translated


def main():
    """Translate every row of beerv.csv whose "traducido" cell is empty."""
    df = pd.read_csv("beerv.csv")
    # An all-empty column is read as float64; make it object so translated
    # strings can be stored without a dtype conflict.
    df["traducido"] = df["traducido"].astype(object)

    print(df["final"])
    if len(df) > 1 and isinstance(df["final"][1], str):
        print(df["final"][1])
        print(int(len(df["final"][1]) / 5000) + 1)

    # NOTE(review): `executable_path` is only accepted by older Selenium
    # releases; newer ones expect a Service object instead.
    with webdriver.Firefox(executable_path="/root/Downloads/geckodriver") as driver:
        driver.get("https://www.deepl.com/es/translator")
        for row, text in df["final"].items():
            if not pd.isna(df.loc[row, "traducido"]):
                continue  # already translated
            print("voy por la ", str(row))
            if not isinstance(text, str) or not text.strip():
                # Missing or empty source text: nothing to send.
                print("skipping row without text:", row)
                continue
            print(text)
            print(len(text))

            phrases = tokenize.sent_tokenize(text)
            print(phrases)
            print(len(phrases))
            chunks = split_into_chunks(phrases, MAX_CHUNK_CHARS)

            traduccion = []
            time.sleep(2)
            for chunk in chunks:
                print(chunk)
                traduccion.append(translate_chunk(driver, chunk))

            time.sleep(2)
            driver.delete_all_cookies()
            print("Traduccion completa: ", traduccion)
            # Stored as the list's repr, one element per translated chunk.
            # .loc writes into df itself; chained `df.traducido[row] = ...`
            # may write to a temporary copy.
            df.loc[row, "traducido"] = str(traduccion)
            # Checkpoint after every row so finished work survives a crash.
            df.to_csv("beerv_traducido.csv", columns=header)

    df.to_csv("beersecure.csv", columns=headers)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement