Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from lxml import html
- import tormysql
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.common.exceptions import NoSuchElementException
- from selenium.common.exceptions import WebDriverException
- import time
- import sys
- import csv
- from shutil import copyfile
- import requests
- from bs4 import BeautifulSoup
# Settings for the shared tormysql connection pool used for all DB writes.
_POOL_SETTINGS = {
    "max_connections": 20,  # upper bound on simultaneously open connections
    "idle_seconds": 7200,  # idle timeout for a connection; 0 disables it
    "wait_connection_timeout": 3,  # timeout while waiting for a free connection
    "host": "unidb.cfavdkskfrrc.us-west-2.rds.amazonaws.com",
    "user": "#",
    "passwd": "#",
    "db": "diplomacy_data",
    "charset": "utf8",
}

# Module-level pool; insert_to_db() borrows its connections from here.
pool = tormysql.ConnectionPool(**_POOL_SETTINGS)
def insert_to_db(game_url):
    """Insert one game URL into the ``FvA_urls`` table.

    This is a generator-based coroutine: every database operation is
    ``yield``-ed.  Calling ``insert_to_db(url)`` on its own only creates a
    generator object and executes none of the body, so the caller must hand
    the generator to a coroutine runner that resolves the yielded objects.
    NOTE(review): tormysql's published examples decorate such functions with
    Tornado's ``gen.coroutine`` and run them on an IOLoop -- confirm how this
    function is meant to be driven.

    Args:
        game_url: URL string stored in the ``URL`` column.

    A failed insert is rolled back and reported on stdout; the error is not
    re-raised, so one bad row does not stop the caller.
    """
    print(game_url)
    with (yield pool.Connection()) as conn:
        try:
            with conn.cursor() as cursor:
                # Pass the URL as a bound parameter so the driver escapes it.
                # Formatting it into the SQL text breaks on quotes and allows
                # SQL injection.
                yield cursor.execute(
                    "INSERT INTO FvA_urls(URL) VALUES(%s)", (game_url,)
                )
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so GeneratorExit and
            # KeyboardInterrupt are not swallowed (yielding while handling
            # GeneratorExit would raise RuntimeError).  Still best-effort:
            # report the failure, undo the transaction, carry on.
            print('insert failed, rolling back', game_url, repr(exc))
            yield conn.rollback()
        else:
            # Announce the commit only after it has actually been issued.
            yield conn.commit()
            print('committed', game_url)
# Credentials for the webdiplomacy.net account used by the scraper.
username = "#"
password = "#"

login_url = "https://webdiplomacy.net/logon.php"
driver = webdriver.Chrome()
try:
    # Log in through the site's login form.
    driver.get(login_url)
    driver.find_element(By.CSS_SELECTOR, 'body > div.content.content-follow-on > form > ul > li:nth-child(2) > input[type="text"]').send_keys(username)
    driver.find_element(By.CSS_SELECTOR, 'body > div.content.content-follow-on > form > ul > li:nth-child(5) > input[type="password"]').send_keys(password)
    driver.find_element(By.CSS_SELECTOR, 'body > div.content.content-follow-on > form > ul > li:nth-child(10) > input').click()

    # Open the finished-games listing with the search form enabled.
    url = "https://webdiplomacy.net/gamelistings.php?"
    params = "page-games=1&gamelistType=Finished&searchOn=on"
    driver.get(url + params)

    # Select a radio option in the search form and submit it.
    # NOTE(review): which filter this radio button selects is not visible
    # from the selector alone -- confirm against the live page.
    driver.find_element(By.CSS_SELECTOR, 'body > div:nth-child(5) > div:nth-child(2) > form > li:nth-child(5) > input[type="radio"]:nth-child(6)').click()
    driver.find_element(By.CSS_SELECTOR, 'body > div:nth-child(5) > div:nth-child(2) > form > input').click()

    # Get all the URLs, one result page at a time.
    page_num = 0
    while True:
        page_num += 1
        if page_num % 20 == 0:
            # Progress indicator every 20 pages.
            print(page_num)

        game_links = driver.find_elements(By.XPATH, '/html/body/div[5]/div[3]/div[*]/div[6]/div[2]/a')
        if not game_links:
            # A page without game links is treated as the end of the listing.
            # NOTE(review): other pool operations in this file are yielded
            # (e.g. pool.Connection()); confirm close() does not also need to
            # be awaited to take effect.
            pool.close()
            sys.exit()

        for link in game_links:
            game_url = link.get_attribute('href')
            # NOTE(review): insert_to_db is a generator function (its body
            # uses ``yield``), so this call only creates a generator object
            # and discards it; the INSERT does not run unless the generator
            # is driven by a coroutine runner.
            insert_to_db(game_url)

        # Click the "next page" image link to advance the listing.
        driver.find_element(By.CSS_SELECTOR, 'body > div:nth-child(6) > div:nth-child(4) > div:nth-child(1) > div:nth-child(2) > a:nth-child(3) > img').click()
finally:
    # Always shut the browser down, on normal exit (SystemExit) and on any
    # scraping error alike; otherwise the Chrome/chromedriver processes
    # outlive this script.
    driver.quit()
Add Comment
Please, Sign In to add comment