Guest User

Untitled

a guest
May 6th, 2018
261
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.62 KB | None | 0 0
  1. from lxml import html
  2. import tormysql
  3. from selenium import webdriver
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.support.ui import WebDriverWait
  6. from selenium.webdriver.support import expected_conditions as EC
  7. from selenium.common.exceptions import NoSuchElementException
  8. from selenium.common.exceptions import WebDriverException
  9. import time
  10. import sys
  11. import csv
  12. from shutil import copyfile
  13.  
  14. import requests
  15. from bs4 import BeautifulSoup
  16.  
  17.  
  18.  
  19. pool = tormysql.ConnectionPool(
  20. max_connections = 20, #max open connections
  21. idle_seconds = 7200, #conntion idle timeout time, 0 is not timeout
  22. wait_connection_timeout = 3, #wait connection timeout
  23. host = "unidb.cfavdkskfrrc.us-west-2.rds.amazonaws.com",
  24. user = "#",
  25. passwd = "#",
  26. db = "diplomacy_data",
  27. charset = "utf8"
  28. )
  29.  
  30. def insert_to_db(game_url):
  31. print(game_url)
  32. with (yield pool.Connection()) as conn:
  33. try:
  34. with conn.cursor() as cursor:
  35. yield cursor.execute("INSERT INTO FvA_urls(URL) VALUES('%s')" % game_url)
  36. except:
  37. yield conn.rollback()
  38. else:
  39. print('committed', game_url)
  40. yield conn.commit()
  41.  
  42.  
  43. username = "#"
  44. password = "#"
  45. login_url = "https://webdiplomacy.net/logon.php"
  46.  
  47. driver = webdriver.Chrome()
  48. driver.get(login_url)
  49. driver.find_element_by_css_selector('body > div.content.content-follow-on > form > ul > li:nth-child(2) > input[type="text"]').send_keys(username)
  50. driver.find_element_by_css_selector('body > div.content.content-follow-on > form > ul > li:nth-child(5) > input[type="password"]').send_keys(password)
  51. driver.find_element_by_css_selector('body > div.content.content-follow-on > form > ul > li:nth-child(10) > input').click()
  52.  
  53. url = "https://webdiplomacy.net/gamelistings.php?"
  54. params = "page-games=1&gamelistType=Finished&searchOn=on"
  55. driver.get(url + params)
  56. driver.find_element_by_css_selector('body > div:nth-child(5) > div:nth-child(2) > form > li:nth-child(5) > input[type="radio"]:nth-child(6)').click()
  57. driver.find_element_by_css_selector('body > div:nth-child(5) > div:nth-child(2) > form > input').click()
  58. # Get all the URLS
  59. page_num = 0
  60. while True:
  61. page_num += 1
  62. if page_num % 20 == 0:
  63. print(page_num)
  64. a = driver.find_elements(By.XPATH, '/html/body/div[5]/div[3]/div[*]/div[6]/div[2]/a')
  65. if len(a) < 1:
  66. pool.close()
  67. exit()
  68. else:
  69. for button in a:
  70. game_url = button.get_attribute('href')
  71. insert_to_db(game_url)
  72. driver.find_element_by_css_selector('body > div:nth-child(6) > div:nth-child(4) > div:nth-child(1) > div:nth-child(2) > a:nth-child(3) > img').click()
Add Comment
Please, Sign In to add comment