Advertisement
Guest User

Untitled

a guest
Aug 19th, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.96 KB | None | 0 0
  1. from selenium import webdriver
  2. from selenium.webdriver.support.ui import WebDriverWait
  3. from selenium.common.exceptions import NoSuchElementException
  4. from selenium.webdriver.common.keys import Keys
  5. from selenium.webdriver.chrome.options import Options as ChromeOptions
  6. import math
  7. import time
  8.  
  9. cd = "D:\\Code\\Python\\chromedriver.exe"
  10. loot = "D:\\Code\\Python\\loot.txt"
  11. chrome_bin = "D:\\Programs\\Google\\Chrome\\Application\\chrome.exe"
  12.  
  13. #1 = "https://www.wowhead.com/battle-for-azeroth-dungeons-overview"
  14. aa = "https://www.wowhead.com/siege-of-boralus"
  15. bb = "https://www.wowhead.com/freehold"
  16. cc = "https://www.wowhead.com/shrine-of-the-storm"
  17. dd = "https://www.wowhead.com/tol-dagor"
  18. ee = "https://www.wowhead.com/waycrest-manor"
  19. ff = "https://www.wowhead.com/ataldazar"
  20. gg = "https://www.wowhead.com/the-motherlode"
  21. hh = "https://www.wowhead.com/temple-of-sethraliss"
  22. ii = "https://www.wowhead.com/the-underrot"
  23. jj = "https://www.wowhead.com/kings-rest"
  24. boe = "https://www.wowhead.com/armor/side:3/quality:4?filter=3:166;1:8;0:0"
  25. uldir = "https://www.wowhead.com/uldir"
  26.  
  27. ToS = "http://www.wowhead.com/tomb-of-sargeras"
  28. legendary = "http://www.wowhead.com/items/quality:5/slot:16:18:5:8:11:10:1:23:7:21:2:22:13:24:15:28:14:4:3:19:25:12:17:6:9?filter=166:2;7:1;0:0"
  29. argus = "http://www.wowhead.com/antorus-the-burning-throne"
  30. seat = "http://www.wowhead.com/the-seat-of-the-triumvirate"
  31. cos = "http://www.wowhead.com/court-of-stars"
  32. coen = "http://www.wowhead.com/cathedral-of-eternal-night"
  33. hov = "http://www.wowhead.com/halls-of-valor"
  34. neth = "http://www.wowhead.com/neltharions-lair"
  35. eye = "http://www.wowhead.com/eye-of-azshara-dungeon"
  36. dht = "http://www.wowhead.com/darkheart-thicket"
  37. vh = "http://www.wowhead.com/violet-hold"
  38. brh = "http://www.wowhead.com/black-rook-hold"
  39. maw = "http://www.wowhead.com/maw-of-souls"
  40. votw = "http://www.wowhead.com/vault-of-the-wardens"
  41. arc = "http://www.wowhead.com/the-arcway"
  42. kara = "http://www.wowhead.com/return-to-karazhan"
  43. leggo = "http://www.wowhead.com/armor/side:3/quality:5?filter=166;7;0#0-1-2"
  44. #HTML ERROR Varnish cache server
  45. #/html/body/p[3]
  46. link = [uldir, boe, bb, aa, cc, dd, ee, ff, gg, hh, ii, jj]
  47. def main():
  48. k = 0
  49. for i in link:
  50. print(i)
  51. sim(i)
  52. ++k
  53.  
  54. time.sleep(500)
  55. print("Sims Complete")
  56.  
  57.  
  58. def sim(link):
  59. opts = ChromeOptions()
  60.  
  61. opts.binary_location = chrome_bin
  62.  
  63. driver = webdriver.Chrome(executable_path=cd)
  64.  
  65.  
  66. while True:
  67. try:
  68. driver.set_window_size(1000, 5000)
  69. driver.set_page_load_timeout(20)
  70. break
  71. except:
  72. time.sleep(1)
  73.  
  74. while True:
  75. try:
  76. driver.get(link)
  77. break
  78. except:
  79. time.sleep(5)
  80.  
  81. time.sleep(10)
  82. #get what kind of wowhead table it
  83. try:
  84. driver.find_element_by_xpath("//*[@id='tab-drops']/div[1]/div[1]/span/b[2]").get_attribute("innerHTML")
  85. dort = 2
  86. except:
  87. dort = 1
  88.  
  89. #filter
  90. try:
  91. time.sleep(1)
  92. if dort == 2:
  93. driver.find_element_by_xpath("//*[@id='tab-drops']/div[1]/span/input").send_keys("3")
  94. elif dort == 1:
  95. driver.find_element_by_xpath("//*[@id='lv-items']/div[1]/span/input").send_keys("3")
  96. print("filter")
  97. time.sleep(1)
  98. except:
  99. print("cant filter")
  100. time.sleep(0)
  101.  
  102. if dort == 2:
  103. try:
  104. count = driver.find_element_by_xpath("//*[@id='tab-drops']/div[1]/div[1]/span/b[3]").get_attribute("innerHTML")
  105. except:
  106. print("cant find countdort == 2")
  107. if dort == 1:
  108. try:
  109. count = driver.find_element_by_xpath("//*[@id='lv-items']/div[1]/div[1]/span/b[3]").get_attribute("innerHTML")
  110. except:
  111. print("cannot find count dort == 1")
  112. print("Count is: " + count)
  113. try:
  114. count = math.ceil(float(count)/50)
  115. except:
  116. print("MATH IS HARD")
  117. time.sleep(999)
  118. print(count)
  119.  
  120. if dort == 2:
  121. perpage = int(driver.find_element_by_xpath("//*[@id='tab-drops']/div[1]/div[1]/span/b[3]").get_attribute("innerHTML"),10)
  122. elif dort == 1:
  123. perpage = int(driver.find_element_by_xpath("//*[@id='lv-items']/div[1]/div[1]/span/b[3]").get_attribute("innerHTML"),10)
  124. print(perpage)
  125.  
  126. lastpage = perpage
  127. while lastpage > 50:
  128. lastpage = lastpage - 50
  129.  
  130. print(lastpage)
  131. if perpage > 50:
  132. #do all the full 50 page lists
  133. for j in range (1, int(count)):
  134.  
  135. print("Page " + str(count) + " of " + str(j))
  136.  
  137. for i in range (1, 51):
  138. print("columns " + str(perpage) + " of " + str(i))
  139. parcePage(i, driver, dort, link)
  140.  
  141. try:
  142. print("Next Page")
  143. print(j)
  144. print(int(count))
  145. while j != int(count):
  146. if dort == 2:
  147. print("click tab-drops")
  148. driver.find_element_by_xpath("//*[@id='tab-drops']/div[1]/div[1]/a[3]").click()
  149. print("clicked ")
  150.  
  151. break
  152. elif dort == 1:
  153. print("click lv-items")
  154. driver.find_element_by_xpath("//*[@id='lv-items']/div[1]/div[1]/a[3]").click()
  155. print("clicked ")
  156.  
  157. break
  158.  
  159. except:
  160. print("Failed to click NEXT retrying")
  161. time.sleep(1)
  162.  
  163. time.sleep(5)
  164.  
  165. for i in range (1, lastpage+1):
  166. print("columns " + str(perpage) + " of " + str(i))
  167. parcePage(i, driver, dort, link)
  168.  
  169. print("quitting")
  170. print("Page " + str(count))
  171. time.sleep(5)
  172. driver.quit()
  173.  
  174. def dungLoc(driver):
  175. while True:
  176. try:
  177. driver.switch_to_window(driver.window_handles[1])
  178. driver.find_element_by_xpath("//*[@id='tab-dropped-by']/div[2]/table/thead/tr/th[2]").click()
  179. srcItem = driver.find_element_by_xpath("//*[@id='tab-dropped-by']/div[2]/table/tbody/tr[1]/td[4]/a").get_attribute("innerHTML")
  180. break
  181. except:
  182. time.sleep(1)
  183. driver.close()
  184. driver.switch_to_window(driver.window_handles[0])
  185. return srcItem
  186.  
  187.  
  188. def tosBoss(driver):
  189. time.sleep(3)
  190. driver.switch_to_window(driver.window_handles[1])
  191. try:
  192. srcItem = driver.find_element_by_xpath("//*[@id='tab-dropped-by']/div[2]/table/tbody/tr/td[2]/a").get_attribute("innerHTML")
  193. except:
  194. driver.close()
  195. driver.switch_to_window(driver.window_handles[0])
  196. return "any"
  197.  
  198.  
  199. driver.close()
  200. driver.switch_to_window(driver.window_handles[0])
  201. return srcItem
  202.  
  203. def trinketType(driver):
  204. while True:
  205. print("TRINKET TYPE")
  206. time.sleep(2)
  207. try:
  208.  
  209. driver.switch_to_window(driver.window_handles[1])
  210.  
  211. class1 = driver.find_element_by_xpath("//*[@id='lootspecs']/li[1]").get_attribute("innerHTML")
  212.  
  213. class2 = driver.find_element_by_xpath("//*[@id='lootspecs']/li[3]").get_attribute("innerHTML")
  214.  
  215. try:
  216. class3 = driver.find_element_by_xpath("//*[@id='lootspecs']/li[5]").get_attribute("innerHTML")
  217. except:
  218. class3 = driver.find_element_by_xpath("//*[@id='lootspecs']/li[2]").get_attribute("innerHTML")
  219.  
  220. break
  221.  
  222. except:
  223. try:
  224. print("all specs")
  225. q = 'all'
  226. driver.close()
  227. driver.switch_to_window(driver.window_handles[0])
  228. return q
  229. except:
  230. time.sleep(1)
  231.  
  232.  
  233.  
  234. class1 = class1.split(":")
  235. class1 = class1[0] #First
  236.  
  237. class2 = class2.split(":")
  238. class2 = class2[0] #Third
  239.  
  240. class3 = class3.split(":")
  241. class3 = class3[0] #Fifth
  242.  
  243. #Spellcaster mage druid hunter priest
  244. #Healer Pally Druid Priest sham mook
  245. #Tank Pally Warrior Druid DK Moonk
  246. #Mele Pally Warrior Druid DK Hunter
  247.  
  248. c = 'error'
  249.  
  250. if class1 == 'Paladin':
  251. if class3 == 'Monk':
  252. c = 'tank'
  253. if class2 == 'Priest':
  254. c = 'healer'
  255. if class3 == 'Warrior':
  256. c = 'strength'
  257.  
  258. if class3 == 'Hunter':
  259. c = 'melee'
  260.  
  261. if class1 == 'Druid':
  262. c = 'agility'
  263.  
  264. if class1 == 'Mage':
  265. c = 'ranged'
  266.  
  267.  
  268. #close
  269. driver.close();
  270. driver.switch_to_window(driver.window_handles[0])
  271. return c
  272.  
  273. def parcePage(i, driver, dort, link):
  274. #Find data
  275. if dort == 2:
  276. print("Trying Drops tab-drops")
  277. modeCheck = driver.find_element_by_xpath("//*[@id='tab-drops']/div[2]/table/thead/tr/th[3]/div/a/span/span").get_attribute("innerHTML")
  278. if modeCheck == "Mode":
  279. print("MODE RULES")
  280.  
  281. name = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]/td[3]/div/a"]
  282. nString = "".join(name)
  283.  
  284. #tier //*[@id="tab-drops"]/div[2]/table/tbody/tr[1]/td[11]/a
  285. type = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]]/td[11]/a"]
  286. tierString = "".join(type)
  287.  
  288. type = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]/td[11]/a"] #Type
  289. tString = "".join(type)
  290.  
  291. slot = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]/td[9]"] #Slot
  292. sString = "".join(slot)
  293.  
  294. gSource = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]/td[10]/div/a"] #location
  295. gString = "".join(gSource)
  296.  
  297. source = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]td[10]/div/a"] #sub location of existant
  298. eString = "".join(source)
  299. else:
  300. print("NORMAL RULES")
  301.  
  302. name = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]/td[3]/div/a"]
  303. nString = "".join(name)
  304.  
  305. #tier //*[@id="tab-drops"]/div[2]/table/tbody/tr[1]/td[11]/a
  306. type = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]]/td[11]/a"]
  307. tierString = "".join(type)
  308.  
  309. type = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]/td[10]/a"] #Type
  310. tString = "".join(type)
  311.  
  312. slot = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]/td[8]"] #Slot
  313. sString = "".join(slot)
  314.  
  315. gSource = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]/td[9]"] #location
  316. gString = "".join(gSource)
  317.  
  318. source = ["//*[@id='tab-drops']/div[2]/table/tbody/tr[", str(i), "]/td[9]/div/a"] #sub location of existant
  319. eString = "".join(source)
  320.  
  321. elif dort == 1:
  322. print("Trying Items lv-items")
  323. modeCheck = driver.find_element_by_xpath("//*[@id='lv-items']/div[2]/table/thead/tr/th[3]/div/a/span/span").get_attribute("innerHTML")
  324. if modeCheck == "Mode":
  325. print("MODE RULES")
  326.  
  327. name = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[3]/div/a"]
  328. nString = "".join(name)
  329.  
  330. #tier //*[@id="lv-items"]/div[2]/table/tbody/tr[1]/td[11]/a
  331. type = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]]/td[11]/a"]
  332. tierString = "".join(type)
  333.  
  334. type = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[11]/a"] #Type
  335. tString = "".join(type)
  336.  
  337. slot = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[9]"] #Slot
  338. sString = "".join(slot)
  339.  
  340. gSource = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[10]/div/a"] #location
  341. gString = "".join(gSource)
  342.  
  343. source = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]td[10]/div/a"] #sub location of existant
  344. eString = "".join(source)
  345. else:
  346. print("NORMAL RULES")
  347.  
  348. name = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[3]/div/a"]
  349. nString = "".join(name)
  350.  
  351. #tier //*[@id="lv-items"]/div[2]/table/tbody/tr[1]/td[11]/a
  352. #//*[@id='lv-items']/div[2]/table/tbody/tr[1]/td[11]
  353. type = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[11]/a"]
  354. tierString = "".join(type)
  355.  
  356. type = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[10]/a"] #Type
  357. tString = "".join(type)
  358.  
  359. slot = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[9]"] #Slot
  360. sString = "".join(slot)
  361.  
  362. gSource = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[9]"] #location
  363. gString = "".join(gSource)
  364.  
  365. source = ["//*[@id='lv-items']/div[2]/table/tbody/tr[", str(i), "]/td[9]/div/a"] #sub location of existant
  366. eString = "".join(source)
  367.  
  368. elif dort == 0:
  369. exit(-1)
  370.  
  371. #grab data
  372. #id
  373. while True:
  374. try:
  375. idItem = driver.find_element_by_xpath(nString).get_attribute("href")
  376. print(idItem)
  377. id = idItem.split("=")
  378. id = id[1].split("&")
  379. idItem = id[0]
  380. print(idItem)
  381. break
  382. except:
  383. print("Failed to grab the ID retrying")
  384. print(nString)
  385. time.sleep(1)
  386. #name
  387. while True:
  388. try:
  389. nItem = driver.find_element_by_xpath(nString).get_attribute("innerHTML") #name
  390.  
  391. nItem = nItem.replace(" ", "_")
  392. nItem = nItem.replace("'", "''")
  393. print(nItem)
  394. break
  395. except:
  396. print("Failed to grab the name retrying")
  397. time.sleep(1)
  398. #Slot
  399. while True:
  400. try:
  401. sItem = driver.find_element_by_xpath(sString).get_attribute("innerHTML")
  402. print(sItem)
  403. break
  404. except:
  405. print("Failed to grab the slot retrying")
  406. time.sleep(1)
  407. #type
  408. while True:
  409. #Tier
  410. try:
  411. try:
  412. print("tierString")
  413. print(tierString)
  414. tItem = driver.find_element_by_xpath(tierString).get_attribute("innerHTML")
  415. print("split")
  416. type = tItem.split()
  417. tItem = type[0]
  418. break
  419. except:
  420. print("tString")
  421. tItem = driver.find_element_by_xpath(tString).get_attribute("innerHTML")
  422. print("split")
  423. type = tItem.split()
  424. tItem = type[0]
  425. break
  426. except:
  427. print("Failed to something in Tier")
  428. time.sleep(5)
  429.  
  430.  
  431. #Trinket Type
  432. if sItem == 'Trinket':
  433. print("trinket found")
  434. driver.find_element_by_xpath(nString).send_keys(Keys.CONTROL + Keys.SHIFT + Keys.RETURN)
  435. tItem = trinketType(driver)
  436.  
  437.  
  438. print(tItem)
  439.  
  440. #source
  441.  
  442. print("SOURCE")
  443.  
  444. #format what it found
  445.  
  446. if dort == 2:
  447. driver.find_element_by_xpath(nString).send_keys(Keys.CONTROL + Keys.SHIFT + Keys.RETURN)
  448. srcItem = tosBoss(driver)
  449. elif dort == 1:
  450. srcItem = "drop"
  451.  
  452. srcItem = srcItem.replace(" ", "_")
  453. srcItem = srcItem.replace("'", "''")
  454. print(srcItem)
  455.  
  456. wtf(idItem, nItem.lower(), tItem.lower(), sItem.lower(), srcItem.lower(), link)
  457.  
  458. def wtf(itemid, name, type, slot, source, link):
  459.  
  460. link = link.split("/")
  461. link = link[3]
  462. link = link.replace("-","_")
  463.  
  464. f = open(link + ".txt", 'a')
  465. # //ITEM ID, ITEM NAME, ARMOR TYPE, SLOT, BOSS, TABLE
  466. print("\"INSERT INTO "+ link +" VALUES(" + itemid + ", '" + name + "', '" + type + "', '" + slot + "', '" + source + "', '" + link + "');\"" + "\n")
  467.  
  468. f.write("\"INSERT INTO "+ link +" VALUES(" + itemid + ", '" + name + "', '" + type + "', '" + slot + "', '" + source + "', '" + link + "');\"" + "\n")
  469.  
  470. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement