Advertisement
feasel

parsehtml .py

Oct 16th, 2011
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.90 KB | None | 0 0
#######################################################
#parsehtml - Parses HTML from websites, specifically SpeedRunsLive (SRL)
#July 2011
#
# - Call the functions below to look up information about the game
#   named by the GAME variable.
#######################################################
  7.  
  8.  
  9. import urllib
  10. import string
  11. import array
  12. import excel
  13.  
#Abbreviation of the game that every lookup in this module targets. It is
#appended to the speedrunslive.com game-page URL as the "game=" value.
#changeGame() replaces it at runtime (eventually it will be set by a bot cmd).
GAME = "smw"
  16.  
  17. def changeGame(var):
  18. if (getGameName(var) == "DNE"):
  19. return "false"
  20. else:
  21. global GAME
  22. GAME = var
  23. return "true"
  24.  
  25.  
  26.  
  27.  
  28.  
  29.  
  30. ###################################EXCEL FUNCTIONS##################
  31. def createExcel():
  32. #import the srl game page into htmlSource string
  33. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist/game.php?game=" + GAME)
  34. htmlSource = sock.read()
  35. sock.close()
  36. indexes = 0
  37. goallist = []
  38. while (indexes > -1):
  39. #Get the tracked goals put them into list!
  40. indexes = htmlSource.find("raceFeedStatus\"><tr>")
  41. print "index = " + str(indexes)
  42. if (indexes==-1):
  43. break
  44. indexes = indexes + 36
  45. print "index = " + str(indexes)
  46. tempgoal = ""
  47. tempgoal = getWord(htmlSource,indexes,"<")
  48. print tempgoal
  49. goallist.append(tempgoal)
  50. endof = len(htmlSource)
  51. htmlSource = htmlSource[indexes:endof]
  52. print goallist
  53. excel.writeGoals(goallist)
  54.  
  55. def updateRaces():
  56. #The Game that you want to look up (eventually will be a variable set in a bot cmd)
  57. smwlist = []
  58. i = 1000
  59.  
  60. while i<4100:
  61.  
  62. #import the srl game page into htmlSource string
  63. sock = urllib.urlopen("http://speedrunslive.com/races/race.php?race=" + str(i))
  64. htmlSource = sock.read()
  65. sock.close()
  66.  
  67. itthere = htmlSource.find("Super Mario World")
  68. if itthere > 0:
  69. smwlist.append(i)
  70. htmlSource = " "
  71. i = i + 1
  72. print smwlist
  73.  
  74.  
  75.  
  76.  
  77.  
  78.  
  79.  
  80.  
  81. #####################################END EXCEL##############################
  82.  
  83. ####
  84. #Internal functions
  85. #
  86. ####
  87.  
  88. #returns characters from source. Starting from index 'where' to character 'char'
  89. def getWord(source,where,char):
  90.  
  91. letter = ' '
  92. temp = ' '
  93. while letter != char:
  94. print "getWord - while '" + letter + "' != '" + char + "'"
  95. print "looking at index: " + str(where)
  96. letter = source[where]
  97. temp = temp + letter
  98. where = where + 1
  99. if where+1 >= len(source):
  100. return "*NO MORE*"
  101.  
  102.  
  103. temp = temp[0:len(temp) - 1]
  104. where = 0
  105. return temp
  106.  
  107. #Returns the text for a certain race
  108. def getRaceText(race):
  109. #import the srl game page into htmlSource string
  110. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist/game.php?game=" + GAME)
  111. htmlSource = sock.read()
  112. sock.close()
  113. htmlSource = htmlSource.lower()
  114. race = race.lower()
  115. begin = htmlSource.find("<th colspan=\"3\">" + race + "</th>")
  116. length = len(htmlSource)
  117. end = htmlSource.find("</table>", begin, length)
  118. tempracetext = htmlSource[begin:end]
  119. if tempracetext == '':
  120. return -1
  121. return tempracetext
  122.  
  123. #Returns the text for top 10 players (if 10 players do not exist - should put in some code to only grab 9,8,7,6,5,4 etc..)
  124. def getPlayersText(amt):
  125. #import the srl game page into htmlSource string
  126. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist/game.php?game=" + GAME)
  127. htmlSource = sock.read()
  128. sock.close()
  129. begin = htmlSource.find("<col class=\"playersListRating\">")
  130. length = len(htmlSource)
  131. counter = 1
  132. end = htmlSource.find("<td>" + str((amt+1)) + "</td>", begin, length)
  133. tempracetext = htmlSource[begin:end]
  134. print tempracetext
  135. return tempracetext
  136.  
  137. def getAllPlayers():
  138. #import the srl game page into htmlSource string
  139. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist/game.php?game=" + GAME)
  140. htmlSource = sock.read()
  141. sock.close()
  142. begin = htmlSource.find("<col class=\"playersListRating\">")
  143. length = len(htmlSource)
  144. counter = 1
  145. end = htmlSource.find("</div>", begin, length)
  146. tempracetext = htmlSource[begin:end]
  147. print tempracetext
  148. return tempracetext
  149.  
  150.  
  151.  
  152. def createGameList(var):
  153.  
  154.  
  155. #import the srl game page into htmlSource string
  156. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist")
  157. htmlSource2 = sock.read()
  158. sock.close()
  159.  
  160.  
  161. index = htmlSource2.find("<a href=\"game.php?game=")
  162. thegamelist = []
  163. i = 0
  164. prevpointdiff = []
  165. prevpoint = 0
  166. postpointdiff = []
  167. postpoint = 0
  168. gamepointdiff = []
  169. gamepoint = 0
  170.  
  171. #if your main game isn't #1 then create list till you find it
  172. while i < 100:
  173. #position to the next game
  174. index = htmlSource2.find("<a href=\"game.php?game=",index)
  175. index = index+23
  176. tempgame = getWord(htmlSource2,index,"\"").strip()
  177.  
  178. index = index + (len(tempgame))
  179. index = htmlSource2.find("<td title=\"",index)
  180. index = index+11
  181. temppoints = getWord(htmlSource2,index,"p").strip()
  182. print "temppoint =" + str(temppoints)
  183. thegamelist.append([tempgame,temppoints])
  184.  
  185. #if game is #1
  186. if (i==0 and tempgame == GAME):
  187. i = i+1
  188. prevpoint = 1
  189. #position to the next game
  190. index = htmlSource2.find("<a href=\"game.php?game=",index)
  191. index = index+23
  192. tempgame = getWord(htmlSource2,index,"\"").strip()
  193.  
  194. index = index + (len(tempgame))
  195. index = htmlSource2.find("<td title=\"",index)
  196. index = index+11
  197. temppoints = getWord(htmlSource2,index,"p").strip()
  198. print "temppoint =" + str(temppoints)
  199. thegamelist.append([tempgame,temppoints])
  200.  
  201. #need to pass the game then add after game and before game (your game #1)
  202.  
  203.  
  204. #find your game and the game after your game
  205. if (prevpoint == 1 and postpoint == 0):
  206. pointdiff = 0
  207. pointdiff = int(thegamelist[i-1][1]) - int(temppoints.strip())
  208. postpointdiff = [i+1,thegamelist[i][0],pointdiff]
  209. postpoint = 1
  210. gamepointdiff = [i,thegamelist[i-1][0],thegamelist[i-1][1]]
  211. if (i==1):
  212. return [gamepointdiff,postpointdiff]
  213.  
  214.  
  215. #find the game before your game (if it is not first)
  216. if (tempgame == GAME and i <> 0):
  217. pointdiff = 0
  218. pointdiff = int(thegamelist[i-1][1]) - int(temppoints.strip())
  219. prevpointdiff = [i,thegamelist[i-1][0],pointdiff]
  220. prevpoint = 1
  221. i = i+1
  222. print thegamelist
  223. print "PREV POINT DIFF::"
  224. print prevpointdiff
  225. print "POST POINT DIFF::"
  226. print postpointdiff
  227.  
  228.  
  229. return [gamepointdiff,prevpointdiff,postpointdiff]
  230.  
  231.  
  232. def playerExists(racer):
  233.  
  234. #import the srl game page into htmlSource string
  235. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist/game.php?game=" + GAME)
  236. htmlSource = sock.read()
  237. sock.close()
  238.  
  239. astring = getAllPlayers()
  240. astring = htmlSource.lower()
  241. player = astring.find("<a href=\"/profiles/?player=" + racer.lower())
  242. if player == -1:
  243. return 'false'
  244. else:
  245. return 'true'
  246.  
  247. #Really Shitty code that returns the rank of a player. There has got to be a better way to do this.
  248. def getPlayerRank(racer):
  249.  
  250. #import the srl game page into htmlSource string
  251. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist/game.php?game=" + GAME)
  252. htmlSource = sock.read()
  253. sock.close()
  254.  
  255. astring = getAllPlayers()
  256. astring = htmlSource.lower()
  257. player = astring.find("<a href=\"/profiles/?player=" + racer)
  258. if player == -1:
  259. return -1
  260. player = player - 9
  261. while htmlSource[player] <> '#':
  262. player = player - 1
  263.  
  264. numb = ''
  265. player = player + 1
  266. while htmlSource[player] <> '<':
  267. numb = numb + htmlSource[player]
  268. player = player + 1
  269.  
  270. return numb
  271.  
  272.  
  273.  
# PUBLIC FUNCTIONS - only the functions below should be called from outside this module
#
#
  277.  
  278. #input:shorthand version of game name
  279. #returns: full version of game name
  280. def getGameName(var):
  281.  
  282. #import the srl game page into htmlSource string
  283. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist")
  284. htmlSource2 = sock.read()
  285. sock.close()
  286.  
  287.  
  288. index = htmlSource2.find("<a href=\"game.php?game=" + var)
  289. if (index == -1):
  290. return "DNE"
  291. else:
  292. index = index+25+len(var)
  293. tempgame = getWord(htmlSource2,index,"<").strip()
  294.  
  295. return tempgame
  296.  
  297.  
  298.  
  299.  
  300. #Return the Top 5 for a certain race in an array
  301. def topFive(race):
  302. #getGamePage()
  303.  
  304. race_lower = race.lower()
  305. print race + " IS THE RACE!!! "
  306. beginstring = ""
  307. beginstring = getRaceText(race_lower)
  308. #print beginstring
  309.  
  310. #If the race DNE then let the user know!
  311. if beginstring == "":
  312. return "DNE" #in smwBot if we return this - he will display a nice message.
  313.  
  314. index = 0
  315. counter = 0
  316. racearray = list()
  317. stringracearray = ""
  318. while counter < 5:
  319. index = beginstring.find("<a href=\"/profiles/?player=")
  320. if index == -1:
  321. break
  322. index = index + 27
  323. tempracer = (getWord(beginstring,index,"\""))
  324. if (tempracer == "*NO MORE*"):
  325. break
  326. tempnum = len(tempracer)
  327. index = index + (tempnum*2) + 13
  328. temptime = getWord(beginstring,index,"<")
  329. if (temptime == "*NO MORE*"):
  330. break
  331.  
  332. #for now just output to a string
  333. stringracearray = stringracearray + str(counter+1) + ")" + tempracer + "-" + temptime + " | "
  334.  
  335. endof = len(beginstring)
  336. beginstring = beginstring[index:endof]
  337. counter = counter + 1
  338. print stringracearray
  339.  
  340. #Since smwBot doesn't like arrays looks like we have to convert to string.. for now
  341. return stringracearray
  342.  
  343.  
  344. def rank(name):
  345.  
  346. #import the srl game page into htmlSource string
  347. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist/game.php?game=" + GAME)
  348. htmlSource = sock.read()
  349. sock.close()
  350.  
  351. #get game name
  352. gamename = getGameName(GAME)
  353.  
  354. name = name.lower()
  355. playerRank = -1
  356. playerRank = getPlayerRank(name)
  357. playerRank = int(playerRank)
  358.  
  359. if (playerRank > 0 and playerRank <6) or name == 'fuzzey_ninja':
  360. #get random chuck noris fact and replace name with racer
  361. #import the srl game page into htmlSource string
  362. sock = urllib.urlopen("http://www.funnyconcept.com/")
  363. chuckSource = sock.read()
  364. sock.close()
  365. index = chuckSource.find("<div class=\"jokebox\">")
  366. fact = getWord(chuckSource,index+23,"<")
  367. fact = fact.replace("Chuck Norris", name)
  368. print "You are Ranked #" + str(playerRank)
  369. fact = ' '.join(fact.split())
  370. print fact
  371.  
  372. returnText = ["You are Ranked #" + str(playerRank) + " in "+gamename+"!", fact.strip()]
  373.  
  374.  
  375.  
  376. return returnText
  377.  
  378. elif playerRank >= 6 and playerRank <=10:
  379. returnText = ["You are Ranked #" + str(playerRank) + " in "+gamename+"!", "You are in the top 10, not too Shabby!"]
  380. return returnText
  381.  
  382. elif playerRank >= 11 and playerRank <=14:
  383. returnText = ["You are Ranked #" + str(playerRank) + " in "+gamename+"!", "Ehh you are alright. I guess. (not really)"]
  384. return returnText
  385.  
  386. elif playerRank > 14 or name == 'trakof':
  387. sock = urllib.urlopen("http://insultgenerator.net/")
  388. insultSource = sock.read()
  389. sock.close()
  390. index = insultSource.find("<h1 class=\"insult\">")
  391. tempy = getWord(insultSource,index,",")
  392. lent = len(tempy)
  393. insulty = getWord(insultSource,index+lent,'<')
  394. fact = "Nice rank," + insulty
  395.  
  396. #print "You are Ranked #" + str(playerRank)
  397. #print fact
  398. returnText = ["You are Ranked #" + str(playerRank) + " in "+gamename+"!", fact.strip()]
  399. return returnText
  400.  
  401. #if player dne dont bother getting a message
  402. else:
  403. returnText = ["You have never raced "+gamename+"!!", "Time to Start!! http://www.smwwiki.com"]
  404. return returnText
  405.  
  406. def blechy():
  407. #import the srl game page into htmlSource string
  408. sock = urllib.urlopen("http://www.speedrunslive.com/gamelist/game.php?game=" + GAME)
  409. htmlSource = sock.read()
  410. sock.close()
  411.  
  412. #get random chuck noris fact and replace name with racer
  413. #import the srl game page into htmlSource string
  414. sock = urllib.urlopen("http://toykeeper.net/programs/mad/compliments")
  415. compSource = sock.read()
  416. sock.close()
  417. index = compSource.find("<h3 class=\"blurb_title_1\">")
  418. comp = getWord(compSource,index+26,"<")
  419. comp = ' '.join(comp.split())
  420. return comp
  421.  
  422.  
  423. #Return the Top 5 for a certain race in an array
  424. def topTen():
  425. gamename = getGameName(GAME)
  426. print GAME + " IS THE GAME!!! "
  427. beginstring = ""
  428. beginstring = getPlayersText(10) #~~~~~~Ten is the amount of players to return make variable?
  429. #print beginstring
  430.  
  431. #If the race DNE then let the user know!
  432. if beginstring == "":
  433. return "DNE" #in smwBot if we return this - he will display a nice message.
  434.  
  435.  
  436. #Check to see if there is less than 10 racers!
  437. indexs = beginstring.find("Players: <strong>")
  438. indexs = indexs + 17
  439. numracers = int(getWord(beginstring,indexs,"<"))
  440. numberofracers = 10
  441. if (numracers <10):
  442. numberofracers = numracers
  443.  
  444. index = 0
  445. counter = 0
  446. racearray = list()
  447. stringracearray = ""
  448. while counter < numberofracers: #~~~~~~Ten is the amount of players to return (make variable)? Or could make when index is greater than length of text.
  449. index = beginstring.find("<a href=\"/profiles/?player=")
  450. tempracer = (getWord(beginstring,index+27,"\""))
  451. tempnum = len(tempracer)
  452. index = index + (tempnum*2) + 37
  453. temppoints = getWord(beginstring,index,"<")
  454. temppoints = temppoints.strip()
  455. #print tempracer + " - " + temppoints
  456. #print "THE INDEX IS: "
  457. #print index
  458.  
  459. #######put string in next spot in list
  460. #######racearray.append([tempracer,temptime]) ###not doing this yet because smwBot doesn't like to print arrays
  461.  
  462. #for now just output to a string
  463. stringracearray = stringracearray + str(counter+1) + ": " + tempracer + " - " + temppoints + " | "
  464.  
  465. endof = len(beginstring)
  466. beginstring = beginstring[index:endof]
  467. counter = counter + 1
  468. #print stringracearray
  469.  
  470. #Since smwBot doesn't like arrays looks like we have to convert to string.. for now
  471.  
  472.  
  473. return [gamename,stringracearray]
  474.  
  475.  
  476.  
  477.  
  478.  
  479.  
  480.  
  481.  
  482. #A function that shows our rank AND
  483. #shows where we are in comparison to:
  484. # 1) Game above
  485. # 2) Game below
  486. # (if 1 or 2 DNE) 3) game passed as variable
  487. def gameRank(var):
  488.  
  489. #get game name
  490. gamename = getGameName(GAME)
  491.  
  492. ranklist = []
  493. ranklist = createGameList(var)
  494. print ranklist
  495.  
  496. if (len(ranklist[0]) ==0):
  497. return "DNE"
  498.  
  499. elif (len(ranklist) == 2):
  500. returnlist=[]
  501. returnlist.append(gamename+" is Ranked #" + str(ranklist[0][0]) + " with " + str(ranklist[0][2]) + " points!")
  502. returnlist.append("It is " + str(ranklist[1][2]) + " point(s) ahead #" + str(ranklist[1][0]) + " " + getGameName(str(ranklist[1][1])))
  503. return returnlist
  504.  
  505. else:
  506. returnlist=[]
  507. returnlist.append(gamename+" is Ranked #" + str(ranklist[0][0]) + " with " + str(ranklist[0][2]) + " points!")
  508. returnlist.append("It is " + str(ranklist[1][2]) + " point(s) behind #" + str(ranklist[1][0]) + " " + getGameName(str(ranklist[1][1])))
  509. returnlist.append("It is " + str(ranklist[2][2]) + " point(s) ahead #" + str(ranklist[2][0]) + " " + getGameName(str(ranklist[2][1])))
  510. return returnlist
  511.  
  512.  
  513.  
  514.  
  515.  
  516.  
  517.  
  518.  
  519. ########THINGS TO RUN
  520.  
  521. #print getRaceText("ANY%")
  522.  
  523.  
  524.  
  525.  
  526.  
  527.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement