Guest User

Untitled

a guest
May 18th, 2017
213
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 28.86 KB | None | 0 0
  1. library(httr)
  2. library(rjson)
  3. library(XML)
  4. library(stringr)
  5. library(curl)
  # Session-wide HTTP configuration.
  # TLS certificate verification stays enabled by default because this script
  # posts login credentials. Set TIMEFORM_INSECURE_SSL=1 in the environment to
  # restore the previous behaviour of skipping peer verification.
  if (identical(Sys.getenv("TIMEFORM_INSECURE_SSL"), "1")) {
    httr::set_config(config(ssl_verifypeer = 0L))
  }
  # User agent reported by base R download facilities.
  options(HTTPUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0")
  8.  
  # Define input
  #
  # abb_race_list is a pair of position-aligned lists:
  #   abb_race_list[[1]] - track abbreviations
  #   abb_race_list[[2]] - full track names
  # Entry k of the first list is the abbreviation for entry k of the second.
  # Every name is a single-line string; a line break inside a name would stop
  # it from ever matching the track name read from the page.
  abb_race_list <- list(
    list(
      "AIK", "AJX", "ABT", "ALB", "ANF", "AQU", "ARP", "AP", "ASD", "ATH",
      "ATL", "ATO", "BSR", "BM", "BMF", "BEL", "BTP", "BHP", "BEU", "BRD",
      "BRO", "BCF", "CRC", "CAM", "CBY", "CAS", "CWF", "CHA", "CHL", "CPW",
      "CD", "CNL", "NS2", "CLS", "DAY", "DMR", "NS9", "DEL", "DED", "DEP",
      "DET", "DXD", "DUQ", "UN", "ELK", "ELP", "EMD", "EMT", "ED", "EUR",
      "EVD", "FG", "FAI", "FMT", "FPL", "FAX", "FP", "FPX", "FH", "FER",
      "FL", "FON", "FE", "FTP", "FX", "FNO", "GV", "GIL", "GLN", "GG",
      "SAF", "GN", "GPR", "GRP", "GBF", "GF", "GLD", "GRM", "N11", "GRA",
      "DUN", "GP", "GPW", "BRN", "HST", "HAW", "HP", "HIA", "CT", "HOL",
      "HOO", "HPO", "HCF", "IND", "ING", "JRM", "KSP", "KAM", "KEE", "KD",
      "KIN", "LAM", "LBT", "LRL", "BOI", "LBG", "LEX", "LNN", "LEV", "LS",
      "LA", "LRC", "LAD", "MVR", "MAL", "MAN", "MAF", "MAR", "MD", "MED",
      "MDA", "MID", "MC", "MS", "MIL", "MON", "NS4", "MTH", "MTP", "MOR",
      "MNR", "MPM", "S11", "FAR", "NP", "NVD", "OKR", "OTH", "OTP", "OSA",
      "OP", "OTC", "ONE", "PMB", "PRX", "NS6", "PEN", "NS5", "UNI", "PW",
      "PHA", "PIC", "PIM", "PMT", "PNL", "PLN", "POD", "PM", "PRM", "PID",
      "RB", "RDM", "RP", "RET", "RIL", "RD", "RKM", "RPD", "RUI", "RUP",
      "SAC", "HOU", "SJD", "SLR", "SAN", "SDY", "SA", "SON", "SFE", "SR",
      "SAR", "NS8", "SEO", "SHW", "SHD", "SOL", "SOP", "SPT", "STP", "STK",
      "STN", "SH", "SUF", "N12", "SUD", "SND", "SUN", "SRP", "SWF", "TAM",
      "WDS", "TDN", "TIL", "TIM", "TGD", "TRY", "TUP", "TP", "WTS", "WW",
      "WBR", "WMF", "WPR", "ELY", "WRD", "WIL", "WNT", "WO", "WYO", "YM",
      "YAV", "YD", "ZIA"
    ),
    list(
      "AIKEN", "AJAX DOWNS", "ALBERTA DOWNS", "ALBUQUERQUE", "ANTHONY DOWNS",
      "AQUEDUCT", "ARAPAHOE PARK", "ARLINGTON", "ASSINIBOIA DOWNS", "ATLANTA",
      "ATLANTIC CITY", "ATOKAD DOWNS", "BARRETTS RACE MEET AT FAIRPLEX",
      "BAY MEADOWS", "BAY MEADOWS FAIR", "BELMONT PARK", "BELTERRA PARK",
      "BETFAIR HOLLYWOOD PARK", "BEULAH PARK", "BLUE RIBBON DOWNS",
      "BROOKHILL FARM", "BROWN COUNTY FAIR", "CALDER RACE COURSE", "CAMDEN",
      "CANTERBURY PARK", "CASSIA COUNTY FAIR", "CENTRAL WYOMING FAIR",
      "CHARLESTON", "CHARLOTTE", "CHIPPEWA DOWNS",
      "CHURCHILL DOWNS", "COLONIAL DOWNS", "COLONIAL NSA", "COLUMBUS", "DAYTON",
      "DEL MAR", "DELAWARE NSA", "DELAWARE PARK", "DELTA DOWNS", "DESERT PARK",
      "DETROIT RACE COURSE", "DIXIE DOWNS", "DU QUOIN",
      "EASTERN OREGON LIVESTOCK SHOW", "ELKO COUNTY FAIR", "ELLIS PARK",
      "EMERALD DOWNS", "EMMETT", "ENERGY DOWNS", "EUREKA",
      "EVANGELINE DOWNS", "FAIR GROUNDS", "FAIR HILL", "FAIR MEADOWS",
      "FAIR PLAY PARK", "FAIRFAX", "FAIRMOUNT PARK", "FAIRPLEX PARK",
      "FAR HILLS", "FERNDALE",
      "FINGER LAKES", "FONNER PARK", "FORT ERIE", "FORT PIERRE", "FOXFIELD",
      "FRESNO", "GENESEE VALLEY", "GILLESPIE COUNTY FAIRGROUND", "GLYNDON",
      "GOLDEN GATE FIELDS",
      "GRAHAM COUNTY FAIR @ SAFFORD", "GRAND NATIONAL", "GRANDE PRAIRIE",
      "GRANTS PASS", "GREAT BARRINGTON FAIR", "GREAT FALLS", "GREAT LAKES DOWNS",
      "GREAT MEADOW", "GREAT MEADOW NSA", "GREEN ACRES",
      "GREENELEE COUNTY FAIR @ DUNCAN", "GULFSTREAM PARK", "GULFSTREAM PARK WEST",
      "HARNEY COUNTY FAIR", "HASTINGS RACECOURSE", "HAWTHORNE", "HAZEL PARK",
      "HIALEAH PARK", "HOLLYWOOD CASINO AT CHARLES TOWN RACES", "HOLLYWOOD PARK",
      "HOOSIER PARK", "HORSEMEN'S PARK", "HUMBOLDT COUNTY FAIR",
      "INDIANA GRAND RACE COURSE", "INGLESIDE", "JEROME COUNTY FAIR", "KALISPELL",
      "KAMLOOPS", "KEENELAND", "KENTUCKY DOWNS",
      "KIN PARK", "LA MESA PARK", "LAUREL BROWN RACETRACK", "LAUREL PARK",
      "LES BOIS PARK", "LETHBRIDGE", "LEXINGTON", "LINCOLN RACE COURSE",
      "LITTLE EVERGLADES", "LONE STAR PARK",
      "LOS ALAMITOS", "LOS ALAMITOS RACE COURSE", "LOUISIANA DOWNS",
      "MAHONING VALLEY RACE COURSE", "MALVERN", "MANOR DOWNS", "MARIAS FAIR",
      "MARLBORO", "MARQUIS DOWNS", "MEADOWLANDS",
      "MELVILLE DISTRICT AGRIPAR", "MIDDLEBURG", "MILES CITY", "MILL SPRING",
      "MILLARVILLE", "MONKTON", "MONMOUTH NSA", "MONMOUTH PARK", "MONTPELIER",
      "MORVEN PARK",
      "MOUNTAINEER CASINO RACETRACK & RESORT", "MT. PLEASANT MEADOWS",
      "Monterrico Peru", "NORTH DAKOTA HORSE PARK", "NORTHLANDS PARK",
      "NORTHVILLE DOWNS", "OAK RIDGE", "OAK TREE AT HOLLYWOOD PARK",
      "OAK TREE AT PLEASANTON", "OAK TREE AT SANTA ANITA",
      "OAKLAWN PARK", "OCALA TRAINING CENTER", "ONEIDA COUNTY FAIR",
      "PALM BEACH POLO CLUB", "PARX RACING", "PARX RACING NSA", "PENN NATIONAL",
      "PENN NATIONAL NSA", "PENNSYLVANIA HUNT CUP", "PERCY WARNER",
      "PHILADELPHA PARK", "PICOV DOWNS", "PIMLICO", "PINE MTN-CALLAWAY GARDEN",
      "PINNACLE RACE COURSE", "PLEASANTON", "POCATELLO DOWNS", "PORTLAND MEADOWS",
      "PRAIRIE MEADOWS", "PRESQUE ISLE DOWNS",
      "RED BANK", "RED MILE", "REMINGTON PARK", "RETAMA PARK", "RILLITO",
      "RIVER DOWNS", "ROCKINGHAM PARK", "ROSSBURN PARKLAND DOWNS", "RUIDOSO DOWNS",
      "RUPERT DOWNS",
      "SACRAMENTO", "SAM HOUSTON RACE PARK", "SAN JUAN DOWNS",
      "SAN LUIS REY TRAINING CENTER", "SANDOWN PARK", "SANDY DOWNS",
      "SANTA ANITA PARK", "SANTA CRUZ COUNTY FAIR @ SONOITA", "SANTA FE",
      "SANTA ROSA",
      "SARATOGA", "SARATOGA NSA", "SEARCH ENGINE OPTIMIZATION", "SHAWAN DOWNS",
      "SHENNANDOAH DOWNS", "SOLANO", "SOUTHERN PINES", "SPORTSMAN'S PARK",
      "STAMPEDE PARK", "STOCKTON",
      "STONEYBROOK AT FIVE POINTS", "STRAWBERRY HILL", "SUFFOLK DOWNS",
      "SUFFOLK NSA", "SUN DOWNS", "SUNFLOWER", "SUNLAND PARK", "SUNRAY PARK",
      "SWEETWATER DOWNS", "TAMPA BAY DOWNS",
      "THE WOODLANDS", "THISTLEDOWN", "TILLAMOOK COUNTY FAIR", "TIMONIUM",
      "TIOGA DOWNS", "TRYON", "TURF PARADISE", "TURFWAY PARK",
      "WAITSBURG RACE TRACK", "WALLA WALLA",
      "WEBER DOWNS", "WESTERN MT FAIR", "WHITE PINE RACEWAY", "WHITE PINE RACING",
      "WILL ROGERS DOWNS", "WILLOWDALE STEEPLECHASE", "WINTERTHUR", "WOODBINE",
      "WYOMING DOWNS", "YAKIMA MEADOWS",
      "YAVAPAI DOWNS", "YELLOWSTONE DOWNS", "ZIA PARK"
    )
  )
  # Starting value for the year tracking done while writing records.
  year <- "2017"

  # Date appended to the buytracks / purchases URLs.
  race_date <- "5-20-2017"
  # Pattern matched against the track names listed on those pages.
  track_name <- "Evangeline"
  15.  
  # Log in to timeformus.com.
  #
  # Fetches the login page, reads the anti-forgery token from the login form and
  # posts it back together with the credentials.
  #
  # Args:
  #   username, password: account credentials. They default to the environment
  #     variables TIMEFORM_USER / TIMEFORM_PASSWORD.
  #     NOTE(review): the literal fallbacks are kept only so existing runs keep
  #     working; they are secrets committed to source and should be rotated and
  #     removed.
  #
  # Returns (invisibly) the httr response of the login POST. The values the
  # earlier version pulled out of the cookie table by position were never used,
  # so that extraction is gone.
  # NOTE(review): later requests rely on httr re-sending the session cookies
  # for this host - confirm against the httr handle documentation.
  login <- function(username = Sys.getenv("TIMEFORM_USER", "mutuelinvest"),
                    password = Sys.getenv("TIMEFORM_PASSWORD", "mi1Timeform")) {
    # Grab CSRF token
    login_page <- GET("https://timeformus.com/login?ReturnUrl=/")
    stop_for_status(login_page)
    doc <- htmlParse(content(login_page, "text"), asText = TRUE)
    token <- xpathSApply(
      doc, "/html/body/div/div/div[2]/div/form/input[1]", xmlGetAttr, "value"
    )
    if (length(token) == 0) {
      stop("Login form token not found; the login page layout may have changed.",
           call. = FALSE)
    }

    # Login
    response <- POST(
      "https://timeformus.com/Login",
      body = list(
        "__RequestVerificationToken" = token[[1]],
        returnUrl = "/",
        UserName = username,
        Password = password
      ),
      encode = "form"
    )
    stop_for_status(response)
    invisible(response)
  }
  31.  
  32.  
  # Check whether a track is already owned for a race date and buy it if not.
  #
  # Args:
  #   race_date: date string appended to the buytracks URL.
  #   name: regular expression matched against each track name on the page.
  #
  # For every listed track whose name matches `name`: when its status text
  # contains "Add" the track is added and the checkout request is posted,
  # otherwise "Already own" is printed. Returns NULL invisibly.
  purchase_track <- function(race_date, name) {
    # Check track and purchase
    url <- paste0("https://timeformus.com/buytracks/", race_date)
    print(url)
    page <- GET(url)
    doc <- htmlParse(content(page, "text"), asText = TRUE)

    row_xpath <- "//form/div[@class=\"race clearfix\"]"
    track_name <- as.character(
      xpathSApply(doc, paste0(row_xpath, "/div/strong"), xmlValue)
    )
    track_id <- xpathSApply(doc, paste0(row_xpath, "/input[1]"), xmlGetAttr, "value")
    track_status <- as.character(
      xpathSApply(doc, paste0(row_xpath, "/div[3]"), xmlValue)
    )

    # seq_along() keeps the loop from running when the page lists no tracks.
    for (i in seq_along(track_status)) {
      name_hit <- regexpr(name, track_name[i])
      # A row without a name yields NA; treat it as "no match".
      if (is.na(name_hit) || name_hit == -1) {
        next
      }
      if (regexpr("Add", track_status[i]) != -1) {
        print("Purchase")
        # Purchase
        added <- POST(
          "https://timeformus.com/BuyTracks/Add",
          body = list(trackRaceDateId = track_id[i], raceDate = race_date),
          encode = "form"
        )
        warn_for_status(added)
        # NOTE(review): payWith is sent exactly as before; presumably it
        # identifies a stored payment method - verify.
        paid <- POST(
          "https://timeformus.com/buypps/checkout",
          body = list(payWith = "537b15b2-15af-4450-abdd-4b15d0b6fa9d"),
          encode = "form"
        )
        warn_for_status(paid)
      } else {
        print('Already own')
      }
    }
    invisible(NULL)
  }
  63.  
  # Wrap a value in double quotes for the CSV output.
  #
  # Leading/trailing whitespace is removed from every element of `raw`, any
  # embedded double quote is doubled so the field stays a valid quoted CSV
  # field, and all elements are concatenated into one quoted string.
  #
  # Args:
  #   raw: value coercible to character (string, number, Date, vector).
  # Returns:
  #   A single character string of the form "\"...\"".
  clear_text <- function(raw) {
    trimmed <- gsub("^\\s+|\\s+$", "", raw)
    escaped <- gsub("\"", "\"\"", trimmed, fixed = TRUE)
    paste0(c("\"", escaped, "\""), collapse = "")
  }
  69.  
  # Open output file
  fileConn <- file("output.csv", "w")
  # Login
  login()
  # Check / Purchase track
  purchase_track(race_date, track_name)

  # Get race date page
  r <- GET(paste0("https://timeformus.com/account/purchases/", race_date))
  temp <- content(r, "text")
  doc <- htmlParse(temp, asText = TRUE)

  # Grab track link: the last purchased track whose name matches `track_name`
  link <- xpathSApply(doc, "//div[@class=\"race clearfix\"]/div[3]/a", xmlGetAttr, "href")
  name <- xpathSApply(doc, "//div[@class=\"race clearfix\"]/div[1]/strong", xmlValue)
  href <- ""
  # seq_along() so an empty purchases page does not iterate over 1, 0.
  for (i in seq_along(name)) {
    if (regexpr(track_name, name[i]) != -1) {
      href <- as.character(link[i])
      print("Found link")
    }
  }
  # Without a link there is nothing to scrape; stop with a clear message
  # (and release the output file) instead of fetching the site root.
  if (length(href) != 1 || is.na(href) || !nzchar(href)) {
    close(fileConn)
    stop("No purchased track matching '", track_name, "' was found for ",
         race_date, call. = FALSE)
  }

  # Go to track link
  url <- paste0("https://timeformus.com", href)
  print(url)
  r <- GET(url)
  temp <- content(r, "text")
  doc <- htmlParse(temp, asText = TRUE)
  101.  
  # Start logging result

  # 1. Header
  # Writes one "H" record: track name, track code, race date, number of races.
  record_type <- '"H"'
  track_name <- clear_text(xpathSApply(doc, "//div[@class=\"track-options\"]/select/option[@selected=\"selected\"]", xmlValue)[[1]])
  track_code <- track_name
  # Match race abbreviation with full name.
  # Every known name is compared with the track name from the page and the
  # longest contained name wins, so e.g. "GULFSTREAM PARK WEST" is preferred
  # over "GULFSTREAM PARK". Matching is literal (no regex metacharacters).
  page_track_name <- toupper(track_name)
  known_names <- toupper(unlist(abb_race_list[[2]]))
  hits <- which(vapply(known_names, grepl, logical(1), x = page_track_name, fixed = TRUE))
  if (length(hits) > 0) {
    best <- unname(hits[which.max(nchar(known_names[hits]))])
    print(abb_race_list[[2]][[best]])
    track_code <- clear_text(abb_race_list[[1]][[best]])
  }
  # Clean up in case of no matching abbreviation
  track_code <- gsub("\r\n", "", track_code)
  track_code <- gsub(" {2,20}", "", track_code)

  race_date <- xpathSApply(doc, "//div[@class=\"track-date\"]/select/option[@selected=\"selected\"]", xmlValue)[[1]]
  # The "%A, %b %d" format carries no year, so parsing it alone takes the year
  # from the day the script runs. Parse with the configured `year` first and
  # fall back to the year-less format only if that fails.
  parsed_race_date <- as.Date(strptime(paste(trimws(race_date), year), format = "%A, %b %d %Y"))
  if (is.na(parsed_race_date)) {
    parsed_race_date <- as.Date(strptime(race_date, format = "%A, %b %d"))
  }
  race_date <- clear_text(parsed_race_date)
  no_race <- clear_text(length(xpathSApply(doc, "//div[@class=\"race-options\"]/select/option", xmlValue)))
  writeLines(paste(c(record_type, track_name, track_code, race_date, no_race), collapse = ","), fileConn)
  126.  
  # ---- Helpers used while writing race records ----

  # Trimmed text of every node matching `xpath`, concatenated ("" if none).
  raw_text <- function(doc, xpath) {
    paste(gsub("^\\s+|\\s+$", "", xpathSApply(doc, xpath, xmlValue)), collapse = "")
  }

  # Quoted text of every node matching `xpath`.
  node_text <- function(doc, xpath) {
    clear_text(xpathSApply(doc, xpath, xmlValue))
  }

  # Write one CSV record (fields joined by commas) to the output file.
  write_record <- function(fields) {
    writeLines(paste(fields, collapse = ","), fileConn)
  }

  # First 4-character match of `pattern` in `text`, or NA when there is none.
  find_year <- function(text, pattern) {
    text <- paste(as.character(text), collapse = " ")
    at <- regexpr(pattern, text)
    if (at == -1) NA_character_ else substr(text, at, at + 3)
  }

  # Position of the right-most note keyword (first occurrence of each keyword),
  # or -1 when the text contains none of them.
  note_keywords <- c("Previously trained", "Reported gelding", "Claimed from", "Vet Scratch")
  last_note_start <- function(info) {
    max(vapply(note_keywords, function(k) as.integer(regexpr(k, info)), integer(1)))
  }

  # Request the past-performance table of one starter; `adjusted` is the
  # "true"/"false" string sent in the form.
  fetch_past_performances <- function(starter_id, adjusted) {
    response <- POST(
      "https://timeformus.com/basicpps/starterPastPerformances2Ajax",
      body = list(thisHourse = "true", accrued = "true", adjusted = adjusted,
                  weightOn = "false", paceFigOn = "false", starterId = starter_id),
      encode = "form",
      add_headers("X-Requested-With" = "XMLHttpRequest")
    )
    htmlParse(content(response, "text"), asText = TRUE)
  }

  # Loop through all race no
  race_hrefs <- xpathSApply(doc, "//div[@class=\"race-options\"]/select/option", xmlGetAttr, "value")
  for (href in race_hrefs) {
    url <- paste0("https://timeformus.com", href)
    race_doc <- htmlParse(content(GET(url), "text"), asText = TRUE)
    print(url)

    # 3. Race Record
    # The selected option text is split on commas: part 1 holds "Race <no>",
    # part 4 the distance and part 5 the surface. Splitting the unquoted text
    # keeps a stray closing quote out of the last field.
    race_info <- gsub("^\\s+|\\s+$", "", xpathSApply(race_doc, "//div[@class=\"race-options\"]/select/option[@selected=\"selected\"]", xmlValue)[1])
    info_parts <- strsplit(race_info, "[,]")[[1]]
    race_detail <- function(k) {
      clear_text(xpathSApply(race_doc, paste0("//div[@class=\"content-info diagram-info\"]/p[", k, "]"), xmlValue)[1])
    }
    starter_nodes <- xpathSApply(race_doc, "//div[@id=\"pp-horse-data\"]/div", xmlValue)

    race_type <- race_detail(2)
    race_no <- clear_text(strsplit(info_parts[1], "[ ]")[[1]][2])
    surface <- clear_text(info_parts[5])
    distance <- clear_text(info_parts[4])
    purse <- race_detail(4)
    race_text <- race_detail(5)
    # The first child div is not a starter, hence the -1.
    field_size <- clear_text(length(starter_nodes) - 1)
    write_record(c('"R"', race_type, race_date, race_no, surface, distance, purse, race_text, field_size))

    # 5. Wager
    # One "E" record per wager; the list is split on "," and then on "/".
    wager_text <- xpathSApply(race_doc, "//span[@class=\"wagers\"]", xmlValue)
    if (length(wager_text) > 0) {
      for (wager_group in strsplit(as.character(wager_text[1]), "[,]")[[1]]) {
        for (wager in strsplit(wager_group, "[/]")[[1]]) {
          write_record(c('"E"', track_code, race_date, race_no, clear_text(wager)))
        }
      }
    }

    # 2. Starter Record
    # Starters are child divs 2..n; seq_len(n)[-1] is empty when n <= 1.
    for (starter_idx in seq_len(length(starter_nodes))[-1]) {
      starter_xpath <- paste0("//div[@id=\"pp-horse-data\"]/div[", starter_idx, "]")
      starter_attr <- function(attr) {
        xpathSApply(race_doc, starter_xpath, xmlGetAttr, attr)[1]
      }

      horse_name <- starter_attr("data-name")
      print(horse_name)
      main_track_note <- xpathSApply(race_doc, paste0(starter_xpath, "/div/div/div/div/i"), xmlValue)[1]
      # NOTE(review): a missing node renders as "NULL" in quotes (6 chars),
      # which is presumably why the threshold is 6.
      if (nchar(clear_text(main_track_note)) > 6) {
        horse_name <- paste(horse_name, "Main track Only")
        print(horse_name)
      }
      horse_name <- clear_text(horse_name)

      sire <- clear_text(starter_attr("data-sire"))
      dam <- starter_attr("data-dam")
      paren_open <- gregexpr("\\(", dam)[[1]]
      paren_close <- gregexpr("\\)", dam)[[1]]
      # Reset per starter so an unparseable dam never reuses the previous
      # starter's value.
      dam_sire <- clear_text("")
      if (length(paren_open) == 1) {
        dam_sire <- clear_text(substring(dam, regexpr("\\(", dam) + 1, regexpr("\\)", dam) - 1))
      } else if (length(paren_open) == 2 || length(paren_open) == 3) {
        # Prefer the text from the second "(" to the last ")"; if that is too
        # short, widen it to start at the first "(".
        dam_sire <- clear_text(substring(dam, paren_open[2] + 1, rev(paren_close)[1] - 1))
        if (nchar(dam_sire) <= 6) {
          dam_sire <- clear_text(substring(dam, paren_open[1] + 1, rev(paren_close)[1] - 1))
        }
      }
      dam <- clear_text(substring(dam, 0, regexpr("\\(", dam) - 2))

      # data-sexagestring is split on "|": sex | age | birth month | area-...
      sex_age_parts <- strsplit(gsub("Age", "", starter_attr("data-sexagestring")), "[|]")[[1]]
      birth_month <- clear_text(sex_age_parts[3])
      horse_sex <- clear_text(sex_age_parts[1])
      foaling_area <- clear_text(strsplit(sex_age_parts[4], "[-]")[[1]][1])
      horse_age <- clear_text(sex_age_parts[2])
      if (grepl("Filly", horse_sex) || grepl("Mare", horse_sex)) {
        horse_gender <- clear_text("Female")
      } else {
        horse_gender <- clear_text("Male")
      }

      jockey <- clear_text(xpathSApply(race_doc, paste0(starter_xpath, "/div[@class=\"col jockey\"]"), xmlValue)[1])
      trainer <- clear_text(xpathSApply(race_doc, paste0(starter_xpath, "/div[@class=\"col trainer\"]"), xmlValue)[1])
      owner <- clear_text(starter_attr("data-owner"))
      breeder <- clear_text(starter_attr("data-breeder"))
      weight <- clear_text(gsub("lbs", "", starter_attr("data-weight")))
      morning_line_odds <- clear_text(starter_attr("data-ml"))
      post <- clear_text(starter_idx - 1)
      program_no <- clear_text(starter_attr("data-programnumber"))
      claiming_price <- clear_text(starter_attr("data-tagdisplay"))
      equipment <- clear_text(starter_attr("data-equipment"))
      running_style <- clear_text(starter_attr("data-runningstyle"))
      data_id <- starter_attr("data-id")

      write_record(c(
        '"S"', race_no, race_date, track_code, horse_name, sire, dam, dam_sire,
        horse_age, birth_month, horse_sex, horse_gender, foaling_area, jockey,
        trainer, owner, breeder, weight, morning_line_odds, post, program_no,
        claiming_price, equipment, running_style
      ))

      # 4. Past Performance
      # The table is requested twice: unadjusted for everything except the
      # adjusted fractions, which come from the second response.
      pp_doc <- fetch_past_performances(data_id, "false")
      pp_adj_doc <- fetch_past_performances(data_id, "true")

      if (length(xpathSApply(pp_doc, "//div[@class=\"unraced-horse\"]", xmlValue)) != 0) {
        write_record(c('"P"', "\"UNRACED \"", horse_name))
      } else {
        # Year tracking starts from the configured `year` for every starter
        # and is kept separate from the workout parsing further down.
        pp_year <- year
        n_rows <- length(xpathSApply(pp_doc, "//table/tbody/tr", xmlValue))
        for (row_idx in seq_len(n_rows)[-1]) {
          row_xpath <- paste0("//table/tbody/tr[", row_idx, "]")
          row_values <- function(suffix) xpathSApply(pp_doc, paste0(row_xpath, suffix), xmlValue)
          off <- function(suffix) node_text(pp_doc, paste0(row_xpath, suffix))
          adj <- function(suffix) node_text(pp_adj_doc, paste0(row_xpath, suffix))
          date_td <- "/td[@class=\"no-wrap track-race-date\"]"
          info_td <- "/td[@class=\"no-wrap\"][1]"
          modal_p <- paste0(info_td, "/div[@class=\"diagram-info modal\"]/div/p")
          rating_td <- function(k) paste0("/td[@class=\"no-wrap past-rating\"][", k, "]")

          track_code_t <- off(paste0(date_td, "/div/div[2]"))
          # A "|" in the date cell flags the row (race_int); only the text
          # after the "|" is kept as the date.
          race_date_raw <- row_values(paste0(date_td, "/div/div[3]"))
          race_int <- length(race_date_raw) > 0 && regexpr("[|]", race_date_raw[[1]]) != -1
          if (race_int) {
            race_date_raw <- strsplit(race_date_raw, "[|]")[[1]][2]
          }
          race_date_t <- clear_text(race_date_raw)

          # Year indicator: emit an "I" record whenever the year found in the
          # date cell differs from the last one seen.
          row_year <- find_year(row_values(date_td), "20\\d\\d")
          if (!is.na(row_year)) {
            if (pp_year != row_year) {
              write_record(c('"I"', row_year, ""))
            }
            pp_year <- row_year
          }

          # Information record(s): notes are peeled off from the right, one
          # "I" record each, while a keyword starts beyond position 10.
          info <- paste(as.character(row_values(paste0(date_td, "/div/div[1]/span[2]"))), collapse = "")
          if (info != "") {
            while (last_note_start(info) > 10) {
              out <- clear_text(substr(info, last_note_start(info), nchar(info)))
              write_record(c('"I"', pp_year, out))
              info <- substr(info, 0, rev(gregexpr(" - ", info)[[1]])[1])
            }
            print(paste("info", info))
            info <- gsub(" - ", "", info)
            info <- clear_text(substring(info, last_note_start(info), nchar(info)))
            write_record(c('"I"', pp_year, info))
          }

          race_speed_fig <- off("/td[@class=\"pps-class-rating\"]")
          race_number <- clear_text(gsub("Race", "", row_values(paste0(modal_p, "[1]"))))
          pp_race_type <- off(paste0(modal_p, "[3]"))
          pp_claiming_price <- off(paste0(info_td, "/div[1]/div[2]/span[1]"))
          pp_distance <- off(paste0(modal_p, "[4]"))
          pp_surface <- off(paste0(modal_p, "[2]"))
          surface_condition <- off(paste0(info_td, "/div[1]/div[2]/span[3]"))
          pp_purse <- off(paste0(modal_p, "[5]"))
          pp_race_text <- off(paste0(modal_p, "[6]"))

          # "weight|equipment|odds"; split the unquoted text so no piece
          # carries a wrapping quote.
          wt_parts <- strsplit(raw_text(pp_doc, paste0(row_xpath, "/td[@class=\"no-wrap\"][2]/div[2]")), "[|]")[[1]]
          pp_weight <- clear_text(gsub('"', "", wt_parts[1]))
          pp_equipment <- clear_text(wt_parts[2])
          odds <- clear_text(gsub('"', "", wt_parts[3]))

          pp_jockey <- off("/td[@class=\"no-wrap\"][2]/div[1]")
          post_position <- clear_text(gsub("P", "", row_values("/td[@class=\"no-wrap past-rating\"]/div/div[1]")))
          pp_field_size <- clear_text(gsub("F", "", row_values("/td[@class=\"no-wrap past-rating\"]/div/div[2]")))

          # Calls 1-5 live in past-rating cells 2-6; call 6 is always "-".
          positions <- vapply(2:6, function(k) off(paste0(rating_td(k), "/div/b")), character(1))
          beaten <- vapply(2:6, function(k) {
            clear_text(gsub("[^0-9A-Za-z///' ]", "", row_values(paste0(rating_td(k), "/div/sup"))))
          }, character(1))
          fractions_off <- vapply(2:7, function(k) off(paste0(rating_td(k), "/div[2]")), character(1))
          fractions_adj <- vapply(2:7, function(k) adj(paste0(rating_td(k), "/div[2]")), character(1))

          speed_fig <- off("/td[11]/div")
          company_line <- off("/td[@class=\"no-wrap winner-places\"]/a/div")
          comment <- off("/td/div[@class=\"hide\"]")

          # Flagged rows report their only call in slot 5 and blank slot 1.
          if (race_int) {
            positions[5] <- positions[1]
            beaten[5] <- beaten[1]
            positions[1] <- ""
            beaten[1] <- ""
          }

          write_record(c(
            '"P"', track_code_t, horse_name, pp_jockey, race_date_t, race_speed_fig,
            race_number, pp_race_type, pp_claiming_price, pp_distance, pp_surface,
            surface_condition, pp_purse, pp_race_text, pp_weight, pp_equipment, odds,
            post_position, pp_field_size,
            c(rbind(positions, beaten)), '"-"', '"-"',
            fractions_off, fractions_adj, speed_fig, company_line, comment
          ))
        }
      }

      # 6. Workout record
      workout_url <- paste0("https://timeformus.com/basicpps/starterWorkouts?starterId=", data_id)
      workout_page <- GET(workout_url, add_headers("X-Requested-With" = "XMLHttpRequest"))
      workout_doc <- htmlParse(content(workout_page, "text"), asText = TRUE)
      # Year used until a list item containing a 4-digit number is seen.
      # NOTE(review): '2016' is the literal the script always used here.
      workout_year <- "2016"

      # Items are walked in reverse document order. An item containing a
      # 4-digit number only updates the year; any other item is one workout.
      for (workout in rev(xpathSApply(workout_doc, "//div[@class=\"content\"]/section/ul/li[not(@class)]", xmlValue))) {
        item_year <- find_year(workout, "\\d{4}")
        if (!is.na(item_year)) {
          workout_year <- item_year
          next
        }

        tokens <- strsplit(workout, "[ ]")[[1]]
        colon_parts <- strsplit(workout, "[:]")[[1]]
        head_tokens <- strsplit(colon_parts[1], " ")[[1]]

        workout_date <- clear_text(as.Date(strptime(paste(c(tokens[1], workout_year), collapse = "-"), format = "%b%d-%Y")))
        track_code_t <- clear_text(tokens[2])
        workout_surface <- clear_text(tokens[3])
        # The token count before the first ":" decides where the surface
        # (one or two words) and its condition sit.
        if (length(head_tokens) == 5) {
          workout_surface <- clear_text(paste0(c(tokens[3], tokens[4]), collapse = " "))
          workout_condition <- clear_text(tokens[5])
        } else if (length(head_tokens) == 4) {
          workout_condition <- clear_text(tokens[4])
        } else {
          workout_condition <- clear_text(tokens[5])
        }

        # Everything after the first ":"; a second ":" belongs to the time.
        if (length(colon_parts) == 3) {
          half_part <- paste(c(colon_parts[2], colon_parts[3]), collapse = ":")
        } else {
          half_part <- colon_parts[2]
        }

        workout_distance <- clear_text(strsplit(half_part, "[ ]")[[1]][2])
        workout_time <- clear_text(substr(half_part, regexpr("in ", half_part) + 3, regexpr("\\(", half_part) - 1))
        workout_rank <- clear_text(substr(half_part, regexpr("\\(", half_part), nchar(half_part)))

        write_record(c(
          '"W"', workout_date, horse_name, track_code_t, workout_surface,
          workout_condition, workout_distance, workout_time, workout_rank
        ))
      }
    }
  }

  # Close file
  close(fileConn)
Add Comment
Please, Sign In to add comment